Encoder_ISO8859_9.st
author Claus Gittinger <cg@exept.de>
Fri, 05 Mar 2004 18:28:27 +0100
changeset 8081 b468050174a9
child 8114 05274a80fcc4
permissions -rw-r--r--
initial checkin

"{ Encoding: utf8 }"

"{ Package: 'stx:libbasic' }"

"{ NameSpace: CharacterEncoderImplementations }"

"Defines ISO8859_9 as a subclass of ISO8859_1 in the category
 'Collections-Text-Encodings'. The class declares no instance variables,
 class variables or pool dictionaries of its own."
ISO8859_1 subclass:#ISO8859_9
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Encodings'
!


!ISO8859_9 class methodsFor:'mapping'!

mapFileURL1_relativePathName
    "Answer the relative path name of the mapping table file for this encoding.
     The string matches the tail of the URL quoted at the top of the #mapping
     comment; presumably it is appended to a base URL by the caller - TODO confirm."

    ^ 'ISO8859/8859-9.TXT'
!

mapping
"
# From: http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT

#
#       Name:             ISO/IEC 8859-9:1999 to Unicode
#       Unicode version:  3.0
#       Table version:    1.0
#       Table format:     Format A
#       Date:             1999 July 27
#       Authors:          Ken Whistler <kenw@sybase.com>
#
#       Copyright (c) 1991-1999 Unicode, Inc.  All Rights reserved.
#
#       This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
#       No claims are made as to fitness for any particular purpose.  No
#       warranties of any kind are expressed or implied.  The recipient
#       agrees to determine applicability of information provided.  If this
#       file has been provided on magnetic media by Unicode, Inc., the sole
#       remedy for any claim will be exchange of defective media within 90
#       days of receipt.
#
#       Unicode, Inc. hereby grants the right to freely use the information
#       supplied in this file in the creation of products supporting the
#       Unicode Standard, and to make copies of this file in any form for
#       internal or external distribution as long as this notice remains
#       attached.
#
#       General notes:
#
#       This table contains the data the Unicode Consortium has on how
#       ISO/IEC 8859-9:1999 characters map into Unicode.
#
#       Format:  Three tab-separated columns
#                Column #1 is the ISO/IEC 8859-9 code (in hex as 0xXX)
#                Column #2 is the Unicode (in hex as 0xXXXX)
#                Column #3 the Unicode name (follows a comment sign, '#')
#
#       The entries are in ISO/IEC 8859-9 order.
#
#       ISO/IEC 8859-9 is also equivalent to ISO-IR-148.
#
#       Version history
#       1.0 version updates 0.1 version by adding mappings for all
#       control characters.
#
#       Updated versions of this file may be found in:
#               <ftp://ftp.unicode.org/Public/MAPPINGS/>
#
#       Any comments or problems, contact <errata@unicode.org>
#       Please note that <errata@unicode.org> is an archival address;
#       notices will be checked, but do not expect an immediate response.
#
0x00    0x0000  #       NULL
0x01    0x0001  #       START OF HEADING
0x02    0x0002  #       START OF TEXT
0x03    0x0003  #       END OF TEXT
0x04    0x0004  #       END OF TRANSMISSION
0x05    0x0005  #       ENQUIRY
0x06    0x0006  #       ACKNOWLEDGE
0x07    0x0007  #       BELL
0x08    0x0008  #       BACKSPACE
0x09    0x0009  #       HORIZONTAL TABULATION
0x0A    0x000A  #       LINE FEED
0x0B    0x000B  #       VERTICAL TABULATION
0x0C    0x000C  #       FORM FEED
0x0D    0x000D  #       CARRIAGE RETURN
0x0E    0x000E  #       SHIFT OUT
0x0F    0x000F  #       SHIFT IN
0x10    0x0010  #       DATA LINK ESCAPE
0x11    0x0011  #       DEVICE CONTROL ONE
0x12    0x0012  #       DEVICE CONTROL TWO
0x13    0x0013  #       DEVICE CONTROL THREE
0x14    0x0014  #       DEVICE CONTROL FOUR
0x15    0x0015  #       NEGATIVE ACKNOWLEDGE
0x16    0x0016  #       SYNCHRONOUS IDLE
0x17    0x0017  #       END OF TRANSMISSION BLOCK
0x18    0x0018  #       CANCEL
0x19    0x0019  #       END OF MEDIUM
0x1A    0x001A  #       SUBSTITUTE
0x1B    0x001B  #       ESCAPE
0x1C    0x001C  #       FILE SEPARATOR
0x1D    0x001D  #       GROUP SEPARATOR
0x1E    0x001E  #       RECORD SEPARATOR
0x1F    0x001F  #       UNIT SEPARATOR
0x20    0x0020  #       SPACE
0x21    0x0021  #       EXCLAMATION MARK
0x22    0x0022  #       QUOTATION MARK
0x23    0x0023  #       NUMBER SIGN
0x24    0x0024  #       DOLLAR SIGN
0x25    0x0025  #       PERCENT SIGN
0x26    0x0026  #       AMPERSAND
0x27    0x0027  #       APOSTROPHE
0x28    0x0028  #       LEFT PARENTHESIS
0x29    0x0029  #       RIGHT PARENTHESIS
0x2A    0x002A  #       ASTERISK
0x2B    0x002B  #       PLUS SIGN
0x2C    0x002C  #       COMMA
0x2D    0x002D  #       HYPHEN-MINUS
0x2E    0x002E  #       FULL STOP
0x2F    0x002F  #       SOLIDUS
0x30    0x0030  #       DIGIT ZERO
0x31    0x0031  #       DIGIT ONE
0x32    0x0032  #       DIGIT TWO
0x33    0x0033  #       DIGIT THREE
0x34    0x0034  #       DIGIT FOUR
0x35    0x0035  #       DIGIT FIVE
0x36    0x0036  #       DIGIT SIX
0x37    0x0037  #       DIGIT SEVEN
0x38    0x0038  #       DIGIT EIGHT
0x39    0x0039  #       DIGIT NINE
0x3A    0x003A  #       COLON
0x3B    0x003B  #       SEMICOLON
0x3C    0x003C  #       LESS-THAN SIGN
0x3D    0x003D  #       EQUALS SIGN
0x3E    0x003E  #       GREATER-THAN SIGN
0x3F    0x003F  #       QUESTION MARK
0x40    0x0040  #       COMMERCIAL AT
0x41    0x0041  #       LATIN CAPITAL LETTER A
0x42    0x0042  #       LATIN CAPITAL LETTER B
0x43    0x0043  #       LATIN CAPITAL LETTER C
0x44    0x0044  #       LATIN CAPITAL LETTER D
0x45    0x0045  #       LATIN CAPITAL LETTER E
0x46    0x0046  #       LATIN CAPITAL LETTER F
0x47    0x0047  #       LATIN CAPITAL LETTER G
0x48    0x0048  #       LATIN CAPITAL LETTER H
0x49    0x0049  #       LATIN CAPITAL LETTER I
0x4A    0x004A  #       LATIN CAPITAL LETTER J
0x4B    0x004B  #       LATIN CAPITAL LETTER K
0x4C    0x004C  #       LATIN CAPITAL LETTER L
0x4D    0x004D  #       LATIN CAPITAL LETTER M
0x4E    0x004E  #       LATIN CAPITAL LETTER N
0x4F    0x004F  #       LATIN CAPITAL LETTER O
0x50    0x0050  #       LATIN CAPITAL LETTER P
0x51    0x0051  #       LATIN CAPITAL LETTER Q
0x52    0x0052  #       LATIN CAPITAL LETTER R
0x53    0x0053  #       LATIN CAPITAL LETTER S
0x54    0x0054  #       LATIN CAPITAL LETTER T
0x55    0x0055  #       LATIN CAPITAL LETTER U
0x56    0x0056  #       LATIN CAPITAL LETTER V
0x57    0x0057  #       LATIN CAPITAL LETTER W
0x58    0x0058  #       LATIN CAPITAL LETTER X
0x59    0x0059  #       LATIN CAPITAL LETTER Y
0x5A    0x005A  #       LATIN CAPITAL LETTER Z
0x5B    0x005B  #       LEFT SQUARE BRACKET
0x5C    0x005C  #       REVERSE SOLIDUS
0x5D    0x005D  #       RIGHT SQUARE BRACKET
0x5E    0x005E  #       CIRCUMFLEX ACCENT
0x5F    0x005F  #       LOW LINE
0x60    0x0060  #       GRAVE ACCENT
0x61    0x0061  #       LATIN SMALL LETTER A
0x62    0x0062  #       LATIN SMALL LETTER B
0x63    0x0063  #       LATIN SMALL LETTER C
0x64    0x0064  #       LATIN SMALL LETTER D
0x65    0x0065  #       LATIN SMALL LETTER E
0x66    0x0066  #       LATIN SMALL LETTER F
0x67    0x0067  #       LATIN SMALL LETTER G
0x68    0x0068  #       LATIN SMALL LETTER H
0x69    0x0069  #       LATIN SMALL LETTER I
0x6A    0x006A  #       LATIN SMALL LETTER J
0x6B    0x006B  #       LATIN SMALL LETTER K
0x6C    0x006C  #       LATIN SMALL LETTER L
0x6D    0x006D  #       LATIN SMALL LETTER M
0x6E    0x006E  #       LATIN SMALL LETTER N
0x6F    0x006F  #       LATIN SMALL LETTER O
0x70    0x0070  #       LATIN SMALL LETTER P
0x71    0x0071  #       LATIN SMALL LETTER Q
0x72    0x0072  #       LATIN SMALL LETTER R
0x73    0x0073  #       LATIN SMALL LETTER S
0x74    0x0074  #       LATIN SMALL LETTER T
0x75    0x0075  #       LATIN SMALL LETTER U
0x76    0x0076  #       LATIN SMALL LETTER V
0x77    0x0077  #       LATIN SMALL LETTER W
0x78    0x0078  #       LATIN SMALL LETTER X
0x79    0x0079  #       LATIN SMALL LETTER Y
0x7A    0x007A  #       LATIN SMALL LETTER Z
0x7B    0x007B  #       LEFT CURLY BRACKET
0x7C    0x007C  #       VERTICAL LINE
0x7D    0x007D  #       RIGHT CURLY BRACKET
0x7E    0x007E  #       TILDE
0x7F    0x007F  #       DELETE
0x80    0x0080  #       <control>
0x81    0x0081  #       <control>
0x82    0x0082  #       <control>
0x83    0x0083  #       <control>
0x84    0x0084  #       <control>
0x85    0x0085  #       <control>
0x86    0x0086  #       <control>
0x87    0x0087  #       <control>
0x88    0x0088  #       <control>
0x89    0x0089  #       <control>
0x8A    0x008A  #       <control>
0x8B    0x008B  #       <control>
0x8C    0x008C  #       <control>
0x8D    0x008D  #       <control>
0x8E    0x008E  #       <control>
0x8F    0x008F  #       <control>
0x90    0x0090  #       <control>
0x91    0x0091  #       <control>
0x92    0x0092  #       <control>
0x93    0x0093  #       <control>
0x94    0x0094  #       <control>
0x95    0x0095  #       <control>
0x96    0x0096  #       <control>
0x97    0x0097  #       <control>
0x98    0x0098  #       <control>
0x99    0x0099  #       <control>
0x9A    0x009A  #       <control>
0x9B    0x009B  #       <control>
0x9C    0x009C  #       <control>
0x9D    0x009D  #       <control>
0x9E    0x009E  #       <control>
0x9F    0x009F  #       <control>
0xA0    0x00A0  #       NO-BREAK SPACE
0xA1    0x00A1  #       INVERTED EXCLAMATION MARK
0xA2    0x00A2  #       CENT SIGN
0xA3    0x00A3  #       POUND SIGN
0xA4    0x00A4  #       CURRENCY SIGN
0xA5    0x00A5  #       YEN SIGN
0xA6    0x00A6  #       BROKEN BAR
0xA7    0x00A7  #       SECTION SIGN
0xA8    0x00A8  #       DIAERESIS
0xA9    0x00A9  #       COPYRIGHT SIGN
0xAA    0x00AA  #       FEMININE ORDINAL INDICATOR
0xAB    0x00AB  #       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0xAC    0x00AC  #       NOT SIGN
0xAD    0x00AD  #       SOFT HYPHEN
0xAE    0x00AE  #       REGISTERED SIGN
0xAF    0x00AF  #       MACRON
0xB0    0x00B0  #       DEGREE SIGN
0xB1    0x00B1  #       PLUS-MINUS SIGN
0xB2    0x00B2  #       SUPERSCRIPT TWO
0xB3    0x00B3  #       SUPERSCRIPT THREE
0xB4    0x00B4  #       ACUTE ACCENT
0xB5    0x00B5  #       MICRO SIGN
0xB6    0x00B6  #       PILCROW SIGN
0xB7    0x00B7  #       MIDDLE DOT
0xB8    0x00B8  #       CEDILLA
0xB9    0x00B9  #       SUPERSCRIPT ONE
0xBA    0x00BA  #       MASCULINE ORDINAL INDICATOR
0xBB    0x00BB  #       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0xBC    0x00BC  #       VULGAR FRACTION ONE QUARTER
0xBD    0x00BD  #       VULGAR FRACTION ONE HALF
0xBE    0x00BE  #       VULGAR FRACTION THREE QUARTERS
0xBF    0x00BF  #       INVERTED QUESTION MARK
0xC0    0x00C0  #       LATIN CAPITAL LETTER A WITH GRAVE
0xC1    0x00C1  #       LATIN CAPITAL LETTER A WITH ACUTE
0xC2    0x00C2  #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0xC3    0x00C3  #       LATIN CAPITAL LETTER A WITH TILDE
0xC4    0x00C4  #       LATIN CAPITAL LETTER A WITH DIAERESIS
0xC5    0x00C5  #       LATIN CAPITAL LETTER A WITH RING ABOVE
0xC6    0x00C6  #       LATIN CAPITAL LETTER AE
0xC7    0x00C7  #       LATIN CAPITAL LETTER C WITH CEDILLA
0xC8    0x00C8  #       LATIN CAPITAL LETTER E WITH GRAVE
0xC9    0x00C9  #       LATIN CAPITAL LETTER E WITH ACUTE
0xCA    0x00CA  #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0xCB    0x00CB  #       LATIN CAPITAL LETTER E WITH DIAERESIS
0xCC    0x00CC  #       LATIN CAPITAL LETTER I WITH GRAVE
0xCD    0x00CD  #       LATIN CAPITAL LETTER I WITH ACUTE
0xCE    0x00CE  #       LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0xCF    0x00CF  #       LATIN CAPITAL LETTER I WITH DIAERESIS
0xD0    0x011E  #       LATIN CAPITAL LETTER G WITH BREVE
0xD1    0x00D1  #       LATIN CAPITAL LETTER N WITH TILDE
0xD2    0x00D2  #       LATIN CAPITAL LETTER O WITH GRAVE
0xD3    0x00D3  #       LATIN CAPITAL LETTER O WITH ACUTE
0xD4    0x00D4  #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0xD5    0x00D5  #       LATIN CAPITAL LETTER O WITH TILDE
0xD6    0x00D6  #       LATIN CAPITAL LETTER O WITH DIAERESIS
0xD7    0x00D7  #       MULTIPLICATION SIGN
0xD8    0x00D8  #       LATIN CAPITAL LETTER O WITH STROKE
0xD9    0x00D9  #       LATIN CAPITAL LETTER U WITH GRAVE
0xDA    0x00DA  #       LATIN CAPITAL LETTER U WITH ACUTE
0xDB    0x00DB  #       LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0xDC    0x00DC  #       LATIN CAPITAL LETTER U WITH DIAERESIS
0xDD    0x0130  #       LATIN CAPITAL LETTER I WITH DOT ABOVE
0xDE    0x015E  #       LATIN CAPITAL LETTER S WITH CEDILLA
0xDF    0x00DF  #       LATIN SMALL LETTER SHARP S
0xE0    0x00E0  #       LATIN SMALL LETTER A WITH GRAVE
0xE1    0x00E1  #       LATIN SMALL LETTER A WITH ACUTE
0xE2    0x00E2  #       LATIN SMALL LETTER A WITH CIRCUMFLEX
0xE3    0x00E3  #       LATIN SMALL LETTER A WITH TILDE
0xE4    0x00E4  #       LATIN SMALL LETTER A WITH DIAERESIS
0xE5    0x00E5  #       LATIN SMALL LETTER A WITH RING ABOVE
0xE6    0x00E6  #       LATIN SMALL LETTER AE
0xE7    0x00E7  #       LATIN SMALL LETTER C WITH CEDILLA
0xE8    0x00E8  #       LATIN SMALL LETTER E WITH GRAVE
0xE9    0x00E9  #       LATIN SMALL LETTER E WITH ACUTE
0xEA    0x00EA  #       LATIN SMALL LETTER E WITH CIRCUMFLEX
0xEB    0x00EB  #       LATIN SMALL LETTER E WITH DIAERESIS
0xEC    0x00EC  #       LATIN SMALL LETTER I WITH GRAVE
0xED    0x00ED  #       LATIN SMALL LETTER I WITH ACUTE
0xEE    0x00EE  #       LATIN SMALL LETTER I WITH CIRCUMFLEX
0xEF    0x00EF  #       LATIN SMALL LETTER I WITH DIAERESIS
0xF0    0x011F  #       LATIN SMALL LETTER G WITH BREVE
0xF1    0x00F1  #       LATIN SMALL LETTER N WITH TILDE
0xF2    0x00F2  #       LATIN SMALL LETTER O WITH GRAVE
0xF3    0x00F3  #       LATIN SMALL LETTER O WITH ACUTE
0xF4    0x00F4  #       LATIN SMALL LETTER O WITH CIRCUMFLEX
0xF5    0x00F5  #       LATIN SMALL LETTER O WITH TILDE
0xF6    0x00F6  #       LATIN SMALL LETTER O WITH DIAERESIS
0xF7    0x00F7  #       DIVISION SIGN
0xF8    0x00F8  #       LATIN SMALL LETTER O WITH STROKE
0xF9    0x00F9  #       LATIN SMALL LETTER U WITH GRAVE
0xFA    0x00FA  #       LATIN SMALL LETTER U WITH ACUTE
0xFB    0x00FB  #       LATIN SMALL LETTER U WITH CIRCUMFLEX
0xFC    0x00FC  #       LATIN SMALL LETTER U WITH DIAERESIS
0xFD    0x0131  #       LATIN SMALL LETTER DOTLESS I
0xFE    0x015F  #       LATIN SMALL LETTER S WITH CEDILLA
0xFF    0x00FF  #       LATIN SMALL LETTER Y WITH DIAERESIS



"

"Review note: this method contains no executable statements, so it answers
 the receiver. Its only content is the comment above, a copy of the
 unicode.org table named in its first line. Only six rows are not identity
 mappings: 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE.
 This note is placed after the table so that the first comment of the method
 stays unchanged, in case tooling reads it - TODO confirm whether generateCode
 consumes the table comment."
! !

!ISO8859_9 class methodsFor:'queries'!

namesOfEncoding
    "Answer a literal array with the name strings of this encoding:
     two spellings of iso8859-9, two spellings of latin5, and iso-ir-148.
     Presumably used to find this encoder by name - TODO confirm against callers."

    ^ #( 'iso8859-9' 'iso-8859-9' 'latin5' 'latin-5' 'iso-ir-148')
! !

!ISO8859_9 methodsFor:'encoding & decoding'!

decode:codeArg
    "Answer the unicode value for codeArg, a code in my encoding (ISO 8859-9).
     Six codes in the range 16rD0..16rFE are translated here; every other
     code is handed to the superclass implementation.

     This method was originally emitted by generateCode; when the mapping
     table changes, regenerate it rather than editing the values by hand."

    |byte "{ Class: SmallInteger }"|

    byte := codeArg.

    "Nothing outside 16rD0..16rFE is translated by this class."
    (byte > 16rCF and:[byte <= 16rFE]) ifFalse:[
        ^ super decode:byte
    ].

    byte == 16rD0 ifTrue:[ ^ 16r011E ].     "LATIN CAPITAL LETTER G WITH BREVE"
    byte == 16rDD ifTrue:[ ^ 16r0130 ].     "LATIN CAPITAL LETTER I WITH DOT ABOVE"
    byte == 16rDE ifTrue:[ ^ 16r015E ].     "LATIN CAPITAL LETTER S WITH CEDILLA"
    byte == 16rF0 ifTrue:[ ^ 16r011F ].     "LATIN SMALL LETTER G WITH BREVE"
    byte == 16rFD ifTrue:[ ^ 16r0131 ].     "LATIN SMALL LETTER DOTLESS I"
    byte == 16rFE ifTrue:[ ^ 16r015F ].     "LATIN SMALL LETTER S WITH CEDILLA"

    "Inside the range but not one of the six special codes."
    ^ super decode:byte
!

encode:unicodeArg
    "Answer the code in my encoding (ISO 8859-9) for the unicode value unicodeArg.
     Values up to 16rCF, and values above it that are not handled here,
     are handed to the superclass implementation.
     The six unicode values whose code positions are taken by the remapped
     characters answer the result of #decodingError.

     This method was originally emitted by generateCode; when the mapping
     table changes, regenerate it rather than editing the values by hand.

     NOTE(review): signalling decodingError on the encode path looks odd; an
     encoding-specific error may have been intended - confirm what the
     superclass chain offers before changing it."

    |codePoint "{ Class: SmallInteger }"|

    codePoint := unicodeArg.

    "Nothing at or below 16rCF is treated specially by this class."
    (codePoint > 16rCF) ifFalse:[
        ^ super encode:codePoint
    ].

    "These values have no code in this encoding; their positions hold the
     six remapped characters handled below."
    ((codePoint == 16rD0)
     or:[(codePoint == 16rDD)
     or:[(codePoint == 16rDE)
     or:[(codePoint == 16rF0)
     or:[(codePoint == 16rFD)
     or:[(codePoint == 16rFE)]]]]]) ifTrue:[
        ^ self decodingError
    ].

    codePoint == 16r11E ifTrue:[ ^ 16r00D0 ].   "LATIN CAPITAL LETTER G WITH BREVE"
    codePoint == 16r11F ifTrue:[ ^ 16r00F0 ].   "LATIN SMALL LETTER G WITH BREVE"
    codePoint == 16r130 ifTrue:[ ^ 16r00DD ].   "LATIN CAPITAL LETTER I WITH DOT ABOVE"
    codePoint == 16r131 ifTrue:[ ^ 16r00FD ].   "LATIN SMALL LETTER DOTLESS I"
    codePoint == 16r15E ifTrue:[ ^ 16r00DE ].   "LATIN CAPITAL LETTER S WITH CEDILLA"
    codePoint == 16r15F ifTrue:[ ^ 16r00FE ].   "LATIN SMALL LETTER S WITH CEDILLA"

    ^ super encode:codePoint
! !

!ISO8859_9 class methodsFor:'documentation'!

version
    "Answer the revision-control header string recorded for this file
     (file path, revision number, date and author)."

    ^ '$Header: /cvs/stx/stx/libbasic/Attic/Encoder_ISO8859_9.st,v 1.1 2004-03-05 17:22:53 cg Exp $'
! !