CharacterEncoderImplementations__MAC_Cyrillic.st
author Claus Gittinger <cg@exept.de>
Fri, 05 Mar 2004 18:28:27 +0100
changeset 8081 b468050174a9
child 8114 05274a80fcc4
permissions -rw-r--r--
initial checkin

"{ Encoding: utf8 }"

"{ Package: 'stx:libbasic' }"

"{ NameSpace: CharacterEncoderImplementations }"

"Defines the class MAC_Cyrillic as a subclass of SingleByteEncoder.
 The class declares no instance variables, class variables or pool
 dictionaries of its own and is filed under the category
 'Collections-Text-Encodings'.
 The Mac OS Cyrillic <-> Unicode table it stands for is kept as a comment
 in the class-side method #mapping."
SingleByteEncoder subclass:#MAC_Cyrillic
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Encodings'
!


!MAC_Cyrillic class methodsFor:'mapping'!

mapFileURL1_relativePathName
    "Answer the relative path name of the Apple mapping file for this encoding.
     Presumably it is resolved against the Unicode MAPPINGS directory
     (compare the URL quoted at the top of the table in #mapping) - confirm
     against the caller."

    |relativePath|

    relativePath := 'VENDORS/APPLE/CYRILLIC.TXT'.
    ^ relativePath
!

mapping
"
# From: http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT

#=======================================================================
#   File name:  CYRILLIC.TXT
#
#   Contents:   Map (external version) from Mac OS Cyrillic
#               character set to Unicode 2.1 through Unicode 3.2
#
#   Copyright:  (c) 1995-2002 by Apple Computer, Inc., all rights
#               reserved.
#
#   Contact:    charsets@apple.com
#
#   Changes:
#
#      b3,c1 2002-Dec-19    Update URLs, notes. Matches internal
#                           utom<b2>.
#       b02  1999-Sep-22    Encoding changed for Mac OS 9.0 to merge
#                           with Mac OS Ukrainian and support EURO SIGN;
#                           Change mappings for 0xA2, 0xB6, and 0xFF.
#                           Update contact e-mail address. Matches
#                           internal utom<b2>, ufrm<b2>, and Text
#                           Encoding Converter version 1.5.
#       n05  1998-Feb-05    Update header comments to new format; no
#                           mapping changes.  Matches internal utom<n3>,
#                           ufrm<n13>, and Text Encoding Converter
#                           version 1.3.
#       n03  1995-Apr-15    First version (after fixing some typos).
#                           Matches internal ufrm<n5>.
#
# Standard header:
# ----------------
#
#   Apple, the Apple logo, and Macintosh are trademarks of Apple
#   Computer, Inc., registered in the United States and other countries.
#   Unicode is a trademark of Unicode Inc. For the sake of brevity,
#   throughout this document, 'Macintosh' can be used to refer to
#   Macintosh computers and 'Unicode' can be used to refer to the
#   Unicode standard.
#
#   Apple makes no warranty or representation, either express or
#   implied, with respect to these tables, their quality, accuracy, or
#   fitness for a particular purpose. In no event will Apple be liable
#   for direct, indirect, special, incidental, or consequential damages 
#   resulting from any defect or inaccuracy in this document or the
#   accompanying tables.
#
#   These mapping tables and character lists are subject to change.
#   The latest tables should be available from the following:
#
#   <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
#
#   For general information about Mac OS encodings and these mapping
#   tables, see the file 'README.TXT'.
#
# Format:
# -------
#
#   Three tab-separated columns;
#   '#' begins a comment which continues to the end of the line.
#     Column #1 is the Mac OS Cyrillic code (in hex as 0xNN)
#     Column #2 is the corresponding Unicode (in hex as 0xNNNN)
#     Column #3 is a comment containing the Unicode name
#
#   The entries are in Mac OS Cyrillic code order.
#
#   Control character mappings are not shown in this table, following
#   the conventions of the standard UTC mapping tables. However, the
#   Mac OS Cyrillic character set uses the standard control characters
#   at 0x00-0x1F and 0x7F.
#
# Notes on Mac OS Cyrillic:
# -------------------------
#
#   This is the 'Euro sign' version of Mac Cyrillic for Mac OS 9.0 and
#   later. Before Mac OS 9.0, there were two separate Slavic Cyrillic
#   encodings:
#
#   1. The Cyrillic currency sign variant (used for localized Russian
#      and Bulgarian systems), which had the following:
#           0xA2  U+00A2 CENT SIGN
#           0xB6  U+2202 PARTIAL DIFFERENTIAL
#           0xFF  U+00A4 CURRENCY SIGN
#
#   2. The Ukrainian currency sign variant (used for localized Ukrainian
#      systems and the pre-9.0 Cyrillic Language Kit), which had the
#      following:
#           0xA2  U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
#           0xB6  U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
#           0xFF  U+00A4 CURRENCY SIGN
#
#   This new Cyrillic Euro sign version is based on the old Ukrainian
#   currency sign variant, with 0xFF changed to be EURO SIGN.
#
#   The Mac OS Cyrillic encoding includes the Cyrillic letter repertoire
#   of ISO 8859-5 (although not at the same code points). This covers
#   most of the Slavic languages written in Cyrillic script.
#
#   The Mac OS Cyrillic encoding also includes a number of characters
#   needed for the Mac OS user interface and localization (e.g.
#   ellipsis, bullet, copyright sign). All of the characters in Mac OS
#   Cyrillic that are also in the Mac OS Roman encoding are at the
#   same code point in both; this improves application compatibility.
#
#   Note: There is a common Ukrainian glyph variation in which the glyph
#   for CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I may or may not
#   have a dot above.
#
# Unicode mapping issues and notes:
# ---------------------------------
#
# Details of mapping changes in each version:
# -------------------------------------------
#
#   Changes from version n05 to version b02:
#
#   - Encoding changed for Mac OS 9.0 to merge with Mac OS Ukrainian and
#   support EURO SIGN. 0xA2 changed from U+00A2 to U+0490; 0xB6 changed
#   from U+2202 to U+0491; 0xFF changed from U+00A4 to U+20AC.
#
##################

0x20    0x0020  # SPACE
0x21    0x0021  # EXCLAMATION MARK
0x22    0x0022  # QUOTATION MARK
0x23    0x0023  # NUMBER SIGN
0x24    0x0024  # DOLLAR SIGN
0x25    0x0025  # PERCENT SIGN
0x26    0x0026  # AMPERSAND
0x27    0x0027  # APOSTROPHE
0x28    0x0028  # LEFT PARENTHESIS
0x29    0x0029  # RIGHT PARENTHESIS
0x2A    0x002A  # ASTERISK
0x2B    0x002B  # PLUS SIGN
0x2C    0x002C  # COMMA
0x2D    0x002D  # HYPHEN-MINUS
0x2E    0x002E  # FULL STOP
0x2F    0x002F  # SOLIDUS
0x30    0x0030  # DIGIT ZERO
0x31    0x0031  # DIGIT ONE
0x32    0x0032  # DIGIT TWO
0x33    0x0033  # DIGIT THREE
0x34    0x0034  # DIGIT FOUR
0x35    0x0035  # DIGIT FIVE
0x36    0x0036  # DIGIT SIX
0x37    0x0037  # DIGIT SEVEN
0x38    0x0038  # DIGIT EIGHT
0x39    0x0039  # DIGIT NINE
0x3A    0x003A  # COLON
0x3B    0x003B  # SEMICOLON
0x3C    0x003C  # LESS-THAN SIGN
0x3D    0x003D  # EQUALS SIGN
0x3E    0x003E  # GREATER-THAN SIGN
0x3F    0x003F  # QUESTION MARK
0x40    0x0040  # COMMERCIAL AT
0x41    0x0041  # LATIN CAPITAL LETTER A
0x42    0x0042  # LATIN CAPITAL LETTER B
0x43    0x0043  # LATIN CAPITAL LETTER C
0x44    0x0044  # LATIN CAPITAL LETTER D
0x45    0x0045  # LATIN CAPITAL LETTER E
0x46    0x0046  # LATIN CAPITAL LETTER F
0x47    0x0047  # LATIN CAPITAL LETTER G
0x48    0x0048  # LATIN CAPITAL LETTER H
0x49    0x0049  # LATIN CAPITAL LETTER I
0x4A    0x004A  # LATIN CAPITAL LETTER J
0x4B    0x004B  # LATIN CAPITAL LETTER K
0x4C    0x004C  # LATIN CAPITAL LETTER L
0x4D    0x004D  # LATIN CAPITAL LETTER M
0x4E    0x004E  # LATIN CAPITAL LETTER N
0x4F    0x004F  # LATIN CAPITAL LETTER O
0x50    0x0050  # LATIN CAPITAL LETTER P
0x51    0x0051  # LATIN CAPITAL LETTER Q
0x52    0x0052  # LATIN CAPITAL LETTER R
0x53    0x0053  # LATIN CAPITAL LETTER S
0x54    0x0054  # LATIN CAPITAL LETTER T
0x55    0x0055  # LATIN CAPITAL LETTER U
0x56    0x0056  # LATIN CAPITAL LETTER V
0x57    0x0057  # LATIN CAPITAL LETTER W
0x58    0x0058  # LATIN CAPITAL LETTER X
0x59    0x0059  # LATIN CAPITAL LETTER Y
0x5A    0x005A  # LATIN CAPITAL LETTER Z
0x5B    0x005B  # LEFT SQUARE BRACKET
0x5C    0x005C  # REVERSE SOLIDUS
0x5D    0x005D  # RIGHT SQUARE BRACKET
0x5E    0x005E  # CIRCUMFLEX ACCENT
0x5F    0x005F  # LOW LINE
0x60    0x0060  # GRAVE ACCENT
0x61    0x0061  # LATIN SMALL LETTER A
0x62    0x0062  # LATIN SMALL LETTER B
0x63    0x0063  # LATIN SMALL LETTER C
0x64    0x0064  # LATIN SMALL LETTER D
0x65    0x0065  # LATIN SMALL LETTER E
0x66    0x0066  # LATIN SMALL LETTER F
0x67    0x0067  # LATIN SMALL LETTER G
0x68    0x0068  # LATIN SMALL LETTER H
0x69    0x0069  # LATIN SMALL LETTER I
0x6A    0x006A  # LATIN SMALL LETTER J
0x6B    0x006B  # LATIN SMALL LETTER K
0x6C    0x006C  # LATIN SMALL LETTER L
0x6D    0x006D  # LATIN SMALL LETTER M
0x6E    0x006E  # LATIN SMALL LETTER N
0x6F    0x006F  # LATIN SMALL LETTER O
0x70    0x0070  # LATIN SMALL LETTER P
0x71    0x0071  # LATIN SMALL LETTER Q
0x72    0x0072  # LATIN SMALL LETTER R
0x73    0x0073  # LATIN SMALL LETTER S
0x74    0x0074  # LATIN SMALL LETTER T
0x75    0x0075  # LATIN SMALL LETTER U
0x76    0x0076  # LATIN SMALL LETTER V
0x77    0x0077  # LATIN SMALL LETTER W
0x78    0x0078  # LATIN SMALL LETTER X
0x79    0x0079  # LATIN SMALL LETTER Y
0x7A    0x007A  # LATIN SMALL LETTER Z
0x7B    0x007B  # LEFT CURLY BRACKET
0x7C    0x007C  # VERTICAL LINE
0x7D    0x007D  # RIGHT CURLY BRACKET
0x7E    0x007E  # TILDE
#
0x80    0x0410  # CYRILLIC CAPITAL LETTER A
0x81    0x0411  # CYRILLIC CAPITAL LETTER BE
0x82    0x0412  # CYRILLIC CAPITAL LETTER VE
0x83    0x0413  # CYRILLIC CAPITAL LETTER GHE
0x84    0x0414  # CYRILLIC CAPITAL LETTER DE
0x85    0x0415  # CYRILLIC CAPITAL LETTER IE
0x86    0x0416  # CYRILLIC CAPITAL LETTER ZHE
0x87    0x0417  # CYRILLIC CAPITAL LETTER ZE
0x88    0x0418  # CYRILLIC CAPITAL LETTER I
0x89    0x0419  # CYRILLIC CAPITAL LETTER SHORT I
0x8A    0x041A  # CYRILLIC CAPITAL LETTER KA
0x8B    0x041B  # CYRILLIC CAPITAL LETTER EL
0x8C    0x041C  # CYRILLIC CAPITAL LETTER EM
0x8D    0x041D  # CYRILLIC CAPITAL LETTER EN
0x8E    0x041E  # CYRILLIC CAPITAL LETTER O
0x8F    0x041F  # CYRILLIC CAPITAL LETTER PE
0x90    0x0420  # CYRILLIC CAPITAL LETTER ER
0x91    0x0421  # CYRILLIC CAPITAL LETTER ES
0x92    0x0422  # CYRILLIC CAPITAL LETTER TE
0x93    0x0423  # CYRILLIC CAPITAL LETTER U
0x94    0x0424  # CYRILLIC CAPITAL LETTER EF
0x95    0x0425  # CYRILLIC CAPITAL LETTER HA
0x96    0x0426  # CYRILLIC CAPITAL LETTER TSE
0x97    0x0427  # CYRILLIC CAPITAL LETTER CHE
0x98    0x0428  # CYRILLIC CAPITAL LETTER SHA
0x99    0x0429  # CYRILLIC CAPITAL LETTER SHCHA
0x9A    0x042A  # CYRILLIC CAPITAL LETTER HARD SIGN
0x9B    0x042B  # CYRILLIC CAPITAL LETTER YERU
0x9C    0x042C  # CYRILLIC CAPITAL LETTER SOFT SIGN
0x9D    0x042D  # CYRILLIC CAPITAL LETTER E
0x9E    0x042E  # CYRILLIC CAPITAL LETTER YU
0x9F    0x042F  # CYRILLIC CAPITAL LETTER YA
0xA0    0x2020  # DAGGER
0xA1    0x00B0  # DEGREE SIGN
0xA2    0x0490  # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0xA3    0x00A3  # POUND SIGN
0xA4    0x00A7  # SECTION SIGN
0xA5    0x2022  # BULLET
0xA6    0x00B6  # PILCROW SIGN
0xA7    0x0406  # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0xA8    0x00AE  # REGISTERED SIGN
0xA9    0x00A9  # COPYRIGHT SIGN
0xAA    0x2122  # TRADE MARK SIGN
0xAB    0x0402  # CYRILLIC CAPITAL LETTER DJE
0xAC    0x0452  # CYRILLIC SMALL LETTER DJE
0xAD    0x2260  # NOT EQUAL TO
0xAE    0x0403  # CYRILLIC CAPITAL LETTER GJE
0xAF    0x0453  # CYRILLIC SMALL LETTER GJE
0xB0    0x221E  # INFINITY
0xB1    0x00B1  # PLUS-MINUS SIGN
0xB2    0x2264  # LESS-THAN OR EQUAL TO
0xB3    0x2265  # GREATER-THAN OR EQUAL TO
0xB4    0x0456  # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0xB5    0x00B5  # MICRO SIGN
0xB6    0x0491  # CYRILLIC SMALL LETTER GHE WITH UPTURN
0xB7    0x0408  # CYRILLIC CAPITAL LETTER JE
0xB8    0x0404  # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0xB9    0x0454  # CYRILLIC SMALL LETTER UKRAINIAN IE
0xBA    0x0407  # CYRILLIC CAPITAL LETTER YI
0xBB    0x0457  # CYRILLIC SMALL LETTER YI
0xBC    0x0409  # CYRILLIC CAPITAL LETTER LJE
0xBD    0x0459  # CYRILLIC SMALL LETTER LJE
0xBE    0x040A  # CYRILLIC CAPITAL LETTER NJE
0xBF    0x045A  # CYRILLIC SMALL LETTER NJE
0xC0    0x0458  # CYRILLIC SMALL LETTER JE
0xC1    0x0405  # CYRILLIC CAPITAL LETTER DZE
0xC2    0x00AC  # NOT SIGN
0xC3    0x221A  # SQUARE ROOT
0xC4    0x0192  # LATIN SMALL LETTER F WITH HOOK
0xC5    0x2248  # ALMOST EQUAL TO
0xC6    0x2206  # INCREMENT
0xC7    0x00AB  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0xC8    0x00BB  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0xC9    0x2026  # HORIZONTAL ELLIPSIS
0xCA    0x00A0  # NO-BREAK SPACE
0xCB    0x040B  # CYRILLIC CAPITAL LETTER TSHE
0xCC    0x045B  # CYRILLIC SMALL LETTER TSHE
0xCD    0x040C  # CYRILLIC CAPITAL LETTER KJE
0xCE    0x045C  # CYRILLIC SMALL LETTER KJE
0xCF    0x0455  # CYRILLIC SMALL LETTER DZE
0xD0    0x2013  # EN DASH
0xD1    0x2014  # EM DASH
0xD2    0x201C  # LEFT DOUBLE QUOTATION MARK
0xD3    0x201D  # RIGHT DOUBLE QUOTATION MARK
0xD4    0x2018  # LEFT SINGLE QUOTATION MARK
0xD5    0x2019  # RIGHT SINGLE QUOTATION MARK
0xD6    0x00F7  # DIVISION SIGN
0xD7    0x201E  # DOUBLE LOW-9 QUOTATION MARK
0xD8    0x040E  # CYRILLIC CAPITAL LETTER SHORT U
0xD9    0x045E  # CYRILLIC SMALL LETTER SHORT U
0xDA    0x040F  # CYRILLIC CAPITAL LETTER DZHE
0xDB    0x045F  # CYRILLIC SMALL LETTER DZHE
0xDC    0x2116  # NUMERO SIGN
0xDD    0x0401  # CYRILLIC CAPITAL LETTER IO
0xDE    0x0451  # CYRILLIC SMALL LETTER IO
0xDF    0x044F  # CYRILLIC SMALL LETTER YA
0xE0    0x0430  # CYRILLIC SMALL LETTER A
0xE1    0x0431  # CYRILLIC SMALL LETTER BE
0xE2    0x0432  # CYRILLIC SMALL LETTER VE
0xE3    0x0433  # CYRILLIC SMALL LETTER GHE
0xE4    0x0434  # CYRILLIC SMALL LETTER DE
0xE5    0x0435  # CYRILLIC SMALL LETTER IE
0xE6    0x0436  # CYRILLIC SMALL LETTER ZHE
0xE7    0x0437  # CYRILLIC SMALL LETTER ZE
0xE8    0x0438  # CYRILLIC SMALL LETTER I
0xE9    0x0439  # CYRILLIC SMALL LETTER SHORT I
0xEA    0x043A  # CYRILLIC SMALL LETTER KA
0xEB    0x043B  # CYRILLIC SMALL LETTER EL
0xEC    0x043C  # CYRILLIC SMALL LETTER EM
0xED    0x043D  # CYRILLIC SMALL LETTER EN
0xEE    0x043E  # CYRILLIC SMALL LETTER O
0xEF    0x043F  # CYRILLIC SMALL LETTER PE
0xF0    0x0440  # CYRILLIC SMALL LETTER ER
0xF1    0x0441  # CYRILLIC SMALL LETTER ES
0xF2    0x0442  # CYRILLIC SMALL LETTER TE
0xF3    0x0443  # CYRILLIC SMALL LETTER U
0xF4    0x0444  # CYRILLIC SMALL LETTER EF
0xF5    0x0445  # CYRILLIC SMALL LETTER HA
0xF6    0x0446  # CYRILLIC SMALL LETTER TSE
0xF7    0x0447  # CYRILLIC SMALL LETTER CHE
0xF8    0x0448  # CYRILLIC SMALL LETTER SHA
0xF9    0x0449  # CYRILLIC SMALL LETTER SHCHA
0xFA    0x044A  # CYRILLIC SMALL LETTER HARD SIGN
0xFB    0x044B  # CYRILLIC SMALL LETTER YERU
0xFC    0x044C  # CYRILLIC SMALL LETTER SOFT SIGN
0xFD    0x044D  # CYRILLIC SMALL LETTER E
0xFE    0x044E  # CYRILLIC SMALL LETTER YU
0xFF    0x20AC  # EURO SIGN

"

"Maintainer notes (not part of the Apple table above):
 - This method contains no statements, only comments; when invoked it
   therefore answers the receiver.
 - The first comment carries the Mac OS Cyrillic to Unicode table, taken
   from the URL given on its 'From:' line. Codes 0x00-0x1F and 0x7F are
   not listed; the table header states they are the standard control
   characters.
 - NOTE(review): how the table is consumed is not visible in this file.
   Presumably a code generator or tool reads the comment text - verify
   before changing any character of it."
! !

!MAC_Cyrillic class methodsFor:'queries'!

namesOfEncoding
    "Answer the collection of names under which this encoding is known.
     The first element is a Symbol and the second a String; both are kept
     exactly as they were."

    |knownNames|

    knownNames := #( #'mac-cyrillic' 'maccyrillic' ).
    ^ knownNames
! !

!MAC_Cyrillic class methodsFor:'documentation'!

version
    "Answer the revision-control header string of this source file.
     The literal holds a CVS Header keyword, which is presumably expanded
     by the version control system on checkin - do not edit it by hand."

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__MAC_Cyrillic.st,v 1.1 2004-03-05 17:18:40 cg Exp $'
! !