CharacterEncoderImplementations__MAC_Thai.st
author Stefan Vogel <sv@exept.de>
Mon, 22 Jun 2015 11:33:37 +0200
branch expecco_2_7_5_branch
changeset 18499 b132ac7c9d6a
parent 8148 dbf64e3142d9
child 17711 39faaaf888b4
permissions -rw-r--r--
GLIBC 2.12 compatibility

"
 COPYRIGHT (c) 2004 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"{ Package: 'stx:libbasic' }"

"{ NameSpace: CharacterEncoderImplementations }"

"Encoder implementation class for the Mac OS Thai character set.
 It subclasses TwoByteEncoder and declares no instance variables,
 class variables or pool dictionaries of its own. The code table it
 corresponds to is reproduced in the class-side #mapping method."
TwoByteEncoder subclass:#MAC_Thai
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Encodings'
!

!MAC_Thai class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2004 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
    "The method body consists solely of the notice above; it contains no statements."
! !

!MAC_Thai class methodsFor:'mapping'!

mapFileURL1_relativePathName
    "Answer the relative path name of the Apple THAI.TXT mapping file.
     The path matches the tail of the unicode.org URL quoted at the top
     of the #mapping comment (.../Public/MAPPINGS/VENDORS/APPLE/THAI.TXT).
     NOTE(review): presumably resolved against a MAPPINGS base URL by the
     caller - confirm where this selector is used."

    ^ 'VENDORS/APPLE/THAI.TXT'
!

mapping
"
# From: http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/THAI.TXT

#=======================================================================
#   File name:  THAI.TXT
#
#   Contents:   Map (external version) from Mac OS Thai
#               character set to Unicode 3.2
#
#   Copyright:  (c) 1995-2002 by Apple Computer, Inc., all rights
#               reserved.
#
#   Contact:    charsets@apple.com
#
#   Changes:
#
#      b3,c1 2002-Dec-19    Update mapping for 0xDB to use new Unicode
#                           3.2 WORD JOINER instead of ZWNBSP (BOM).
#                           Update URLs. Matches internal utom<b3>.
#       b02  1999-Sep-22    Update contact e-mail address. Matches
#                           internal utom<b1>, ufrm<b2>, and Text
#                           Encoding Converter version 1.5.
#       n07  1998-Feb-05    Update to match internal utom<n5>, ufrm<n13>
#                           and Text Encoding Converter version 1.3:
#                           Use standard Unicodes plus transcoding hints
#                           instead of single corporate characters; see
#                           details below. Also update header comments
#                           to new format.
#       n04  1995-Nov-17    First version (after fixing some typos).
#                           Matches internal ufrm<n6>.
#
# Standard header:
# ----------------
#
#   Apple, the Apple logo, and Macintosh are trademarks of Apple
#   Computer, Inc., registered in the United States and other countries.
#   Unicode is a trademark of Unicode Inc. For the sake of brevity,
#   throughout this document, 'Macintosh' can be used to refer to
#   Macintosh computers and 'Unicode' can be used to refer to the
#   Unicode standard.
#
#   Apple makes no warranty or representation, either express or
#   implied, with respect to these tables, their quality, accuracy, or
#   fitness for a particular purpose. In no event will Apple be liable
#   for direct, indirect, special, incidental, or consequential damages 
#   resulting from any defect or inaccuracy in this document or the
#   accompanying tables.
#
#   These mapping tables and character lists are subject to change.
#   The latest tables should be available from the following:
#
#   <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
#
#   For general information about Mac OS encodings and these mapping
#   tables, see the file 'README.TXT'.
#
# Format:
# -------
#
#   Three tab-separated columns;
#   '#' begins a comment which continues to the end of the line.
#     Column #1 is the Mac OS Thai code (in hex as 0xNN)
#     Column #2 is the corresponding Unicode or Unicode sequence
#       (in hex as 0xNNNN or 0xNNNN+0xNNNN).
#     Column #3 is a comment containing the Unicode name
#
#   The entries are in Mac OS Thai code order.
#
#   Some of these mappings require the use of corporate characters.
#   See the file 'CORPCHAR.TXT' and notes below.
#
#   Control character mappings are not shown in this table, following
#   the conventions of the standard UTC mapping tables. However, the
#   Mac OS Thai character set uses the standard control characters at
#   0x00-0x1F and 0x7F.
#
# Notes on Mac OS Thai:
# ---------------------
#
#   Codes 0xA1-0xDA and 0xDF-0xFB are the character set from Thai
#   standard TIS 620-2533, except that the following changes are
#   made:
#     0xEE is TRADE MARK SIGN (instead of THAI CHARACTER YAMAKKAN)
#     0xFA is REGISTERED SIGN (instead of THAI CHARACTER ANGKHANKHU)
#     0xFB is COPYRIGHT SIGN (instead of THAI CHARACTER KHOMUT)
#
#   Codes 0x80-0x82, 0x8D-0x8E, 0x91, 0x9D-0x9E, and 0xDB-0xDE are
#   various additional punctuation marks (e.g. curly quotes,
#   ellipsis), no-break space, and two special characters 'word join'
#   and 'word break'.
#
#   Codes 0x83-0x8C, 0x8F, and 0x92-0x9C are for positional variants
#   of the upper vowels, tone marks, and other signs at 0xD1,
#   0xD4-0xD7, and 0xE7-0xED. The positional variants would normally
#   be considered presentation forms only and not characters. In most
#   cases they are not typed directly; they are selected automatically
#   at display time by the WorldScript software. However, using the
#   Thai-DTP keyboard, the presentation forms can in fact be typed
#   directly using dead keys. Thus they must be treated as real
#   characters in the Mac OS Thai encoding. They are mapped using
#   variant tags; see below.
#
#   Several code points are undefined and unused (they cannot be
#   typed using any of the Mac OS Thai keyboard layouts): 0x90, 0x9F,
#   0xFC-0xFE. These are not shown in the table below.
#
# Unicode mapping issues and notes:
# ---------------------------------
#
#   The goals in the Apple mappings provided here are:
#   - Ensure roundtrip mapping from every character in the Mac OS Thai
#   character set to Unicode and back
#   - Use standard Unicode characters as much as possible, to maximize
#   interchangeability of the resulting Unicode text. Whenever possible,
#   avoid having content carried by private-use characters.
#
#   To satisfy both goals, we use private use characters to mark variants
#   that are similar to a sequence of one or more standard Unicode
#   characters.
#
#   Apple has defined a block of 32 corporate characters as 'transcoding
#   hints.' These are used in combination with standard Unicode characters
#   to force them to be treated in a special way for mapping to other
#   encodings; they have no other effect. Sixteen of these transcoding
#   hints are 'grouping hints' - they indicate that the next 2-4 Unicode
#   characters should be treated as a single entity for transcoding. The
#   other sixteen transcoding hints are 'variant tags' - they are like
#   combining characters, and can follow a standard Unicode (or a sequence
#   consisting of a base character and other combining characters) to
#   cause it to be treated in a special way for transcoding. These always
#   terminate a combining-character sequence.
#
#   The transcoding coding hints used in this mapping table are three
#   variant tags in the range 0xF873-75. Since these are combined with
#   standard Unicode characters, some characters in the Mac OS Thai
#   character set map to a sequence of two Unicodes instead of a single
#   Unicode character. For example, the Mac OS Thai character at 0x83 is a
#   low-left positional variant of THAI CHARACTER MAI EK (the standard
#   mapping is for the abstract character at 0xE8). So 0x83 is mapped to
#   0x0E48 (THAI CHARACTER MAI EK) + 0xF875 (a variant tag).
#
# Details of mapping changes in each version:
# -------------------------------------------
#
#   Changes from version b02 to version b03/c01:
#
#   - Update mapping for 0xDB to use new Unicode 3.2 character U+2060
#     WORD JOINER instead of U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM)
#
#   Changes from version n04 to version n07:
#
#   - Changed mappings of the positional variants to use standard
#   Unicodes + transcoding hint, instead of using single corporate
#   zone characters. This affected the mappings for the following:
#   0x83-0x8C, 0x8F, 0x92-0x9C
#
#   - Just comment out unused code points in the table, instead
#   of mapping them to U+FFFD.
#
##################

0x20    0x0020  # SPACE
0x21    0x0021  # EXCLAMATION MARK
0x22    0x0022  # QUOTATION MARK
0x23    0x0023  # NUMBER SIGN
0x24    0x0024  # DOLLAR SIGN
0x25    0x0025  # PERCENT SIGN
0x26    0x0026  # AMPERSAND
0x27    0x0027  # APOSTROPHE
0x28    0x0028  # LEFT PARENTHESIS
0x29    0x0029  # RIGHT PARENTHESIS
0x2A    0x002A  # ASTERISK
0x2B    0x002B  # PLUS SIGN
0x2C    0x002C  # COMMA
0x2D    0x002D  # HYPHEN-MINUS
0x2E    0x002E  # FULL STOP
0x2F    0x002F  # SOLIDUS
0x30    0x0030  # DIGIT ZERO
0x31    0x0031  # DIGIT ONE
0x32    0x0032  # DIGIT TWO
0x33    0x0033  # DIGIT THREE
0x34    0x0034  # DIGIT FOUR
0x35    0x0035  # DIGIT FIVE
0x36    0x0036  # DIGIT SIX
0x37    0x0037  # DIGIT SEVEN
0x38    0x0038  # DIGIT EIGHT
0x39    0x0039  # DIGIT NINE
0x3A    0x003A  # COLON
0x3B    0x003B  # SEMICOLON
0x3C    0x003C  # LESS-THAN SIGN
0x3D    0x003D  # EQUALS SIGN
0x3E    0x003E  # GREATER-THAN SIGN
0x3F    0x003F  # QUESTION MARK
0x40    0x0040  # COMMERCIAL AT
0x41    0x0041  # LATIN CAPITAL LETTER A
0x42    0x0042  # LATIN CAPITAL LETTER B
0x43    0x0043  # LATIN CAPITAL LETTER C
0x44    0x0044  # LATIN CAPITAL LETTER D
0x45    0x0045  # LATIN CAPITAL LETTER E
0x46    0x0046  # LATIN CAPITAL LETTER F
0x47    0x0047  # LATIN CAPITAL LETTER G
0x48    0x0048  # LATIN CAPITAL LETTER H
0x49    0x0049  # LATIN CAPITAL LETTER I
0x4A    0x004A  # LATIN CAPITAL LETTER J
0x4B    0x004B  # LATIN CAPITAL LETTER K
0x4C    0x004C  # LATIN CAPITAL LETTER L
0x4D    0x004D  # LATIN CAPITAL LETTER M
0x4E    0x004E  # LATIN CAPITAL LETTER N
0x4F    0x004F  # LATIN CAPITAL LETTER O
0x50    0x0050  # LATIN CAPITAL LETTER P
0x51    0x0051  # LATIN CAPITAL LETTER Q
0x52    0x0052  # LATIN CAPITAL LETTER R
0x53    0x0053  # LATIN CAPITAL LETTER S
0x54    0x0054  # LATIN CAPITAL LETTER T
0x55    0x0055  # LATIN CAPITAL LETTER U
0x56    0x0056  # LATIN CAPITAL LETTER V
0x57    0x0057  # LATIN CAPITAL LETTER W
0x58    0x0058  # LATIN CAPITAL LETTER X
0x59    0x0059  # LATIN CAPITAL LETTER Y
0x5A    0x005A  # LATIN CAPITAL LETTER Z
0x5B    0x005B  # LEFT SQUARE BRACKET
0x5C    0x005C  # REVERSE SOLIDUS
0x5D    0x005D  # RIGHT SQUARE BRACKET
0x5E    0x005E  # CIRCUMFLEX ACCENT
0x5F    0x005F  # LOW LINE
0x60    0x0060  # GRAVE ACCENT
0x61    0x0061  # LATIN SMALL LETTER A
0x62    0x0062  # LATIN SMALL LETTER B
0x63    0x0063  # LATIN SMALL LETTER C
0x64    0x0064  # LATIN SMALL LETTER D
0x65    0x0065  # LATIN SMALL LETTER E
0x66    0x0066  # LATIN SMALL LETTER F
0x67    0x0067  # LATIN SMALL LETTER G
0x68    0x0068  # LATIN SMALL LETTER H
0x69    0x0069  # LATIN SMALL LETTER I
0x6A    0x006A  # LATIN SMALL LETTER J
0x6B    0x006B  # LATIN SMALL LETTER K
0x6C    0x006C  # LATIN SMALL LETTER L
0x6D    0x006D  # LATIN SMALL LETTER M
0x6E    0x006E  # LATIN SMALL LETTER N
0x6F    0x006F  # LATIN SMALL LETTER O
0x70    0x0070  # LATIN SMALL LETTER P
0x71    0x0071  # LATIN SMALL LETTER Q
0x72    0x0072  # LATIN SMALL LETTER R
0x73    0x0073  # LATIN SMALL LETTER S
0x74    0x0074  # LATIN SMALL LETTER T
0x75    0x0075  # LATIN SMALL LETTER U
0x76    0x0076  # LATIN SMALL LETTER V
0x77    0x0077  # LATIN SMALL LETTER W
0x78    0x0078  # LATIN SMALL LETTER X
0x79    0x0079  # LATIN SMALL LETTER Y
0x7A    0x007A  # LATIN SMALL LETTER Z
0x7B    0x007B  # LEFT CURLY BRACKET
0x7C    0x007C  # VERTICAL LINE
0x7D    0x007D  # RIGHT CURLY BRACKET
0x7E    0x007E  # TILDE
#
0x80    0x00AB  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x81    0x00BB  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x82    0x2026  # HORIZONTAL ELLIPSIS
0x83    0x0E48+0xF875   # THAI CHARACTER MAI EK, low left position
0x84    0x0E49+0xF875   # THAI CHARACTER MAI THO, low left position
0x85    0x0E4A+0xF875   # THAI CHARACTER MAI TRI, low left position
0x86    0x0E4B+0xF875   # THAI CHARACTER MAI CHATTAWA, low left position
0x87    0x0E4C+0xF875   # THAI CHARACTER THANTHAKHAT, low left position
0x88    0x0E48+0xF873   # THAI CHARACTER MAI EK, low position
0x89    0x0E49+0xF873   # THAI CHARACTER MAI THO, low position
0x8A    0x0E4A+0xF873   # THAI CHARACTER MAI TRI, low position
0x8B    0x0E4B+0xF873   # THAI CHARACTER MAI CHATTAWA, low position
0x8C    0x0E4C+0xF873   # THAI CHARACTER THANTHAKHAT, low position
0x8D    0x201C  # LEFT DOUBLE QUOTATION MARK
0x8E    0x201D  # RIGHT DOUBLE QUOTATION MARK
0x8F    0x0E4D+0xF874   # THAI CHARACTER NIKHAHIT, left position
#
0x91    0x2022  # BULLET
0x92    0x0E31+0xF874   # THAI CHARACTER MAI HAN-AKAT, left position
0x93    0x0E47+0xF874   # THAI CHARACTER MAITAIKHU, left position
0x94    0x0E34+0xF874   # THAI CHARACTER SARA I, left position
0x95    0x0E35+0xF874   # THAI CHARACTER SARA II, left position
0x96    0x0E36+0xF874   # THAI CHARACTER SARA UE, left position
0x97    0x0E37+0xF874   # THAI CHARACTER SARA UEE, left position
0x98    0x0E48+0xF874   # THAI CHARACTER MAI EK, left position
0x99    0x0E49+0xF874   # THAI CHARACTER MAI THO, left position
0x9A    0x0E4A+0xF874   # THAI CHARACTER MAI TRI, left position
0x9B    0x0E4B+0xF874   # THAI CHARACTER MAI CHATTAWA, left position
0x9C    0x0E4C+0xF874   # THAI CHARACTER THANTHAKHAT, left position
0x9D    0x2018  # LEFT SINGLE QUOTATION MARK
0x9E    0x2019  # RIGHT SINGLE QUOTATION MARK
#
0xA0    0x00A0  # NO-BREAK SPACE
0xA1    0x0E01  # THAI CHARACTER KO KAI
0xA2    0x0E02  # THAI CHARACTER KHO KHAI
0xA3    0x0E03  # THAI CHARACTER KHO KHUAT
0xA4    0x0E04  # THAI CHARACTER KHO KHWAI
0xA5    0x0E05  # THAI CHARACTER KHO KHON
0xA6    0x0E06  # THAI CHARACTER KHO RAKHANG
0xA7    0x0E07  # THAI CHARACTER NGO NGU
0xA8    0x0E08  # THAI CHARACTER CHO CHAN
0xA9    0x0E09  # THAI CHARACTER CHO CHING
0xAA    0x0E0A  # THAI CHARACTER CHO CHANG
0xAB    0x0E0B  # THAI CHARACTER SO SO
0xAC    0x0E0C  # THAI CHARACTER CHO CHOE
0xAD    0x0E0D  # THAI CHARACTER YO YING
0xAE    0x0E0E  # THAI CHARACTER DO CHADA
0xAF    0x0E0F  # THAI CHARACTER TO PATAK
0xB0    0x0E10  # THAI CHARACTER THO THAN
0xB1    0x0E11  # THAI CHARACTER THO NANGMONTHO
0xB2    0x0E12  # THAI CHARACTER THO PHUTHAO
0xB3    0x0E13  # THAI CHARACTER NO NEN
0xB4    0x0E14  # THAI CHARACTER DO DEK
0xB5    0x0E15  # THAI CHARACTER TO TAO
0xB6    0x0E16  # THAI CHARACTER THO THUNG
0xB7    0x0E17  # THAI CHARACTER THO THAHAN
0xB8    0x0E18  # THAI CHARACTER THO THONG
0xB9    0x0E19  # THAI CHARACTER NO NU
0xBA    0x0E1A  # THAI CHARACTER BO BAIMAI
0xBB    0x0E1B  # THAI CHARACTER PO PLA
0xBC    0x0E1C  # THAI CHARACTER PHO PHUNG
0xBD    0x0E1D  # THAI CHARACTER FO FA
0xBE    0x0E1E  # THAI CHARACTER PHO PHAN
0xBF    0x0E1F  # THAI CHARACTER FO FAN
0xC0    0x0E20  # THAI CHARACTER PHO SAMPHAO
0xC1    0x0E21  # THAI CHARACTER MO MA
0xC2    0x0E22  # THAI CHARACTER YO YAK
0xC3    0x0E23  # THAI CHARACTER RO RUA
0xC4    0x0E24  # THAI CHARACTER RU
0xC5    0x0E25  # THAI CHARACTER LO LING
0xC6    0x0E26  # THAI CHARACTER LU
0xC7    0x0E27  # THAI CHARACTER WO WAEN
0xC8    0x0E28  # THAI CHARACTER SO SALA
0xC9    0x0E29  # THAI CHARACTER SO RUSI
0xCA    0x0E2A  # THAI CHARACTER SO SUA
0xCB    0x0E2B  # THAI CHARACTER HO HIP
0xCC    0x0E2C  # THAI CHARACTER LO CHULA
0xCD    0x0E2D  # THAI CHARACTER O ANG
0xCE    0x0E2E  # THAI CHARACTER HO NOKHUK
0xCF    0x0E2F  # THAI CHARACTER PAIYANNOI
0xD0    0x0E30  # THAI CHARACTER SARA A
0xD1    0x0E31  # THAI CHARACTER MAI HAN-AKAT
0xD2    0x0E32  # THAI CHARACTER SARA AA
0xD3    0x0E33  # THAI CHARACTER SARA AM
0xD4    0x0E34  # THAI CHARACTER SARA I
0xD5    0x0E35  # THAI CHARACTER SARA II
0xD6    0x0E36  # THAI CHARACTER SARA UE
0xD7    0x0E37  # THAI CHARACTER SARA UEE
0xD8    0x0E38  # THAI CHARACTER SARA U
0xD9    0x0E39  # THAI CHARACTER SARA UU
0xDA    0x0E3A  # THAI CHARACTER PHINTHU
0xDB    0x2060  # WORD JOINER # for Unicode 3.2 and later
0xDC    0x200B  # ZERO WIDTH SPACE
0xDD    0x2013  # EN DASH
0xDE    0x2014  # EM DASH
0xDF    0x0E3F  # THAI CURRENCY SYMBOL BAHT
0xE0    0x0E40  # THAI CHARACTER SARA E
0xE1    0x0E41  # THAI CHARACTER SARA AE
0xE2    0x0E42  # THAI CHARACTER SARA O
0xE3    0x0E43  # THAI CHARACTER SARA AI MAIMUAN
0xE4    0x0E44  # THAI CHARACTER SARA AI MAIMALAI
0xE5    0x0E45  # THAI CHARACTER LAKKHANGYAO
0xE6    0x0E46  # THAI CHARACTER MAIYAMOK
0xE7    0x0E47  # THAI CHARACTER MAITAIKHU
0xE8    0x0E48  # THAI CHARACTER MAI EK
0xE9    0x0E49  # THAI CHARACTER MAI THO
0xEA    0x0E4A  # THAI CHARACTER MAI TRI
0xEB    0x0E4B  # THAI CHARACTER MAI CHATTAWA
0xEC    0x0E4C  # THAI CHARACTER THANTHAKHAT
0xED    0x0E4D  # THAI CHARACTER NIKHAHIT
0xEE    0x2122  # TRADE MARK SIGN
0xEF    0x0E4F  # THAI CHARACTER FONGMAN
0xF0    0x0E50  # THAI DIGIT ZERO
0xF1    0x0E51  # THAI DIGIT ONE
0xF2    0x0E52  # THAI DIGIT TWO
0xF3    0x0E53  # THAI DIGIT THREE
0xF4    0x0E54  # THAI DIGIT FOUR
0xF5    0x0E55  # THAI DIGIT FIVE
0xF6    0x0E56  # THAI DIGIT SIX
0xF7    0x0E57  # THAI DIGIT SEVEN
0xF8    0x0E58  # THAI DIGIT EIGHT
0xF9    0x0E59  # THAI DIGIT NINE
0xFA    0x00AE  # REGISTERED SIGN
0xFB    0x00A9  # COPYRIGHT SIGN

"
! !

!MAC_Thai class methodsFor:'documentation'!

version
    "Answer the CVS Header keyword string recorded for this source file;
     it carries the repository path, revision number, timestamp and author.
     NOTE(review): presumably maintained by CVS keyword expansion rather
     than edited by hand - keep the literal byte-identical."

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__MAC_Thai.st,v 1.3 2004-03-09 21:59:00 cg Exp $'
! !