author | Claus Gittinger <cg@exept.de> |
Fri, 27 Feb 2015 18:16:39 +0100 | |
changeset 17565 | 29224f55218c |
parent 17564 | 67ae75f28757 |
child 17566 | a990c12c71c0 |
permissions | -rw-r--r-- |
17490 | 1 |
"{ Encoding: utf8 }" |
2 |
||
3 |
" |
|
4 |
COPYRIGHT (c) 2015 by eXept Software AG |
|
5 |
All Rights Reserved |
|
6 |
||
7 |
This software is furnished under a license and may be used |
|
8 |
only in accordance with the terms of that license and with the |
|
9 |
inclusion of the above copyright notice. This software may not |
|
10 |
be provided or otherwise made available to, or used by, any |
|
11 |
other person. No title to or ownership of the software is |
|
12 |
hereby transferred. |
|
13 |
" |
|
14 |
"{ Package: 'stx:libbasic' }" |
|
15 |
||
16 |
"{ NameSpace: CharacterEncoderImplementations }" |
|
17 |
||
18 |
"Class definition: UTF-8 encoder/decoder variant which writes accented characters
 in decomposed form (base character followed by a combining mark).
 Class variables:
    DecomposeMap    composed code point -> Array(base code point, combining mark code point)
    ComposeMap      combining mark code point -> String of alternating base/composed characters
    AccentMap       NOTE(review): not referenced by any method visible in this file;
                    confirm it is unused before removing it."
ISO10646_to_UTF8 subclass:#ISO10646_to_UTF8_MAC
    instanceVariableNames:''
    classVariableNames:'AccentMap DecomposeMap ComposeMap'
    poolDictionaries:''
    category:'Collections-Text-Encodings'
|
23 |
! |
|
24 |
||
25 |
!ISO10646_to_UTF8_MAC class methodsFor:'documentation'! |
|
26 |
||
27 |
copyright |
|
28 |
" |
|
29 |
COPYRIGHT (c) 2015 by eXept Software AG |
|
30 |
All Rights Reserved |
|
31 |
||
32 |
This software is furnished under a license and may be used |
|
33 |
only in accordance with the terms of that license and with the |
|
34 |
inclusion of the above copyright notice. This software may not |
|
35 |
be provided or otherwise made available to, or used by, any |
|
36 |
other person. No title to or ownership of the software is |
|
37 |
hereby transferred. |
|
38 |
" |
|
39 |
! |
|
40 |
||
41 |
documentation |
|
42 |
" |
|
43 |
UTF-8 can encode some diacritical characters (umlauts) in multiple ways: |
|
44 |
- either as a single precomposed unicode character (e.g. ä -> U+00E4 -> UTF-8 bytes C3 A4) |
|
45 |
- or in the so-called 'Normalization Form canonical Decomposition' (NFD), i.e. as a regular 'a' followed by a |
|
46 |
combining diacritical mark (for the example above: the diaeresis U+0308 -> a + UTF-8 bytes CC 88). |
|
47 |
||
17564
67ae75f28757
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17522
diff
changeset
|
48 |
MAC OSX needs the second form for its file names. |
17490 | 49 |
However, OSX does not decompose the ranges U+2000-U+2FFF, U+F900-U+FAFF and U+2F800-U+2FAFF. |
50 |
||
51 |
This is a quick-and-dirty hack, which at least supports the first page (latin1) characters. |
|
52 |
Will be enhanced for the 2nd and 3rd unicode page, when I find time. |
|
53 |
||
54 |
[author:] |
|
55 |
Claus Gittinger |
|
56 |
||
57 |
[instance variables:] |
|
58 |
||
59 |
[class variables:] |
|
17564
67ae75f28757
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17522
diff
changeset
|
60 |
ComposeMap DecomposeMap |
17490 | 61 |
|
62 |
[see also:] |
|
63 |
http://developer.apple.com/library/mac/#qa/qa2001/qa1173.html |
|
64 |
||
65 |
" |
|
66 |
! ! |
|
67 |
||
68 |
!ISO10646_to_UTF8_MAC class methodsFor:'initialization'! |
|
69 |
||
70 |
initializeDecomposeMap
    "Build the two class-side lookup tables used by the utf8-mac conversion:
        DecomposeMap    composed code point -> Array(base code point, combining mark code point)
        ComposeMap      combining mark code point -> String of alternating
                        (base character, composed character) pairs
     Both tables are filled in locals and only stored into the class variables
     at the very end, so the lazy isNil checks in encodeString: / decodeString:
     never see a half-filled table."

    |decomposeMap composeMap|

    decomposeMap := Dictionary new.
    composeMap := Dictionary new.

    "/ each entry: (combiningMarkCodePoint 'base1composed1base2composed2...')
    #(
        (16r0300 "grave"      'AÀaàEÈeèIÌiìoòOÒUÙuùNǸnǹÜǛüǜ')
        (16r0301 "acute"      'AÁaáEÉeéIÍiíOÓoóUÚuúyýYÝCĆcćNŃnńRŔrŕSŚsśZŹzźGǴgǵÆǼæǽØǾøǿÜǗüǘ')
        (16r0302 "circumflex" 'AÂaâEÊeêIÎiîOÔoôUÛuûCĈcĉGĜgĝHĤhĥJĴjĵSŜsŝWŴwŵYŶyŷ')
        (16r0303 "tilde"      'AÃaãNÑnñOÕoõUŨuũ')
        (16r0308 "diaeresis"  'AÄaäOÖoöUÜuüIÏiïyÿYŸ')
        (16r030A "ring above" 'AÅaåUŮuů')
        (16r030C "caron"      'CČcčDĎEĚeěNŇnňRŘrřSŠsšZŽzžAǍaǎIǏiǐOǑoǒUǓuǔGǦgǧKǨkǩÜǙüǚ')
        "/ was 'CÇc<U+7ABA>TŢtţ': the bytes E7 AA BA had been misread as a single
        "/ CJK character, so c-cedilla / S-cedilla / s-cedilla were never mapped.
        (16r0327 "cedilla"    'CÇcçSŞsşTŢtţ')
    ) do:[:eachPair |
        |composeCode mapping|

        composeCode := eachPair first.
        mapping := eachPair second.
        mapping pairWiseDo:[:baseChar :composedChar |
            "/ e.g. for a-grave (16rE0):  decomposeMap at:16rE0 put:#(16r61 16r0300)
            decomposeMap
                at:composedChar codePoint
                put:(Array with:baseChar codePoint with:composeCode)
        ].

        composeMap at:composeCode put:mapping.
    ].

    "/ publish the completed tables
    DecomposeMap := decomposeMap.
    ComposeMap := composeMap.
17490 | 100 |
! ! |
101 |
||
102 |
!ISO10646_to_UTF8_MAC methodsFor:'encoding & decoding'! |
|
103 |
||
104 |
compositionOf: baseChar with: diacriticalChar to: outStream
    "Compose a base character and a following combining mark into a single
     precomposed character and write it to outStream
        a + combining diaeresis -> ä
     If no composition is known for the pair, both characters are written
     unchanged. If baseChar is nil (a mark without a preceding base), only the
     mark is written."

    |table|

    baseChar isNil ifTrue:[
        outStream nextPut: diacriticalChar.
        ^ self.
    ].

    ComposeMap isNil ifTrue:[
        self class initializeDecomposeMap
    ].
    table := ComposeMap at:diacriticalChar codePoint ifAbsent:nil.

    table notNil ifTrue:[
        "/ the table alternates base, composed, base, composed, ...
        "/ Only look at the base slots (odd indices): a plain indexOf: could also hit
        "/ a composed char (e.g. Ü in the diaeresis table) and then answer the
        "/ following, unrelated base char or index past the end of the table.
        1 to:(table size - 1) by:2 do:[:idx |
            (table at:idx) = baseChar ifTrue:[
                outStream nextPut:(table at:idx + 1).
                ^ self.
            ].
        ].
    ].

    "/ unknown combination - leave as is
    outStream nextPut: baseChar.
    outStream nextPut: diacriticalChar.
|
125 |
! |
|
126 |
||
127 |
decodeString:aStringOrByteCollection
    "Return a Unicode string from the passed in UTF-8-MAC encoded string.
     UTF-8-MAC is UTF-8 in which accented characters are stored decomposed
     (base character followed by a combining mark); here such pairs are
     recombined into single characters where the pair is known to ComposeMap.

     Limited version: only combining marks in the range U+0300..U+0327 are
     looked at, and only one mark per base character is composed. A mark
     which has no preceding base character (at the start of the string, or
     directly after another mark) is passed through unchanged."

    |s buff previous|

    s := super decodeString:aStringOrByteCollection.
    "/ fast exit: nothing to compose
    (s contains:[:char | char codePoint between:16r0300 and:16r0327]) ifFalse:[^ s].

    ComposeMap isNil ifTrue:[
        self class initializeDecomposeMap
    ].

    buff := CharacterWriteStream on:''.
    "/ previous holds the last non-mark char, which has not been written yet,
    "/ because a following mark might have to be merged into it
    previous := nil.
    s do:[:each |
        (each codePoint between:16r0300 and:16r0327) ifTrue:[
            previous isNil ifTrue:[
                "/ mark without a pending base char - used to put nil onto the stream
                buff nextPut:each.
            ] ifFalse:[
                self compositionOf:previous with:each to:buff.
                previous := nil.
            ].
        ] ifFalse:[
            previous notNil ifTrue:[
                buff nextPut:previous.
            ].
            previous := each.
        ].
    ].
    previous notNil ifTrue:[
        buff nextPut:previous.
    ].
    ^ buff contents.

    "
     (ISO10646_to_UTF8 new encodeString:'aäoöuü') asByteArray
       -> #[97 195 164 111 195 182 117 195 188]

     (ISO10646_to_UTF8 new decodeString:
        (ISO10646_to_UTF8 new encodeString:'aäoöuü') asByteArray)

     (ISO10646_to_UTF8_MAC new encodeString:'aäoöuü') asByteArray
       -> #[97 97 204 136 111 111 204 136 117 117 204 136]

     (ISO10646_to_UTF8_MAC new decodeString:
        (ISO10646_to_UTF8_MAC new encodeString:'aäoöuü') asByteArray)

     leading / doubled combining marks are passed through:
     (ISO10646_to_UTF8_MAC new decodeString:#[204 136 97]) size             -> 2
     (ISO10646_to_UTF8_MAC new decodeString:#[97 204 136 204 129]) size     -> 2
    "
|
176 |
! |
|
177 |
||
178 |
decompositionOf: codePointIn into:outBlockWithTwoArgs
    "Look up codePointIn in DecomposeMap. If an entry exists, evaluate
     outBlockWithTwoArgs with the base code point and the combining mark
     code point (e.g. ä -> a, combining diaeresis) and answer true.
     Otherwise answer false; the block is not evaluated in that case.
     Code points below 16rC0 are never looked up."

    |parts|

    codePointIn >= 16rC0 ifTrue:[
        parts := DecomposeMap at:codePointIn ifAbsent:nil.
        parts notNil ifTrue:[
            outBlockWithTwoArgs value:(parts at:1) value:(parts at:2).
            ^ true
        ].
    ].
    ^ false
|
194 |
! |
|
195 |
||
196 |
encodeString:aUnicodeString
    "Answer the UTF-8-MAC representation of aUnicodeString:
     UTF-8, in which every character known to DecomposeMap is written as its
     base character followed by the combining mark (instead of as a single
     precomposed character). Characters without an entry are written as
     regular UTF-8.

     Limited version: only the characters listed in initializeDecomposeMap
     are decomposed."

    |out emitUtf8 takeParts currentCode markCode|

    DecomposeMap isNil ifTrue:[
        self class initializeDecomposeMap
    ].

    out := WriteStream on:(String uninitializedNew:aUnicodeString size).

    "/ writes the UTF-8 byte sequence for one code point onto out
    emitUtf8 :=
        [:cp |
            |rest tail nTail finished|

            cp <= 16r7F ifTrue:[
                out nextPut:(Character value:cp).
            ] ifFalse:[
                "/ tail collects the continuation bytes, lowest 6 bits first;
                "/ they are written in reverse order after the lead byte
                tail := Array new:5.
                nTail := 0.
                rest := cp.
                finished := false.
                "/ each entry: (max value which fits into the lead byte, lead byte marker bits)
                #(
                    (16r1F 2r11000000)
                    (16r0F 2r11100000)
                    (16r07 2r11110000)
                    (16r03 2r11111000)
                    (16r01 2r11111100)
                ) do:[:leadSpec |
                    finished ifFalse:[
                        nTail := nTail + 1.
                        tail at:nTail put:(Character value:((rest bitAnd:16r3F) bitOr:2r10000000)).
                        rest := rest bitShift:-6.
                        rest <= leadSpec first ifTrue:[
                            out nextPut:(Character value:(rest bitOr:leadSpec last)).
                            nTail to:1 by:-1 do:[:k | out nextPut:(tail at:k)].
                            finished := true.
                        ].
                    ].
                ].
                finished ifFalse:[
                    "/ does not fit into the longest (6 byte) sequence
                    self error:'ascii value > 31bit in utf8Encode'.
                ].
            ].
        ].

    "/ receives the result of a decomposition
    takeParts :=
        [:baseArg :markArg |
            currentCode := baseArg.
            markCode := markArg
        ].

    aUnicodeString do:[:ch |
        |wasDecomposed|

        currentCode := ch codePoint.
        wasDecomposed := self decompositionOf:currentCode into:takeParts.
        emitUtf8 value:currentCode.
        wasDecomposed ifTrue:[
            emitUtf8 value:markCode
        ].
    ].

    ^ out contents

    "
     (ISO10646_to_UTF8_MAC new encodeString:'hello') asByteArray                        -> #[104 101 108 108 111]
     (ISO10646_to_UTF8_MAC new encodeString:(Character value:16r40) asString) asByteArray -> #[64]
     (ISO10646_to_UTF8_MAC new encodeString:(Character value:16r7F) asString) asByteArray -> #[127]
     (ISO10646_to_UTF8_MAC new encodeString:(Character value:16r80) asString) asByteArray -> #[194 128]
     (ISO10646_to_UTF8_MAC new encodeString:(Character value:16rFF) asString) asByteArray -> #[121 204 136]
            (16rFF is y-diaeresis, which is written decomposed here;
             the plain ISO10646_to_UTF8 encoder answers #[195 191])

     (ISO10646_to_UTF8 new encodeString:'aäoöuü') asByteArray
        -> #[97 195 164 111 195 182 117 195 188]
     (ISO10646_to_UTF8_MAC new encodeString:'aäoöuü') asByteArray
        -> #[97 97 204 136 111 111 204 136 117 117 204 136]

     ISO10646_to_UTF8_MAC new decodeString:
        (ISO10646_to_UTF8_MAC new encodeString:'Packages aus VSE für Smalltalk_X') asByteArray
    "
293 |
! ! |
|
294 |
||
17497
36ab19b73c1f
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17490
diff
changeset
|
295 |
!ISO10646_to_UTF8_MAC methodsFor:'queries'! |
36ab19b73c1f
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17490
diff
changeset
|
296 |
|
36ab19b73c1f
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17490
diff
changeset
|
297 |
nameOfEncoding
    "answer the symbolic name of the encoding implemented by this encoder"

    ^ #'utf8-mac'
36ab19b73c1f
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17490
diff
changeset
|
299 |
! ! |
36ab19b73c1f
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
Claus Gittinger <cg@exept.de>
parents:
17490
diff
changeset
|
300 |
|
17490 | 301 |
!ISO10646_to_UTF8_MAC class methodsFor:'documentation'! |
302 |
||
303 |
version
    "answer the revision header string (expanded CVS keyword) of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.5 2015-02-27 17:16:39 cg Exp $'
17490 | 305 |
! |
306 |
||
307 |
version_CVS
    "answer the CVS revision header string of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.5 2015-02-27 17:16:39 cg Exp $'
17490 | 309 |
! ! |
310 |