author | Stefan Vogel <sv@exept.de> |
Tue, 28 Apr 2020 16:21:34 +0200 | |
changeset 25373 | f030619565e1 |
parent 25340 | 9230ffff3935 |
permissions | -rw-r--r-- |
23981 | 1 |
"{ Encoding: utf8 }" |
2 |
||
8048 | 3 |
" |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
4 |
COPYRIGHT (c) 2004 by eXept Software AG |
14209 | 5 |
All Rights Reserved |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
6 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
7 |
This software is furnished under a license and may be used |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
8 |
only in accordance with the terms of that license and with the |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
9 |
inclusion of the above copyright notice. This software may not |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
10 |
be provided or otherwise made available to, or used by, any |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
11 |
other person. No title to or ownership of the software is |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
12 |
hereby transferred. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
13 |
" |
8114
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
14 |
"{ Package: 'stx:libbasic' }" |
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
15 |
|
17491 | 16 |
"{ NameSpace: Smalltalk }" |
17 |
||
8118 | 18 |
Object subclass:#CharacterEncoder |
22597 | 19 |
instanceVariableNames:'' |
20 |
classVariableNames:'AccessLock CachedEncoders EncoderClassesByName EncodersByName |
|
21 |
EncodingDetectors Jis7KanjiEscapeSequence |
|
22 |
Jis7KanjiOldEscapeSequence Jis7RomanEscapeSequence |
|
23 |
JisISO2022EscapeSequence NullEncoderInstance' |
|
24 |
poolDictionaries:'' |
|
25 |
category:'Collections-Text-Encodings' |
|
7969 | 26 |
! |
27 |
||
7914 | 28 |
CharacterEncoder subclass:#CompoundEncoder |
22597 | 29 |
instanceVariableNames:'decoder encoder' |
30 |
classVariableNames:'' |
|
31 |
poolDictionaries:'' |
|
32 |
privateIn:CharacterEncoder |
|
7915 | 33 |
! |
34 |
||
22470 | 35 |
CharacterEncoder subclass:#NullEncoder |
22597 | 36 |
instanceVariableNames:'' |
37 |
classVariableNames:'' |
|
38 |
poolDictionaries:'' |
|
39 |
privateIn:CharacterEncoder |
|
7932 | 40 |
! |
41 |
||
7914 | 42 |
CharacterEncoder subclass:#InverseEncoder |
22597 | 43 |
instanceVariableNames:'decoder readAhead' |
44 |
classVariableNames:'' |
|
45 |
poolDictionaries:'' |
|
46 |
privateIn:CharacterEncoder |
|
7915 | 47 |
! |
48 |
||
22470 | 49 |
CharacterEncoder::NullEncoder subclass:#DefaultEncoder |
22597 | 50 |
instanceVariableNames:'' |
51 |
classVariableNames:'' |
|
52 |
poolDictionaries:'' |
|
53 |
privateIn:CharacterEncoder |
|
7915 | 54 |
! |
55 |
||
7892 | 56 |
CharacterEncoder subclass:#OtherEncoding |
22597 | 57 |
instanceVariableNames:'' |
58 |
classVariableNames:'' |
|
59 |
poolDictionaries:'' |
|
60 |
privateIn:CharacterEncoder |
|
7915 | 61 |
! |
62 |
||
7919 | 63 |
CharacterEncoder subclass:#TwoStepEncoder |
22597 | 64 |
instanceVariableNames:'encoder1 encoder2' |
65 |
classVariableNames:'' |
|
66 |
poolDictionaries:'' |
|
67 |
privateIn:CharacterEncoder |
|
7919 | 68 |
! |
69 |
||
7893 | 70 |
!CharacterEncoder class methodsFor:'documentation'! |
71 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
72 |
copyright |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
73 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
74 |
COPYRIGHT (c) 2004 by eXept Software AG |
14209 | 75 |
All Rights Reserved |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
76 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
77 |
This software is furnished under a license and may be used |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
78 |
only in accordance with the terms of that license and with the |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
79 |
inclusion of the above copyright notice. This software may not |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
80 |
be provided or otherwise made available to, or used by, any |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
81 |
other person. No title to or ownership of the software is |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
82 |
hereby transferred. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
83 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
84 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
85 |
|
7893 | 86 |
documentation |
87 |
" |
|
22397 | 88 |
please read howToAddMoreCoders. |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
89 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
90 |
Character mappings are based on information in character maps found at either: |
8226 | 91 |
http://std.dkuug.dk/i18n/charmaps |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
92 |
or: |
8226 | 93 |
http://www.unicode.org/Public/MAPPINGS |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
94 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
95 |
No Warranty. |
8226 | 96 |
|
20227 | 97 |
All the ISO 8859 codesets include ASCII as a proper codeset within them: |
8226 | 98 |
|
20227 | 99 |
ISO-8859-1: Latin 1 - Western European Languages. |
100 |
ISO-8859-2: Latin 2 - Eastern European Languages. |
|
101 |
ISO-8859-3: Latin 3 - Afrikaans, Catalan, Dutch, English, Esperanto, German, |
|
102 |
Italian, Maltese, Spanish and Turkish. |
|
103 |
ISO-8859-4: Latin 4 - Danish, English, Estonian, Finnish, German, Greenlandic, Lappish and Latvian. |
|
104 |
ISO-8859-5: Latin/Cyrillic - Bulgarian, Byelorussian, English, Macedonian, Russian, Serbo-Croat and Ukrainian. |
|
105 |
ISO-8859-6: Latin/Arabic - Arabic. |
|
106 |
ISO-8859-7: Latin/Greek - Greek. |
|
107 |
ISO-8859-8: Latin/Hebrew - Hebrew. |
|
108 |
ISO-8859-9: Latin 5 - Danish, Dutch, English, Finnish, French, German, Irish, Italian, |
|
109 |
Norwegian, Portuguese, Spanish, Swedish and Turkish. |
|
110 |
ISO-8859-10: Latin 6 - Danish, English, Estonian, Finnish, German, Greenlandic, Icelandic, |
|
8226 | 111 |
Sami (Lappish), Latvian, Lithuanian, Norwegian, Faroese and Swedish. |
8810 | 112 |
[author:] |
113 |
Claus Gittinger |
|
22397 | 114 |
|
115 |
[see also:] |
|
116 |
EncodedStream |
|
117 |
Base64Coder |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
118 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
119 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
120 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
121 |
examples |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
122 |
" |
20227 | 123 |
[exBegin] |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
124 |
|s1 s2| |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
125 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
126 |
s1 := 'hello'. |
9143 | 127 |
s2 := CharacterEncoder encodeString:s1 from:#'iso8859-1' into:#'unicode'. |
20227 | 128 |
s2 |
129 |
[exEnd] |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
130 |
|
20227 | 131 |
[exBegin] |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
132 |
|s1 s2| |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
133 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
134 |
s1 := 'hello'. |
9143 | 135 |
s2 := CharacterEncoder encodeString:s1 from:#'iso8859-1' into:#'iso8859-7'. |
20227 | 136 |
s2 |
137 |
[exEnd] |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
138 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
139 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
140 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
141 |
howToAddMoreCoders |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
142 |
" |
9143 | 143 |
Coders can be hand-written or automagically generated via a mapping table. |
7932 | 144 |
Examples for hand-written coders are UTF8_to_ISO10646 or JIS0208_to_JIS7. |
145 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
146 |
The table driven encode/decode methods can be generated from a character mapping document |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
147 |
as found on the unicode consortium host |
9143 | 148 |
(for example: 'http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT') |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
149 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
150 |
or from the i18n character maps: |
9143 | 151 |
(for example: 'http://std.dkuug.dk/i18n/charmaps/ISO-8859-1') |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
152 |
|
9143 | 153 |
In order to add another coder (for example: for EBCDIC or ms-codePage 278), |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
154 |
perform the following steps: |
9143 | 155 |
- create a public subclass of CharacterEncoderImplementations::CharacterEncoderImplementation named (for example) CharacterEncoderImplementations::CP278. |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
156 |
|
9143 | 157 |
- define the mappingURL1_relativeName (if the table is found on 'www.unicode.org') |
158 |
or the mappingURL2_relativeName (if it is found on 'std.dkuug.dk') method, which |
|
159 |
should return the name of the tables file, relative to the top directory there |
|
160 |
(which is '.../Public/MAPPINGS' on 'www.unicode.org' and '.../i18n/charmaps' on 'std.dkuug.dk'). |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
161 |
|
9143 | 162 |
In this example, the table from 'std.dkuug.dk' is used, and named 'EBCDIC-CP-FI' there. |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
163 |
|
16054 | 164 |
- generate code by evaluating (make sure that CharacterEncoderGenerator is loaded from stx:goodies): |
9143 | 165 |
CharacterEncoderImplementations::CP278 generateCode |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
166 |
|
20227 | 167 |
That's all!! |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
168 |
|
7909 | 169 |
|
170 |
The existing code was generated by: |
|
171 |
||
9143 | 172 |
CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | Transcript showCR:cls name. cls flushCode; generateCode ] |
173 |
CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | cls allSubclassesDo:[:sub | Transcript showCR:sub name. sub flushCode; generateSubclassCode]] |
|
7909 | 174 |
|
175 |
or individually: |
|
9143 | 176 |
CharacterEncoder::ASCII flushCode; generateCode. |
177 |
CharacterEncoder::ISO8859_1 flushCode; generateCode. |
|
178 |
CharacterEncoder::ISO8859_2 flushCode; generateCode. |
|
179 |
CharacterEncoder::ISO8859_3 flushCode; generateCode. |
|
180 |
CharacterEncoder::ISO8859_4 flushCode; generateCode. |
|
181 |
CharacterEncoder::ISO8859_5 flushCode; generateCode. |
|
182 |
CharacterEncoder::ISO8859_6 flushCode; generateCode. |
|
183 |
CharacterEncoder::ISO8859_7 flushCode; generateCode. |
|
184 |
CharacterEncoder::ISO8859_8 flushCode; generateCode. |
|
185 |
CharacterEncoder::ISO8859_9 flushCode; generateCode. |
|
186 |
CharacterEncoder::ISO8859_10 flushCode; generateCode. |
|
187 |
CharacterEncoder::ISO8859_11 flushCode; generateCode. |
|
188 |
CharacterEncoder::ISO8859_13 flushCode; generateCode. |
|
189 |
CharacterEncoder::ISO8859_14 flushCode; generateCode. |
|
190 |
CharacterEncoder::ISO8859_15 flushCode; generateCode. |
|
191 |
CharacterEncoder::ISO8859_16 flushCode; generateCode. |
|
192 |
CharacterEncoder::KOI8_R flushCode; generateCode. |
|
193 |
CharacterEncoder::GSM0338 flushCode; generateCode. |
|
7909 | 194 |
|
9143 | 195 |
CharacterEncoder::KOI8_U flushCode; generateSubclassCode. |
7912 | 196 |
|
9143 | 197 |
CharacterEncoder::JIS0208 flushCode; generateCode. |
13072
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
198 |
|
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
199 |
Please check if your encoder tables are complete; for example, with: |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
200 |
0 to:255 do:[:ebc | |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
201 |
|asc ebc2| |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
202 |
|
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
203 |
asc := CharacterEncoderImplementations::EBCDIC new decode:ebc. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
204 |
asc notNil ifTrue:[ |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
205 |
ebc2 := CharacterEncoderImplementations::EBCDIC new encode:asc. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
206 |
self assert:(ebc2 = ebc) |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
207 |
]. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
208 |
]. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
209 |
|
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
210 |
0 to:255 do:[:asc | |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
211 |
|ebc asc2| |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
212 |
|
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
213 |
ebc := CharacterEncoderImplementations::EBCDIC new encode:asc. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
214 |
ebc notNil ifTrue:[ |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
215 |
asc2 := CharacterEncoderImplementations::EBCDIC new decode:ebc. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
216 |
self assert:(asc2 = asc) |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
217 |
]. |
e189e07c16aa
changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents:
13063
diff
changeset
|
218 |
]. |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
219 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
220 |
! ! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
221 |
|
7971 | 222 |
!CharacterEncoder class methodsFor:'instance creation'! |
223 |
||
22584 | 224 |
decoderForUTF8 |
225 |
"return an encoder-instance which can map utf8 to/from unicode" |
|
226 |
||
227 |
^ InverseEncoder new decoder:self encoderForUTF8 |
|
228 |
||
229 |
" |
|
230 |
self encoderForUTF8 |
|
231 |
self decoderForUTF8 |
|
232 |
" |
|
233 |
! |
|
234 |
||
7971 | 235 |
encoderFor:encodingNameSymbol |
236 |
"given the name of an encoding, return an encoder-instance which can map these from/into unicode." |
|
237 |
||
238 |
^ self |
|
8156 | 239 |
encoderFor:encodingNameSymbol |
240 |
ifAbsent:[ |
|
241 |
"/ proceed to ignore this error in the future. |
|
8352
20d2476f538e
add nullEncoder BEFORE raising an error
Claus Gittinger <cg@exept.de>
parents:
8262
diff
changeset
|
242 |
|
16054 | 243 |
"/ (EncodersByName at:#unicode) at:encodingNameSymbol put:NullEncoderInstance. |
244 |
"/ (EncoderClassesByName at:#unicode) at:encodingNameSymbol put:NullEncoder. |
|
8352
20d2476f538e
add nullEncoder BEFORE raising an error
Claus Gittinger <cg@exept.de>
parents:
8262
diff
changeset
|
245 |
|
8388
b5cf7abdfe64
no encoder: send a message to stdError instead of entering
Claus Gittinger <cg@exept.de>
parents:
8352
diff
changeset
|
246 |
"/ self error:'no encoder for ' , encodingNameSymbol mayProceed:true. |
17520 | 247 |
('CharacterEncoder [warning]: no encoder for "' , encodingNameSymbol,'"') infoPrintCR. |
8388
b5cf7abdfe64
no encoder: send a message to stdError instead of entering
Claus Gittinger <cg@exept.de>
parents:
8352
diff
changeset
|
248 |
|
8156 | 249 |
NullEncoderInstance |
250 |
] |
|
7971 | 251 |
|
252 |
" |
|
8388
b5cf7abdfe64
no encoder: send a message to stdError instead of entering
Claus Gittinger <cg@exept.de>
parents:
8352
diff
changeset
|
253 |
CharacterEncoder encoderFor:#'blabla2' |
7971 | 254 |
CharacterEncoder encoderFor:#'latin1' |
255 |
self encoderFor:#'arabic' |
|
256 |
self encoderFor:#'ms-arabic' |
|
8814 | 257 |
self encoderFor:#'cp1250' |
258 |
self encoderFor:#'cp1251' |
|
259 |
self encoderFor:#'cp1252' |
|
260 |
self encoderFor:#'cp1253' |
|
7971 | 261 |
self encoderFor:#'iso8859-5' |
262 |
self encoderFor:#'koi8-r' |
|
263 |
self encoderFor:#'koi8-u' |
|
264 |
self encoderFor:#'jis0208' |
|
265 |
self encoderFor:#'jis7' |
|
8087
0a2ee76bcf55
last version before separating into extra classes
Claus Gittinger <cg@exept.de>
parents:
8062
diff
changeset
|
266 |
self encoderFor:#'utf8' |
14169
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
267 |
(self encoderFor:#'utf16le') encodeString:'hello' |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
268 |
(self encoderFor:#'utf16le') encode:5 |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
269 |
(self encoderFor:#'utf16be') encodeString:'hello' |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
270 |
(self encoderFor:#'utf16be') encode:5 |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
271 |
(self encoderFor:#'utf32le') encodeString:'hello' |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
272 |
(self encoderFor:#'utf32be') encodeString:'hello' |
10111 | 273 |
self encoderFor:#'sgml' |
274 |
self encoderFor:#'java' |
|
25340 | 275 |
self encoderFor:#'cp850' |
276 |
self encoderFor:#'CP850' |
|
7971 | 277 |
" |
10111 | 278 |
|
14207 | 279 |
"Modified: / 12-07-2012 / 19:35:43 / cg" |
7971 | 280 |
! |
281 |
||
8168 | 282 |
encoderFor:encodingNameSymbolArg ifAbsent:exceptionValue |
7971 | 283 |
"given the name of an encoding, return an encoder-instance which can map these from/into unicode." |
284 |
||
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
285 |
|encodingNameSymbol enc clsName cls unicodeEncoders unicodeEncoderClasses| |
8118 | 286 |
|
22579 | 287 |
encodingNameSymbolArg isNil ifTrue:[ |
288 |
^ NullEncoderInstance |
|
289 |
]. |
|
7972 | 290 |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
291 |
encodingNameSymbol := encodingNameSymbolArg asLowercase asSymbolIfInternedOrSelf. |
22579 | 292 |
(encodingNameSymbol == #'iso10646-1' or:[encodingNameSymbol == #unicode]) ifTrue:[ |
293 |
"encode unicode from/into unicode" |
|
294 |
^ NullEncoderInstance |
|
295 |
]. |
|
8168 | 296 |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
297 |
encodingNameSymbol includesMatchCharacters ifTrue:[ |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
298 |
AccessLock critical:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
299 |
unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
300 |
]. |
8155 | 301 |
unicodeEncoders notNil ifTrue:[ |
302 |
unicodeEncoders keysAndValuesDo:[:eachEncodingAlias :eachEncoderInstance | |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
303 |
(encodingNameSymbol matches:eachEncodingAlias) ifTrue:[ |
8155 | 304 |
^ eachEncoderInstance. |
305 |
]. |
|
306 |
]. |
|
307 |
]. |
|
8118 | 308 |
|
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
309 |
AccessLock critical:[ |
18305 | 310 |
unicodeEncoderClasses := self encoderClassesByName at:#unicode. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
311 |
]. |
8155 | 312 |
unicodeEncoderClasses notNil ifTrue:[ |
313 |
unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClassOrName | |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
314 |
(encodingNameSymbol matches:eachEncodingAlias) ifTrue:[ |
8155 | 315 |
eachEncoderClassOrName isBehavior ifTrue:[ |
8194 | 316 |
cls := eachEncoderClassOrName |
317 |
] ifFalse:[ |
|
318 |
cls := CharacterEncoderImplementations at:eachEncoderClassOrName. |
|
8155 | 319 |
]. |
320 |
cls notNil ifTrue:[ |
|
321 |
^ cls new. |
|
322 |
] |
|
323 |
]. |
|
324 |
]. |
|
325 |
]. |
|
326 |
^ exceptionValue value |
|
7972 | 327 |
]. |
7971 | 328 |
|
8118 | 329 |
AccessLock critical:[ |
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
330 |
unicodeEncoders := EncodersByName at:#unicode ifAbsentPut:[Dictionary new]. |
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
331 |
enc := unicodeEncoders at:encodingNameSymbol ifAbsent:nil. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
332 |
]. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
333 |
enc isNil ifTrue:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
334 |
AccessLock critical:[ |
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
335 |
unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsentPut:[Dictionary new]. |
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
336 |
clsName := unicodeEncoderClasses at:encodingNameSymbol ifAbsent:nil. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
337 |
]. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
338 |
clsName notNil ifTrue:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
339 |
clsName isBehavior ifTrue:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
340 |
cls := clsName |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
341 |
] ifFalse:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
342 |
cls := CharacterEncoderImplementations at:clsName. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
343 |
]. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
344 |
cls notNil ifTrue:[ |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
345 |
enc := cls new. |
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
346 |
AccessLock critical:[ |
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
347 |
unicodeEncoders at:encodingNameSymbol put:enc. |
8155 | 348 |
] |
349 |
]. |
|
350 |
]. |
|
7973 | 351 |
]. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
352 |
|
8118 | 353 |
enc notNil ifTrue:[ |
8155 | 354 |
^ enc |
7973 | 355 |
]. |
7971 | 356 |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
357 |
"/ no direct encoder from unicode->encodingNameSymbol |
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
358 |
"/ search for unicode->any and: any->encodingNameSymbol |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
359 |
AccessLock critical:[ |
18305 | 360 |
unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsent:nil. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
361 |
]. |
8118 | 362 |
unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClass | |
8155 | 363 |
|dict2 enc1 enc2| |
8118 | 364 |
|
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
365 |
AccessLock critical:[ |
18305 | 366 |
dict2 := self encoderClassesByName at:eachEncodingAlias ifAbsent:nil. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
367 |
]. |
8155 | 368 |
dict2 notNil ifTrue:[ |
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
369 |
clsName := dict2 at:encodingNameSymbol ifAbsent:nil. |
8155 | 370 |
clsName notNil ifTrue:[ |
371 |
clsName isBehavior ifTrue:[ |
|
8194 | 372 |
cls := clsName |
8155 | 373 |
] ifFalse:[ |
374 |
cls := CharacterEncoderImplementations at:clsName. |
|
375 |
]. |
|
376 |
cls notNil ifTrue:[ |
|
377 |
enc2 := cls new. |
|
378 |
enc1 := self encoderFor:eachEncodingAlias. |
|
379 |
(enc1 notNil and:[enc2 notNil]) ifTrue:[ |
|
380 |
enc := TwoStepEncoder new encoder1:enc1 encoder2:enc2. |
|
381 |
AccessLock critical:[ |
|
21602
c63ec4a97409
Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents:
21471
diff
changeset
|
382 |
unicodeEncoders at:encodingNameSymbol put:enc. |
8155 | 383 |
]. |
384 |
^ enc. |
|
385 |
] |
|
386 |
] |
|
387 |
] |
|
388 |
]. |
|
7971 | 389 |
]. |
390 |
||
18305 | 391 |
self encoderClassesByName keysAndValuesDo:[:encoding1 :dict1 | |
8194 | 392 |
dict1 keysAndValuesDo:[:encoding2 :clsName1| |
393 |
|clsName2 cls1 cls2 dict2 enc1 enc2| |
|
394 |
||
395 |
encoding2 = encodingNameSymbol ifTrue:[ |
|
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
396 |
AccessLock critical:[ |
18305 | 397 |
dict2 := self encoderClassesByName at:#unicode. |
8262
550c67712dfa
do not autoload while in accesslock (deadlock)
Claus Gittinger <cg@exept.de>
parents:
8261
diff
changeset
|
398 |
]. |
8194 | 399 |
clsName2 := dict2 at:encoding1 ifAbsent:nil. |
400 |
clsName2 notNil ifTrue:[ |
|
401 |
clsName1 isBehavior ifTrue:[ |
|
402 |
cls1 := clsName1 |
|
403 |
] ifFalse:[ |
|
404 |
cls1 := CharacterEncoderImplementations at:clsName1. |
|
405 |
]. |
|
406 |
clsName2 isBehavior ifTrue:[ |
|
407 |
cls2 := clsName2 |
|
408 |
] ifFalse:[ |
|
409 |
cls2 := CharacterEncoderImplementations at:clsName2. |
|
410 |
]. |
|
411 |
(cls1 notNil and:[cls2 notNil]) ifTrue:[ |
|
14207 | 412 |
enc1 := cls1 new. |
413 |
enc2 := cls2 new. |
|
8194 | 414 |
enc := TwoStepEncoder new encoder1:enc1 encoder2:enc2. |
415 |
^ enc. |
|
416 |
]. |
|
417 |
] |
|
418 |
] |
|
419 |
] |
|
420 |
]. |
|
421 |
||
7971 | 422 |
^ exceptionValue value |
423 |
||
424 |
" |
|
425 |
CharacterEncoder encoderFor:#'latin1' |
|
22579 | 426 |
self encoderFor:#'iso10646-1' |
7972 | 427 |
self encoderFor:#'arabic' |
428 |
self encoderFor:#'ms-arabic' |
|
429 |
self encoderFor:#'iso8859-5' |
|
7971 | 430 |
self encoderFor:#'koi8-r' |
431 |
self encoderFor:#'koi8-u' |
|
432 |
self encoderFor:#'jis0208' |
|
433 |
self encoderFor:#'jis7' |
|
7972 | 434 |
self encoderFor:#'unicode' |
17520 | 435 |
self encoderFor:#'UTF-8' |
436 |
self encoderFor:'UTF-8' |
|
7971 | 437 |
" |
14207 | 438 |
|
439 |
"Modified: / 12-07-2012 / 19:45:58 / cg" |
|
22579 | 440 |
"Modified (comment): / 05-03-2018 / 16:04:52 / stefan" |
7971 | 441 |
! |
442 |
||
8210 | 443 |
encoderForUTF8
    "convenience: answer whatever #encoderFor: delivers for the #utf8 encoding,
     i.e. an encoder-instance which maps unicode into/from utf8"

    |utf8Encoder|

    utf8Encoder := self encoderFor:#utf8.
    ^ utf8Encoder

    "
     self encoderForUTF8
    "

    "Modified (comment): / 17-01-2018 / 13:07:31 / stefan"
!
454 |
||
7971 | 455 |
encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
    "return an encoder-instance which translates from the encoding named oldEncodingArg
     into the encoding named newEncodingArg.
     A nil argument or #'iso10646-1' is treated as #unicode.
     If both names are equal (or the old name matches the new one as a pattern),
     NullEncoderInstance is returned.
     If the old encoding is unicode, the request is passed on to #encoderFor:.
     Otherwise a directly registered encoder class is instantiated if there is one;
     if not, the translation is composed via unicode, using an InverseEncoder
     (something -> unicode) or a CompoundEncoder (something -> unicode -> somethingElse).
     The resulting encoder is remembered in EncodersByName under the normalized names."

    |oldEncoding newEncoding encoders encoderClasses encoder decoder clsName cls|

    oldEncoding := oldEncodingArg ? #unicode.
    oldEncoding == #'iso10646-1' ifTrue:[ oldEncoding := #unicode].
    newEncoding := newEncodingArg ? #unicode.
    newEncoding == #'iso10646-1' ifTrue:[ newEncoding := #unicode].

    oldEncoding = newEncoding ifTrue:[^ NullEncoderInstance].
    (oldEncoding match:newEncoding) ifTrue:[^ NullEncoderInstance].

    (oldEncoding = #unicode) ifTrue:[
        "/ unicode -> something
        ^ self encoderFor:newEncoding.
    ].

    oldEncoding := oldEncoding asSymbol.
    newEncoding := newEncoding asSymbol.

    AccessLock critical:[
        encoders := EncodersByName at:oldEncoding ifAbsentPut:[Dictionary new].
        "/ probe the cache with the normalized name; the entry is stored under
        "/ newEncoding below, so using the raw argument here (which may be nil
        "/ or an alias) would never find it again.
        encoder := encoders at:newEncoding ifAbsent:nil.
        encoder isNil ifTrue:[
            encoderClasses := self encoderClassesByName at:oldEncoding ifAbsentPut:[Dictionary new].
            clsName := encoderClasses at:newEncoding ifAbsent:nil.
            clsName notNil ifTrue:[
                clsName isBehavior ifTrue:[
                    cls := clsName
                ] ifFalse:[
                    cls := CharacterEncoderImplementations at:clsName.
                ]
            ].
        ].
    ].
    "/ instantiate outside of the critical region
    cls notNil ifTrue:[
        encoder := cls new.
    ].

    encoder isNil ifTrue:[
        "/ something -> unicode
        decoder := self encoderFor:oldEncoding.
        (newEncoding == #unicode) ifTrue:[
            encoder := InverseEncoder new decoder:decoder.
        ] ifFalse:[
            "/ do it as: oldEncoding -> unicode -> newEncoding
            "/ unicode -> something
            encoder := self encoderFor:newEncoding.
            encoder := CompoundEncoder new encoder:encoder decoder:decoder.
        ].
    ].

    AccessLock critical:[
        (EncodersByName at:oldEncoding) at:newEncoding put:encoder
    ].
    ^ encoder

    "
     CharacterEncoder initialize
     CharacterEncoder encoderToEncodeFrom:#'latin1' into:#'jis7'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'mac-cyrillic'
     CharacterEncoder encoderToEncodeFrom:#'ms-arabic' into:#'mac-arabic'
     CharacterEncoder encoderToEncodeFrom:#'iso8859-5' into:#'koi8-r'
     CharacterEncoder encoderToEncodeFrom:#'iso8859-5' into:#'unicode'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'koi8-u'
     CharacterEncoder encoderToEncodeFrom:#'utf-8' into:#unicode
    "

    "Modified: / 12-07-2012 / 19:45:15 / cg"
    "Modified: / 16-01-2018 / 17:11:17 / stefan"
    "Modified (comment): / 17-01-2018 / 12:58:32 / stefan"
! !
526 |
||
7932 | 527 |
!CharacterEncoder class methodsFor:'Compatibility-ST80'! |
528 |
||
25233 | 529 |
encoderNamed:encoderName
    "/ q & d hack:
    "/ given a name (such as cp850), return an encoder instance.
    "/ #default answers a new DefaultEncoder; any other name is looked up
    "/ via #encoderFor: (using the interned symbol for the name).
    "/ If that lookup answers nil, halt and fall back to a new instance of the receiver.

    |found|

    encoderName == #default ifTrue:[
        ^ DefaultEncoder new
    ].

    found := self encoderFor:encoderName asSymbolIfInterned.
    found isNil ifTrue:[
        self halt:'should not be reached'.
        ^ self new
    ].
    ^ found

    "
     self encoderNamed:'foo'
     self encoderNamed:'utf8'
     self encoderNamed:'cp850'
    "
!
551 |
||
552 |
platformName
    "ST80 compatibility: answer the platform name as reported by OperatingSystem"

    |nameOfPlatform|

    nameOfPlatform := OperatingSystem platformName.
    ^ nameOfPlatform

    "Created: 20.6.1997 / 17:34:03 / cg"
    "Modified: 20.6.1997 / 17:38:40 / cg"
! !
|
558 |
||
11316
0b2757774461
access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents:
11300
diff
changeset
|
559 |
!CharacterEncoder class methodsFor:'accessing'! |
0b2757774461
access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents:
11300
diff
changeset
|
560 |
|
0b2757774461
access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents:
11300
diff
changeset
|
561 |
nullEncoderInstance
    "answer the encoder held in NullEncoderInstance
     (assigned a NullEncoder in #initialize)"

    ^ NullEncoderInstance
! !
0b2757774461
access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents:
11300
diff
changeset
|
564 |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
565 |
!CharacterEncoder class methodsFor:'class initialization'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
566 |
|
18305 | 567 |
encoderClassesByName
    "answer the dictionary of known encoder classes by name;
     it is built on demand by #initializeEncoderClassesByName"

    EncoderClassesByName notNil ifTrue:[
        ^ EncoderClassesByName
    ].
    self initializeEncoderClassesByName.
    ^ EncoderClassesByName
!
|
573 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
574 |
initialize
    "set up the class-side state: the access lock, the shared null encoder,
     the (empty) encoder caches and the name -> encoder class table.
     Nothing is done if AccessLock is already present (i.e. already initialized)."

    AccessLock isNil ifTrue:[
        AccessLock := RecursionLock name:'CharacterEncoder'.
        NullEncoderInstance := NullEncoder new.

        EncodersByName := Dictionary new.
        CachedEncoders := Dictionary new.

        self initializeEncoderClassesByName.
    ].

    "
     self initialize
    "

    "Modified: / 01-04-2011 / 14:30:06 / cg"
    "Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 27-02-2017 / 15:43:56 / stefan"
!
593 |
||
594 |
initializeEncoderClassesByName
    "initialize the dictionary which maps commonly used names
     to encoder classes.
     This is done, because some encodings come along with different names.

     EncoderClassesByName is rebuilt from scratch as a two-level dictionary:
        decodedName -> (encodingAlias -> className or class).
     The font-specific/ms-default style names are registered with NullEncoder directly;
     all others come from the table below, where every row is
        (className decodedName (alias1 alias2 ...)).
     Registering the same alias twice for one decodedName halts with 'conflicting alias'."

    |ud|

    EncoderClassesByName := Dictionary new.

    EncoderClassesByName at:#'unicode' put:(ud := Dictionary new:237).
    ud at:#'fontspecific' put:NullEncoder.
    ud at:#'adobe-fontspecific' put:NullEncoder.
    ud at:#'ms-oem' put:NullEncoder.
    ud at:#'ms-default' put:NullEncoder.

    "/ className decoded-name array-of-encodingNames
    #(
        (ASCII unicode ( ascii 'us-ascii' 'iso-ir-6' 'ibm-367' 'ms-cp367' 'cp367' 'iso646-us' 'ibm-cp367' 'ansi_x3.4-1968' ))
        (#'ASCII::ASCII7' unicode ( ascii7))

        (BIG5 unicode ( big5 ))

        (CNS11643 unicode ( 'cns11643' ))

        (CP437 unicode ( 'cp437' 'cp-437' 'ibm-437' 'ms-cp437' 'microsoft-cp437' 'ibm-cp437' ))
        (CP850 unicode ( 'cp850' 'cp-850' 'ms-cp850' 'microsoft-cp850'
                         'oem850' 'oem-850' 'ms-oem850' 'microsoft-oem850' ))

        (EBCDIC unicode ( 'ebcdic' ))
        (#'EBCDIC::EBCDIC_037' unicode ( 'ebcdic-037' 'cp-037' 'cp-37' ))

"/      (GB2313_1980 unicode ( 'gb2313' 'gb2313-1980' ))

        (GB2312_1980_0 unicode ( 'gb2312' 'gb2312.1980' 'gb2312.1980-0'))

        (HANGUL unicode ( 'hangul' ))

        (ISO10646_1 unicode ( unicode 'iso10646_1' 'iso10646-1' 'iso-10646-1' ))

        (ISO10646_to_UTF8 unicode ( utf8 'utf-8' 'utf_8' ))
        (ISO10646_to_UTF16BE unicode ( utf16b utf16be 'utf-16b' 'utf-16be' ))
        "/ 'utf-16l' mirrors 'utf-16b' of the big endian row above;
        "/ the old 'utf-16e' spelling is kept, so existing users of it still work.
        (ISO10646_to_UTF16LE unicode ( utf16l utf16le 'utf-16l' 'utf-16e' 'utf-16le' 'utf-16'))

        (ISO10646_to_UTF8_MAC unicode ( 'utf8-mac' 'utf-8-mac' ))
        (ISO10646_to_XMLUTF8 unicode ( 'utf8-XML' ))

        (ISO8859_1 unicode ( 'iso8859_1' 'iso8859-1' 'iso-8859-1' 'latin-1' 'latin1' 'iso-ir-100' 'ibm-819' 'ms-cp819' 'ibm-cp819' 'iso8859'))

        (ISO8859_2 unicode ( 'iso8859_2' 'iso8859-2' 'iso-8859-2' 'latin2' 'latin-2' 'iso-ir-101'))

        (ISO8859_3 unicode ( 'iso8859_3' 'iso8859-3' 'iso-8859-3' 'latin3' 'latin-3' 'iso-ir-109'))

        (ISO8859_4 unicode ( 'iso8859_4' 'iso8859-4' 'iso-8859-4' 'latin4' 'latin-4' 'iso-ir-110'))

        (ISO8859_5 unicode ( 'iso8859_5' 'iso8859-5' 'iso-8859-5' 'cyrillic' 'iso-ir-144' ))

        (ISO8859_6 unicode ( 'iso8859_6' 'iso8859-6' 'iso-8859-6' 'arabic' 'asmo-708' 'ecma-114' 'iso-ir-127' ))

        (ISO8859_7 unicode ( 'iso8859_7' 'iso8859-7' 'iso-8859-7' 'greek' 'iso-ir-126' 'ecma-118'))

        (ISO8859_8 unicode ( 'iso8859_8' 'iso8859-8' 'iso-8859-8' 'hebrew' 'iso-ir-138' ))

        (ISO8859_9 unicode ( 'iso8859_9' 'iso8859-9' 'iso-8859-9' 'latin5' 'latin-5' 'iso-ir-148'))

        (ISO8859_10 unicode ( 'iso8859_10' 'iso8859-10' 'iso-8859-10' 'latin6' 'latin-6' 'iso-ir-157'))

        (ISO8859_11 unicode ( 'iso8859_11' 'iso8859-11' 'iso-8859-11' 'thai' ))

        (ISO8859_13 unicode ( 'iso8859_13' 'iso8859-13' 'iso-8859-13' 'latin7' 'latin-7' ))

        (ISO8859_14 unicode ( 'iso8859_14' 'iso8859-14' 'iso-8859-14' 'latin8' 'latin-8' 'latin-celtic' ))

        (ISO8859_15 unicode ( 'iso8859_15' 'iso8859-15' 'iso-8859-15' 'latin9' 'latin-9' 'iso-ir-203'))

        (ISO8859_16 unicode ( 'iso8859_16' 'iso8859-16' 'iso-8859-16' 'latin10' 'latin-10' ))

        (JIS0201 unicode ( 'jis0201' #'jisx0201.1976-0'))

        (JIS0208 unicode ( jis0208 'jisx0208' 'jisx0208.1983-0' 'jisx0208.1990-0'))

        (JIS0208_to_JIS7 jis0208 ( jis7 'jis-7' 'x-jis7' 'x-iso2022-jp' 'iso2022-jp'))

        (JIS0208_to_EUC jis0208 ( euc #'x-euc-jp' ))

        (JIS0208_to_SJIS jis0208 ( 'sjis' 'shiftjis' 'x-sjis' #'x-shift-jis' #'shift-jis'))

        (JIS0212 unicode ( 'jis0212' ))

        (JOHAB unicode ( 'johab' ))

        (KOI7 unicode ( 'koi7' ))

        (KOI8_R unicode ( #'koi8-r' 'cp878' ))

        (KOI8_U unicode ( #'koi8-u' ))

        (KSC5601 unicode ( #'ksc5601' ))

        (MAC_Arabic unicode ( #'mac-arabic' 'macarabic' ))

        (MAC_CentralEuropean unicode ( #'mac-centraleuropean' #'mac-centraleurope' 'maccentraleurope' 'maccentraleuropean' ))

        (MAC_Croatian unicode ( #'mac-croatian' 'maccroatian'))

        (MAC_Cyrillic unicode ( #'mac-cyrillic' 'maccyrillic' ))

        (MAC_Dingbats unicode ( #'mac-dingbats' 'macdingbats' 'macdingbat'))

        (MAC_Farsi unicode ( #'mac-farsi' 'macfarsi' ))

        (MAC_Greek unicode ( #'mac-greek' #'macgreek' ))

        (MAC_Hebrew unicode ( #'mac-hebrew' #'machebrew' ))

        (MAC_Iceland unicode ( #'mac-iceland' #'maciceland' ))

        (MAC_Japanese unicode ( #'mac-japanese' #'macjapanese' ))

        (MAC_Korean unicode ( #'mac-korean' #'mackorean' ))

        (MAC_Roman unicode ( #'mac-roman' #'macroman' 'macintosh' 'cp10000' ))

        (MAC_Romanian unicode ( #'mac-romanian' #'macromanian' ))

        (MAC_Symbol unicode ( #'mac-symbol' #'macsymbol' ))

        (MAC_Thai unicode ( #'mac-thai' #'macthai' ))

        (MAC_Turkish unicode ( #'mac-turkish' #'macturkish' ))

        (MS_Ansi unicode ( #'ms-ansi' 'microsoft-ansi'))

        (MS_CP1252 unicode ( 'cp1252' 'cp-1252' 'ms-cp1252' 'microsoft-cp1252' 'windows-1252' 'windows-latin1'))

        (MS_Arabic unicode ( 'cp1256' 'cp-1256' 'ms-arabic' 'ms-cp1256' 'microsoft-cp1256' 'microsoft-arabic' 'windows-1256' ))

        (MS_Baltic unicode ( 'cp1257' 'cp-1257' 'ms-baltic' 'ms-cp1257' 'microsoft-cp1257' 'microsoft-baltic' 'windows-1257' ))

        (MS_Cyrillic unicode ( 'cp1251' 'cp-1251' 'ms-cyrillic' 'ms-cp1251' 'microsoft-cp1251' 'microsoft-cyrillic' 'windows-1251' ))

        (MS_EastEuropean unicode ( 'cp1250' 'cp-1250' 'ms-easteuropean' 'ms-ee' 'ms-cp1250' 'microsoft-cp1250' 'microsoft-easteuropean' 'windows-1250' ))

        (MS_Greek unicode ( 'cp1253' 'cp-1253' 'ms-greek' 'ms-cp1253' 'microsoft-cp1253' 'microsoft-greek' 'windows-1253' ))

        (MS_Hebrew unicode ( 'cp1255' 'cp-1255' 'ms-hebrew' 'ms-cp1255' 'microsoft-cp1255' 'microsoft-hebrew' 'windows-1255' ))

"/      (MS_Symbol unicode ( 'ms-symbol' 'microsoft-symbol' ))

        (MS_Turkish unicode ( 'cp1254' 'cp-1254' 'ms-turkish' 'ms-cp1254' 'microsoft-cp1254' 'microsoft-turkish' 'windows-1254' ))

        (NEXT unicode ( 'next' 'nextstep' ))

        (ISO10646_to_SGML unicode ( 'sgml' ))
        (ISO10646_to_JavaText unicode ( 'java' 'javaText' ))

"/      (AdobeStandard unicode ( 'Adobe Standard' 'AdobeStandard' 'Adobe' 'adobe-standard' ))
"/      (AdobeSymbol unicode ( 'Adobe Symbol' 'AdobeSymbol' 'Symbol' 'adobe-symbol' ))
    ) triplesDo:[:className :decodesTo :encodesTo |
        |decodesToDict|

        "/ notice that the encoders are not yet installed as autoloaded.
        "/ Therefore, we remember their names here.
        decodesToDict := EncoderClassesByName at:decodesTo ifAbsentPut:[Dictionary new].
        encodesTo do:[:eachEncodingAlias |
            decodesToDict at:eachEncodingAlias put:className ifPresent:[:classAlready | self halt:'conflicting alias'].
        ].
    ].
    "/ flush
    "/ EncodersByName := Dictionary new.

    "
     self initializeEncoderClassesByName
    "

    "Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 27-02-2017 / 16:17:43 / stefan"
    "Modified: / 12-11-2017 / 13:05:38 / cg"
    "Modified: / 08-10-2018 / 08:59:01 / Claus Gittinger"
    "Modified: / 26-07-2019 / 16:35:46 / Stefan Vogel"
! !
774 |
||
8122 | 775 |
!CharacterEncoder class methodsFor:'constants'! |
776 |
||
777 |
jis7KanjiEscapeSequence
    "answer the escape sequence (ESC followed by '$B') which switches to kanji
     in jis7 encoded strings; the string is built once and then reused.
     This happens to be the same as ISO2022-JP's escape sequence."

    Jis7KanjiEscapeSequence notNil ifTrue:[
        ^ Jis7KanjiEscapeSequence
    ].
    Jis7KanjiEscapeSequence := Character esc asString , '$B'.
    ^ Jis7KanjiEscapeSequence.

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
|
789 |
||
790 |
jis7KanjiOldEscapeSequence
    "answer the escape sequence (ESC followed by '$@') which switches to kanji
     in some old jis7 encoded strings; the string is built once and then reused."

    Jis7KanjiOldEscapeSequence notNil ifTrue:[
        ^ Jis7KanjiOldEscapeSequence
    ].
    Jis7KanjiOldEscapeSequence := Character esc asString , '$@'.
    ^ Jis7KanjiOldEscapeSequence.
!
|
798 |
||
799 |
jis7RomanEscapeSequence
    "answer the escape sequence (ESC followed by '(J') which switches to roman
     in jis7 encoded strings; the string is built once and then reused."

    Jis7RomanEscapeSequence notNil ifTrue:[
        ^ Jis7RomanEscapeSequence
    ].
    Jis7RomanEscapeSequence := Character esc asString , '(J'.
    ^ Jis7RomanEscapeSequence.

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
|
810 |
||
811 |
jisISO2022EscapeSequence
    "answer the escape sequence (ESC '&@' ESC '$B') which switches to kanji
     in iso2022 encoded strings; the string is built once and then reused."

    |escString|

    JisISO2022EscapeSequence notNil ifTrue:[
        ^ JisISO2022EscapeSequence
    ].
    escString := Character esc asString.
    JisISO2022EscapeSequence := escString , '&@' , escString , '$B'.
    ^ JisISO2022EscapeSequence.
! !
|
819 |
||
7892 | 820 |
!CharacterEncoder class methodsFor:'encoding & decoding'! |
821 |
||
22470 | 822 |
decodeString:anEncodedStringOrByteCollection
    "class-side convenience: create a fresh instance of the receiver
     and let it decode the argument"

    |decoder|

    decoder := self new.
    ^ decoder decodeString:anEncodedStringOrByteCollection

    "
     CharacterEncoderImplementations::ISO8859_1 decodeString:'hello'
     CharacterEncoderImplementations::ISO8859_1 decodeString:'hello' asByteArray
    "

    "Modified (comment): / 17-01-2018 / 13:44:41 / stefan"
!
832 |
||
7972 | 833 |
decodeString:aString from:oldEncoding
    "translate aString from the encoding named oldEncoding into unicode;
     this is #encodeString:from:into: with #unicode as the target"

    |targetEncoding|

    targetEncoding := #unicode.
    ^ self encodeString:aString from:oldEncoding into:targetEncoding

    "
     self encodeString:'hello' into:#ebcdic

     self decodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic
    "

    "Modified (format): / 17-01-2018 / 15:47:00 / stefan"
!
844 |
||
7994 | 845 |
encode:codePoint from:oldEncodingArg into:newEncodingArg
    "translate a single codePoint from the encoding named oldEncodingArg into the one
     named newEncodingArg and answer the result.
     nil, #'iso10646-1' and #'ms-default' are treated as aliases of #unicode.
     The codePoint itself is answered if both names are identical, or if it is at most
     16rFF and the translation is between #unicode and #'iso8859-1'.
     Otherwise the work is done by the encoder from #encoderToEncodeFrom:into:"

    |sourceName targetName normalize|

    oldEncodingArg == newEncodingArg ifTrue:[^ codePoint].

    normalize := [:name |
        (name isNil or:[name == #'iso10646-1' or:[name == #'ms-default']])
            ifTrue:[#unicode]
            ifFalse:[name]
    ].
    sourceName := normalize value:oldEncodingArg.
    targetName := normalize value:newEncodingArg.
    sourceName == targetName ifTrue:[^ codePoint].

    "/ the lower 256 codes are the same in unicode and iso8859-1
    ((sourceName == #unicode and:[targetName == #'iso8859-1'])
     or:[targetName == #unicode and:[sourceName == #'iso8859-1']]) ifTrue:[
        codePoint <= 16rFF ifTrue:[^ codePoint].
    ].

    ^ (self encoderToEncodeFrom:sourceName into:targetName) encode:codePoint.

    "Modified: / 17-01-2018 / 14:33:08 / stefan"
!
878 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
879 |
encodeString:aUnicodeString
    "class-side convenience: given a string in unicode, create a fresh instance
     of the receiver and answer the string it encodes from it"

    |encoder|

    encoder := self new.
    ^ encoder encodeString:aUnicodeString

    "
     CharacterEncoderImplementations::ISO8859_1 encodeString:'hello'
    "

    "Modified (comment): / 16-01-2018 / 21:57:35 / stefan"
!
890 |
||
7967 | 891 |
encodeString:aString from:oldEncodingArg into:newEncodingArg
    "translate aString from the encoding named oldEncodingArg into the one named
     newEncodingArg and answer the result.
     nil, #'iso10646-1' and #'ms-default' are treated as aliases of #unicode.
     aString itself is answered if both names are identical, or if it is not
     a wide string and the translation is between #unicode and #'iso8859-1'.
     Otherwise the work is done by the encoder from #encoderToEncodeFrom:into:"

    |sourceName targetName normalize|

    oldEncodingArg == newEncodingArg ifTrue:[^ aString].

    "/ some hard coded aliases
    normalize := [:name |
        (name isNil or:[name == #'iso10646-1' or:[name == #'ms-default']])
            ifTrue:[#'unicode']
            ifFalse:[name]
    ].
    sourceName := normalize value:oldEncodingArg.
    targetName := normalize value:newEncodingArg.
    sourceName == targetName ifTrue:[^ aString].

    "/ for single-byte strings, iso8859-1 and unicode (up to FF) have the same encoding
    ((sourceName == #unicode and:[targetName == #'iso8859-1'])
     or:[targetName == #unicode and:[sourceName == #'iso8859-1']]) ifTrue:[
        aString isWideString ifFalse:[^ aString].
    ].

    ^ (self encoderToEncodeFrom:sourceName into:targetName) encodeString:aString.

    "
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#ascii
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#unicode
     self encodeString:(self encodeString:'Äh ... hello' into:#ebcdic) from:#ebcdic into:#utf8
    "

    "Modified (comment): / 17-01-2018 / 15:49:40 / stefan"
!
932 |
||
933 |
encodeString:aString into:newEncoding
    "translate aString, taken to be unicode, into the encoding named newEncoding;
     this is #encodeString:from:into: with #unicode as the source"

    |sourceEncoding|

    sourceEncoding := #unicode.
    ^ self encodeString:aString from:sourceEncoding into:newEncoding

    "
     self encodeString:'hello' into:#ebcdic

     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#ascii
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#unicode
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#utf8
    "

    "Modified (comment): / 17-01-2018 / 15:48:07 / stefan"
! !
946 |
||
947 |
!CharacterEncoder class methodsFor:'private'! |
|
948 |
||
949 |
flushCode
    "make sure the class is initialized; then, for a non-abstract class which
     names at least one map file, remove the class-side #mapping method"

    self initialize.

    self isAbstract ifTrue:[^ self].
    (self mapFileURL1_relativePathName isNil
     and:[ self mapFileURL2_relativePathName isNil]) ifTrue:[^ self].

    self class removeSelector:#mapping.

    "
     self flushCode
    "
! !
|
963 |
||
964 |
!CharacterEncoder class methodsFor:'private-mapping setup'! |
|
965 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
966 |
generateCode
    "let a CharacterEncoderCodeGenerator, targeted at the receiver, generate code"

    |generator|

    generator := CharacterEncoderCodeGenerator new targetClass:self.
    generator generateCode.
!
|
969 |
||
970 |
generateSubclassCode
    "let a CharacterEncoderCodeGenerator, targeted at the receiver, generate subclass code"

    |generator|

    generator := CharacterEncoderCodeGenerator new targetClass:self.
    generator generateSubclassCode.
!
973 |
||
7914 | 974 |
mapFileURL1_codeColumn
    "answer the code column for the first map file; 1 here"

    ^ 1
!
|
977 |
||
7912 | 978 |
mapFileURL1_relativePathName
    "answer the relative path name of the first map file (see #mappingURL1);
     nil here - must be redefined in concrete subclass(es)"

    ^ nil
!
|
983 |
||
984 |
mapFileURL2_relativePathName
    "answer the relative path name of the second map file (see #mappingURL2);
     nil here - must be redefined in concrete subclass(es)"

    ^ nil
!
|
989 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
990 |
mappingURL1
    "answer the URL of the first map file: the unicode.org mappings prefix
     followed by #mapFileURL1_relativePathName; nil if there is no such path"

    |relativePath|

    relativePath := self mapFileURL1_relativePathName.
    relativePath notNil ifTrue:[
        ^ 'http://www.unicode.org/Public/MAPPINGS/' , relativePath
    ].
    ^ nil
!
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
999 |
|
7892 | 1000 |
mappingURL2
    "answer the URL of the second map file: the dkuug.dk charmaps prefix
     followed by #mapFileURL2_relativePathName; nil if there is no such path"

    |relativePath|

    relativePath := self mapFileURL2_relativePathName.
    relativePath notNil ifTrue:[
        ^ 'http://std.dkuug.dk/i18n/charmaps/' , relativePath
    ].
    ^ nil
! !
1009 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1010 |
!CharacterEncoder class methodsFor:'queries'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1011 |
|
19465 | 1012 |
isAbstract
    "answer true if the receiver is an abstract class.
     That is the case for CharacterEncoder itself only; subclasses answer false,
     so abstract subclasses must redefine this again."

    |iAmTheRootClass|

    iAmTheRootClass := self == CharacterEncoder.
    ^ iAmTheRootClass
!
|
1019 |
||
7938 | 1020 |
isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg
    "return true, if superSetEncoding encoding includes all characters of subSetEncoding.
     (this means: characters are included - not that they have the same encoding).
     Both names are compared in lowercase; the subset name may be a match pattern."

    |subName superName|

    subSetEncodingArg = superSetEncodingArg ifTrue:[^ true].

    subName := subSetEncodingArg asLowercase.
    superName := superSetEncodingArg asLowercase.

    (subName match:superName) ifTrue:[^ true].

    "/ assume that any character is in unicode
    ('iso10646*' match:superName) ifTrue:[^ true].
    superName = 'unicode' ifTrue:[^ true].
    superName = 'ms-ansi' ifTrue:[^ true].
    superName = 'ms-default' ifTrue:[^ true].

    "/ if the subSet is iso8859-*, that means ascii (i.e. the lower 7 bits of iso8859 only).
    (subName = 'iso8859*' or:[subName = 'iso8859-*']) ifTrue:[
        (('ascii*' match:superName)
         or:[('ms-ansi*' match:superName)
         or:['ms-default*' match:superName]]) ifTrue:[^ true].
    ].
    subName = 'ascii' ifTrue:[
        (('iso8859*' match:superName)
         or:[('ms-ansi*' match:superName)
         or:['ms-default*' match:superName]]) ifTrue:[^ true].
    ].

    "/ TODO: check the charSets mappingTables...
    "/ self halt.
    ^ false.
!
|
1056 |
||
7919 | 1057 |
nameOfDecodedCode
    "answer the name of the code which is produced by decoding (and consumed by encoding).
     Most coders decode from their code into unicode / encode from unicode into their code.
     There are a few exceptions to this, though - these must redefine this."

    ^ #'unicode'
|
1062 |
! |
|
1063 |
||
1064 |
nameOfEncoding
    "answer the encoding name as a symbol; it is derived from the class name
     without prefix, in lowercase and with underscores replaced by dashes"

    |lowercaseName|

    lowercaseName := self nameWithoutPrefix asLowercase.
    ^ (lowercaseName copyReplaceAll:$_ with:$-) asSymbol
7919 | 1066 |
! |
1067 |
||
7959 | 1068 |
supportedExternalEncodings
    "return an array of arrays containing the names of encodings
     which are supported for external resources (i.e. files).
     The first element of each entry is the internally used symbolic name,
     the second is a user-readable string (description).
     More than one external name may be mapped onto the same symbolic name.
     Entries which are commented out below are currently not offered."

    ^ #(
        ('utf8'         'Unicode as 8Bit characters'   )
        ('utf16BE'      'Unicode as 16Bit big-endian'   )
        ('utf16LE'      'Unicode as 16Bit little-endian'   )
"/        ('utf7'         'Unicode as 7Bit characters'   )
"/        nil
        ('ascii'        'Common 7bit subset of iso8859' )
        ('iso8859-1'    'Western'   )
        ('iso8859-2'    'Central European'   )
        ('iso8859-3'    'South European'   )
        ('iso8859-4'    'Baltic'   )
        ('iso8859-5'    'Cyrillic'   )
        ('iso8859-6'    'Arabic'   )
        ('iso8859-7'    'Greek'   )
        ('iso8859-8'    'Hebrew'   )
        ('iso8859-15'   'Western with Euro'   )
        ('iso8859-16'   'South European with Euro'   )
"/        nil
        ('macintosh'    'MAC Western'   )
"/        nil
        ('koi7'         'Cyrillic (Old)'   )
        ('koi8-r'       'Cyrillic'   )
        ('koi8-u'       'Cyrillic (Ukraine)'   )
"/        nil
        ('cp437'        'Windows US / codepage 437'   )
        ('cp850'        'Windows Latin1 / codepage 850'   )
        ('cp1250'       'Windows Latin2 / codepage 1250'   )
        ('cp1251'       'Windows Cyrillic / codepage 1251')
        ('cp1252'       'Windows ANSI / codepage 1252'   )
"/        ('mac'          'macintosh 8 bit'   )
        ('next'         'NeXT 8 bit'   )
"/        ('hp'           'hpux 8 bit'   )
"/        nil
        ('euc'          'EUC - extended unix code (japanese)'   )
        ('jis7'         'JIS7 - jis 7bit escape codes (japanese)'   )
        ('iso-2022-jp'  'Same as jis 7bit'   )
        ('sjis'         'SJIS - shift jis 8bit codes (japanese)'   )
"/        nil
        ('gb'           'GB - mainland china'   )
        ('big5'         'BIG5 - taiwan'   )
"/        ('ksc'          'korean'   )
        ('sgml'         'SGML (XML/HTML) character escapes'   )
        ('java'         'JavaText (\uXXXX) character escapes'   )
    )

    "Modified: / 23-10-2006 / 13:27:48 / cg"
|
7959 | 1121 |
! |
1122 |
||
7947 | 1123 |
userFriendlyNameOfEncoding
    "answer the encoding name (see nameOfEncoding) with its first character in uppercase"

    ^ self nameOfEncoding asUppercaseFirst
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1125 |
! ! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1126 |
|
8711
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1127 |
!CharacterEncoder class methodsFor:'utilities'! |
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1128 |
|
23982 | 1129 |
detectAndSkipBOMInStream:stream
    "Look for a Unicode byte order mark (BOM) at the current position of stream.
     If one is found, the stream is left positioned right behind it
     and one of
        #utf8
        #utf32be
        #utf32le
        #utf16le
        #utf16be
     is returned.
     If no BOM is detected, the stream is repositioned to where it was before
     and nil is returned (nil is also returned for a stream which is at its end)."

    |pos byte1|

    pos := stream position.
    stream atEnd ifTrue:[^ nil].
    "/ NOTE(review): only the first byte is guarded by atEnd. If the stream ends inside
    "/ a BOM candidate, the peek/next sends below presumably answer nil - confirm that
    "/ sending asInteger to nil is harmless, or that callers always pass at least 4 bytes.
    byte1 := stream peek asInteger.
    "/ EF-BB-BF -> utf8
    byte1 == 16rEF ifTrue:[
        stream next.
        stream peek asInteger == 16rBB ifTrue:[
            stream next.
            stream next asInteger == 16rBF ifTrue:[
                ^ #utf8
            ]
        ].
        "/ incomplete utf8 mark: undo everything consumed so far
        stream position:pos. ^nil
    ].
    "00-00-FE-FF big endian utf32"
    byte1 == 16r00 ifTrue:[
        stream next.
        stream peek asInteger == 16r00 ifTrue:[
            stream next.
            stream peek asInteger == 16rFE ifTrue:[
                stream next.
                stream next asInteger == 16rFF ifTrue:[
                    ^ #utf32be
                ]
            ]
        ].
        stream position:pos. ^nil
    ].

    "FF-FE little endian utf16 or utf32"
    byte1 == 16rFF ifTrue:[
        stream next.
        stream peek asInteger == 16rFE ifTrue:[
            stream next.
            stream peek asInteger == 0 ifTrue:[
                stream next.
                stream next asInteger == 0 ifTrue:[
                    "FF-FE-00-00 little endian utf32"
                    ^ #utf32le.
                ].
                "/ FF-FE-00-xx: it is utf16le; step back over the two bytes read behind the mark
                stream skip:-2
            ].
            ^ #utf16le
        ].
        stream position:pos. ^nil
    ].

    "FE-FF big endian utf16"
    byte1 == 16rFE ifTrue:[
        stream next.
        stream next asInteger == 16rFF ifTrue:[
            ^ #utf16be
        ].
    ].
    "/ no BOM (or an incomplete FE-FF mark): restore the original position
    stream position:pos.
    ^ nil

    "
     |s enc|

     s := #[1 2 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == nil).
     self assert:(s position == 0).

     s := #[16rFF 2 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == nil).
     self assert:(s position == 0).

     s := #[16rFF 16rFE 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == #utf16le).
     self assert:(s position == 2).

     s := #[16rFE 16rFF 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == #utf16be).
     self assert:(s position == 2).

     s := #[16rFF 16rFE 0 0 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == #utf32le).
     self assert:(s position == 4).

     s := #[0 0 16rFE 16rFF 0 0 3 4] readStream.
     enc := self detectAndSkipBOMInStream:s.
     self assert:(enc == #utf32be).
     self assert:(s position == 4).

    "
|
1231 |
! |
|
1232 |
||
23981 | 1233 |
detectBOMInBuffer:buffer
    "look for a byte order mark at the beginning of buffer.
     Returns one of
        #utf8
        #utf32be
        #utf32le
        #utf16le
        #utf16be
        nil"

    |probeStream|

    probeStream := buffer readStream.
    ^ self detectAndSkipBOMInStream:probeStream
23981 | 1243 |
! |
1244 |
||
8711
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1245 |
guessEncodingOfBuffer:buffer
    "try to guess a string-buffer's encoding.
     The registered EncodingDetectors are asked in order; the first non-nil
     answer wins. Basically, they look for a BOM (byte order mark)
     or a special string of the form
            encoding #name
     or:
            encoding: name
     within the given buffer
     (which is usually found within the first few bytes of a textFile).
     Many editors and tools write such comments (eg. emacs, st/x, etc.).
     Answers nil for buffers shorter than 4 elements or if no detector matches."

    |detected|

    buffer size >= 4 ifFalse:[
        "not enough bytes to determine the contents"
        ^ nil.
    ].
    EncodingDetectors isNil ifTrue:[
        "/ the detector list is set up lazily
        self initializeEncodingDetectors.
    ].
    EncodingDetectors do:[:eachDetector |
        detected := eachDetector value:buffer.
        detected notNil ifTrue:[
            ^ detected
        ].
    ].
    ^ nil
14169
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
1271 |
! |
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
1272 |
|
eab487f07a2b
comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents:
14094
diff
changeset
|
1273 |
guessEncodingOfFile:aFilename
    "look for a BOM (byte order mark) or a special string of the form:
            encoding #name
     or:
            encoding: name
     within the first 512 bytes of the file named aFilename.
     The actual detection is done by guessEncodingOfBuffer:.
     Return a symbol like #utf8, or nil if the file cannot be opened
     or if no encoding could be detected."

    |s buffer|

    s := aFilename asFilename readStreamOrNil.
    s isNil ifTrue:[^ nil].

    buffer := String new:512.
    "/ make sure that the file is closed, even if reading raises an error
    [
        s nextBytes:buffer size into:buffer.
    ] ensure:[
        s close.
    ].

    ^ self guessEncodingOfBuffer:buffer.

    "
     self guessEncodingOfFile:'../../libview/resources/de.rs' asFilename
     self guessEncodingOfFile:'../../libview/resources/ru.rs' asFilename
     self guessEncodingOfFile:'../../libview/resources/th.rs' asFilename
    "

    "Modified: / 31-05-2011 / 15:45:19 / cg"
    "Modified: / 16-01-2018 / 17:12:41 / stefan"
8711
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1302 |
! |
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1303 |
|
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1304 |
guessEncodingOfStream:aStream
    "look for a BOM (byte order mark) or a special string of the form:
            encoding #name
     or:
            encoding: name
     in the next (up to 512) bytes of aStream.
     The stream is positioned back to where it was afterwards;
     therefore, nil is answered for non-positionable streams.
     Return a symbol like #utf8."

    |savedPosition probe|

    "/ must be able to position back
    aStream isPositionable ifFalse:[^ nil].

    savedPosition := aStream position.
    probe := String new:512.
    aStream nextBytes:probe size into:probe.
    aStream position:savedPosition.

    ^ self guessEncodingOfBuffer:probe

    "Modified: / 31-05-2011 / 15:45:23 / cg"
    "Modified: / 16-01-2018 / 17:12:57 / stefan"
    "Modified (format): / 17-01-2018 / 15:51:09 / stefan"
|
8810 | 1330 |
! |
1331 |
||
19465 | 1332 |
initializeEncodingDetectors
    "setup the list of encoding detectors.
     This is a list of blocks, which get a buffer as argument,
     and return an encoding symbol or nil.
     The detectors are asked in the order in which they are added here:
        - byte order mark
        - charset / encoding keyword followed by an encoding name
        - @Encoding:<codepage number>
        - JIS / ISO-2022 escape sequences
     Can be customized for more detectors
     (used to be hard-coded in guessEncodingOfBuffer:)"

    EncodingDetectors := OrderedCollection new.

    "check for Unicode Byte Order Marks (BOM)"
    EncodingDetectors add:[:buffer | self detectBOMInBuffer:buffer].

    "check for an inline encoding markup (charset= / encoding=) substring"
    EncodingDetectors
        add:[:buffer |
            |guess lcBuffer quote|

            "/ the keyword is searched case-insensitive; the name is read from the original buffer
            lcBuffer := buffer asLowercase.

            guess :=
                #(charset encoding) doWithExit:[:keyWord :exit |
                    |encoderOrNil idx s w enc|

                    guess isNil ifTrue:[
                        (idx := lcBuffer findString:keyWord) ~~ 0 ifTrue:[
                            s := ReadStream on:buffer.
                            s position:idx-1 + keyWord size.
                            s skipSeparators.

                            "do not include '=' here, otherwise
                             files containing xml code (<?xml charset='utf8'> will be parsed as UTF-8"
                            "/ NOTE(review): the comment above disagrees with the code - the set
                            "/ below does contain $=. Confirm which of the two is intended.

                            [':#=' includes:s peek] whileTrue:[
                                s next.
                                s skipSeparators.
                            ].
                            s skipSeparators.
                            ('"''' includes:s peek) ifTrue:[
                                quote := s next.
                                w := s upTo:quote.
                            ] ifFalse:[
                                w := s upToElementForWhich:[:ch | ch isSeparator or:[ch == $" or:[ch == $' or:[ch == $> ]]]].
                            ].
                            w notNil ifTrue:[
                                enc := w withoutQuotes.
                                "/ strip the x- prefix of non-registered names
                                (enc startsWith:'x-') ifTrue:[
                                    enc := enc copyFrom:3.
                                ].
                                encoderOrNil := self encoderFor:enc ifAbsent:nil.
                                encoderOrNil notNil ifTrue:[
                                    exit value:(encoderOrNil nameOfEncoding)
                                ].
                            ].
                        ].
                    ].
                    nil
                ].
            guess
        ].

    "/ check for a string like /*@!!Encoding:1252*/
    EncodingDetectors
        add:[:buffer |
            |guess idx s keyWord codePageNr enc encoderOrNil|

            keyWord := '@!!Encoding:'.
            (idx := buffer findString:keyWord) ~~ 0 ifTrue:[
                s := ReadStream on:buffer.
                s position:idx-1 + keyWord size.
                s skipSeparators.

                s peek isDigit ifTrue:[
                    "/ the number is taken as a codepage number, i.e. 1252 -> cp1252
                    codePageNr := Integer readFrom:s.
                    enc := 'cp%1' bindWith:codePageNr.
                    encoderOrNil := self encoderFor:enc ifAbsent:nil.
                    encoderOrNil notNil ifTrue:[
                        guess := (encoderOrNil nameOfEncoding)
                    ].
                ].
            ].
            guess
        ].

    "/ check for JIS7 encoding
    EncodingDetectors
        add:[:buffer |
            (buffer includesString:self jisISO2022EscapeSequence) ifTrue:[
                "/ was #'iso2020-jp' (typo); 'iso-2022-jp' is the name which is
                "/ listed in supportedExternalEncodings
                #'iso-2022-jp'
            ] ifFalse:[
                (buffer includesString:self jis7KanjiEscapeSequence) ifTrue:[
                    #jis7
                ] ifFalse:[
                    (buffer includesString:self jis7KanjiOldEscapeSequence) ifTrue:[
                        #jis7
                    ] ifFalse:[
                        nil
                    ]
                ]
            ]
        ].

    "/ TODO: look for EUC, SJIS etc.
    "/ Disabled, due to too many false positives.
    "/ if required, think about it, fix it and uncomment it
"/    EncodingDetectors
"/        add:[:buffer |
"/            |guess idx|
"/
"/            idx := buffer
"/                        findFirst:[:char |
"/                            |code|
"/                            code := char codePoint.
"/                            code between:16rA1 and: 16rFE
"/                        ].
"/            ((idx ~~ 0)
"/                and:[ (buffer at:(idx + 1)) codePoint between:16rA1 and: 16rFE ])
"/            ifTrue:[
"/                guess := #euc
"/            ] ifFalse:[
"/                "/ look for SJIS ...
"/            ]
"/        ].

    "Modified: / 17-01-2018 / 15:55:36 / stefan"
    "Modified: / 05-02-2019 / 09:23:37 / Claus Gittinger"
19465 | 1457 |
! |
1458 |
||
8810 | 1459 |
showCharacterSet
    "open a CharacterSetView on the default view font, with the receiver as encoder"

    |displayFont windowLabel|

    displayFont := View defaultFont.
    "/ displayFont := (Font family:'courier' face:'medium' style:'roman' size:12 encoding:'iso10646-1').
    windowLabel := 'Characters of ',self nameWithoutPrefix.

    CharacterSetView
        openOn:displayFont
        label:windowLabel
        clickLabel:nil
        asInputFor:nil
        encoder:self

    "
     CharacterEncoderImplementations::MS_Ansi showCharacterSet
     CharacterEncoderImplementations::ISO8859_1 showCharacterSet
     CharacterEncoderImplementations::ISO8859_2 showCharacterSet
     CharacterEncoderImplementations::ISO8859_3 showCharacterSet
     CharacterEncoderImplementations::ISO8859_4 showCharacterSet
     CharacterEncoderImplementations::ISO8859_5 showCharacterSet
     CharacterEncoderImplementations::ISO8859_6 showCharacterSet
     CharacterEncoderImplementations::ISO8859_7 showCharacterSet
     CharacterEncoderImplementations::ISO8859_8 showCharacterSet
     CharacterEncoderImplementations::ISO8859_9 showCharacterSet
    "
8711
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1484 |
! ! |
c5f28b4c719d
guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents:
8388
diff
changeset
|
1485 |
|
7892 | 1486 |
!CharacterEncoder methodsFor:'encoding & decoding'! |
1487 |
||
22470 | 1488 |
decodeString:anEncodedStringOrByteCollection
    "given a string (or byte collection) in my encoding, return a unicode-string for it.
     Must be implemented by concrete subclasses."

    ^ self subclassResponsibility

    "Modified: / 16-01-2018 / 19:54:51 / stefan"
    "Modified (format): / 17-01-2018 / 13:45:06 / stefan"
|
7892 | 1495 |
! |
1496 |
||
22470 | 1497 |
encodeCharacter:aUnicodeCharacterOrCodePoint
    "encode aUnicodeCharacterOrCodePoint to a (8-bit) String or ByteArray.
     The argument may be a Character or an Integer code point;
     the result is whatever encodeString: answers for the one-character string."

    "/ convert to a character first: sending asString to an integer code point
    "/ would answer its decimal digits (65 -> '65') instead of the character
    ^ self encodeString:aUnicodeCharacterOrCodePoint asCharacter asString.

    "Created: / 17-01-2018 / 13:59:44 / stefan"
21471 | 1503 |
! |
1504 |
||
7892 | 1505 |
encodeString:aUnicodeString
    "given a string in unicode, return a string or ByteArray in my encoding for it.
     Must be implemented by concrete subclasses."

    ^ self subclassResponsibility

    "Modified: / 16-01-2018 / 19:54:44 / stefan"
    "Modified (comment): / 17-01-2018 / 13:54:44 / stefan"
|
7892 | 1512 |
! ! |
1513 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1514 |
!CharacterEncoder methodsFor:'error handling'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1515 |
|
24942 | 1516 |
decodesToUnicode
    "answer true, if this encoder decodes data to unicode,
     i.e. if my class answers #unicode as nameOfDecodedCode"

    ^ self class nameOfDecodedCode == #unicode

    "Created: / 21-11-2019 / 18:42:51 / Stefan Vogel"
|
1522 |
! |
|
1523 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1524 |
decodingError
    "report an error that there is no unicode-codePoint for a given codePoint in this encoding.
     (which is unlikely) or that the encoding is undefined for that value
     (for example, holes in the ISO-8859-3 encoding).
     Raises a DecodingError as a request, with defaultDecoderValue as its default value,
     and answers whatever that request answers."

    |badCodePoint sender|

    sender := thisContext sender.
    "/ only encode: / decode: get the code point as their first argument;
    "/ for any other sender, the parameter of the error remains nil.
    "/ (the test was inverted here; it now agrees with encodingError)
    ((sender selector == #encode:) or:[sender selector == #decode:]) ifTrue:[
        badCodePoint := sender methodHome argAt:1
    ].
    ^ (DecodingError new)
        defaultValue:(self defaultDecoderValue);
        parameter:badCodePoint;
        messageText:'invalid code';
        suspendedContext:sender;
        raiseRequest.
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1541 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1542 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1543 |
defaultDecoderValue
    "answer the code point which is placed into a decoded string,
     in case there is no unicode codePoint for a given encoded codePoint.
     (typically 16rFFFF).
     decodingError passes it as defaultValue to the DecodingError."

    ^ 16rFFFF
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1549 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1550 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1551 |
defaultEncoderValue
    "answer the code point which is placed into an encoded string,
     in case there is no codePoint for a given unicode codePoint.
     (typically the code point of $?).
     encodingError passes it as defaultValue to the EncodingError."

    ^ $? codePoint
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1557 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1558 |
|
7919 | 1559 |
encodingError
    "report an error that some unicode-codePoint cannot be represented by this encoder.
     If the sender is encode: or decode:, its first argument is passed
     as parameter of the EncodingError; otherwise, the parameter is nil."

    |callerContext callerSelector offendingCode|

    callerContext := thisContext sender.
    callerSelector := callerContext selector.
    (callerSelector == #encode: or:[callerSelector == #decode:]) ifTrue:[
        offendingCode := callerContext methodHome argAt:1
    ].
    ^ EncodingError new
        defaultValue:self defaultEncoderValue;
        parameter:offendingCode;
        messageText:'unrepresentable code (some character cannot be represented)';
        suspendedContext:callerContext;
        raiseRequest

    "Modified: / 12-07-2012 / 20:36:37 / cg"
    "Modified: / 10-01-2020 / 15:31:46 / Stefan Vogel"
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1577 |
! ! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1578 |
|
7972 | 1579 |
!CharacterEncoder methodsFor:'printing'! |
1580 |
||
1581 |
printOn:aStream
    "append a printed representation of the form decodedCode->encoding to aStream"

    aStream nextPutAll:self nameOfDecodedCode.
    aStream nextPutAll:'->'.
    aStream nextPutAll:self nameOfEncoding
|
7972 | 1586 |
! ! |
1587 |
||
7917 | 1588 |
!CharacterEncoder methodsFor:'queries'! |
1589 |
||
22426 | 1590 |
characterSize:charOrCodePoint
    "return the number of bytes required to encode charOrCodePoint.
     Must be implemented by concrete subclasses."

    ^ self subclassResponsibility

    "Created: / 15-06-2005 / 15:11:04 / janfrog"
|
1596 |
! |
|
1597 |
||
17118 | 1598 |
isEncoderFor:encoding
    "does this encode to encoding?
     The name is compared case-insensitive; iso10646-1 is treated as a synonym for unicode"

    |wantedName|

    wantedName := encoding asLowercase.
    wantedName = #'iso10646-1' ifTrue:[
        ^ #unicode = self nameOfEncoding
    ].
    ^ wantedName = self nameOfEncoding
|
1607 |
! |
|
1608 |
||
7917 | 1609 |
isNullEncoder
    "answer false here.
     NOTE(review): presumably redefined to answer true by an encoder which
     passes data through unchanged - not visible from here, please confirm"

    ^ false
|
7972 | 1611 |
! |
1612 |
||
1613 |
nameOfDecodedCode
    "answer the name of the code I decode into; forwarded to my class.
     Most coders decode from their code into unicode / encode from unicode into their code.
     There are a few exceptions to this, though - these must redefine this."

    ^ self class nameOfDecodedCode
|
1618 |
! |
|
1619 |
||
1620 |
nameOfEncoding
    "answer the name of my encoding; forwarded to my class"

    ^ self class nameOfEncoding
|
1622 |
! |
|
1623 |
||
1624 |
userFriendlyNameOfEncoding
    "answer the user-readable name of my encoding; forwarded to my class"

    ^ self class userFriendlyNameOfEncoding
|
7917 | 1626 |
! ! |
1627 |
||
11975 | 1628 |
!CharacterEncoder methodsFor:'stream support'! |
1629 |
||
22470 | 1630 |
encodeCharacter:aUnicodeCharacter on:aStream
    "given a character in unicode, write its encoding onto aStream.
     This default goes through encodeCharacter: and therefore creates a temporary collection;
     subclasses can redefine this to avoid allocating many new string instances."

    |encoded|

    encoded := self encodeCharacter:aUnicodeCharacter.
    aStream nextPutAll:encoded.

    "Created: / 16-02-2017 / 16:18:33 / stefan"
    "Modified: / 17-01-2018 / 14:00:28 / stefan"
|
1638 |
! |
|
1639 |
||
1640 |
encodeString:aUnicodeString on:aStream
    "given a string in unicode, write its encoding onto aStream.
     This default goes through encodeString: and therefore creates a temporary collection.
     Subclasses can redefine this to avoid allocating many new string instances.
     (but must then also redefine encodeString:aUnicodeString to collect the characters)"

    |encoded|

    encoded := self encodeString:aUnicodeString.
    aStream nextPutAll:encoded.
|
1646 |
! |
|
1647 |
||
1648 |
readNext:countArg charactersFrom:aStream
    "read and decode countArg characters from aStream (one readNextCharacterFrom: per character)
     and answer them as a string"

    |collector count "{ Class:SmallInteger }"|

    count := countArg.
    collector := CharacterWriteStream on:(String new:count).
    1 to:count do:[:ignoredIndex |
        collector nextPut:(self readNextCharacterFrom:aStream).
    ].
    ^ collector contents.

    "Created: / 16-01-2018 / 20:08:10 / stefan"
    "Modified: / 17-01-2018 / 16:44:29 / stefan"
|
11975 | 1660 |
! |
1661 |
||
1662 |
readNextCharacterFrom:aStream
    "read and decode the next character from aStream.
     Must be implemented by concrete subclasses; used by readNext:charactersFrom:"

    ^ self subclassResponsibility

    "Created: / 14-06-2005 / 17:03:21 / janfrog"
    "Modified: / 15-06-2005 / 15:27:49 / janfrog"
    "Modified: / 20-06-2005 / 13:13:52 / masca"
    "Modified: / 16-01-2018 / 20:12:07 / stefan"
11975 | 1669 |
! ! |
1670 |
||
24474 | 1671 |
!CharacterEncoder methodsFor:'testing'! |
1672 |
||
1673 |
isUnicodeSubsetEncoder
    "answer true, if the receiver encodes a subset of Unicode,
     i.e. if there is a 1-to-1 mapping to unicode.
     The default answered here is false."

    ^ false

    "Created: / 27-07-2019 / 14:51:28 / Stefan Vogel"
|
1680 |
! |
|
1681 |
||
1682 |
isUtf16Encoder
    "answer true, if the receiver encodes from/to UTF-16 (regardless of byte-order).
     The default answered here is false."

    ^ false

    "Created: / 27-07-2019 / 14:44:52 / Stefan Vogel"
|
25269 | 1688 |
! |
1689 |
||
1690 |
isUtfEncoder
    "answer true, if the receiver encodes from/to any UTF
     (regardless of how many bytes and of the byte-order).
     In other words: does it make sense to prepend a BOM.
     The default answered here is false."

    ^ false

    "Created: / 19-02-2020 / 16:17:20 / Stefan Reise"
|
24474 | 1697 |
! ! |
1698 |
||
7915 | 1699 |
!CharacterEncoder::CompoundEncoder class methodsFor:'documentation'! |
7914 | 1700 |
|
1701 |
documentation |
|
1702 |
" |
|
1703 |
A compoundEncoder uses two real encoders; |
|
1704 |
to encode: |
|
14209 | 1705 |
string -> decoder(decode) -> encoder(encode) -> result |
7914 | 1706 |
to decode: |
14209 | 1707 |
string -> encoder(decode) -> decoder(encode) -> result |
7956 | 1708 |
|
1709 |
|e| |
|
1710 |
||
1711 |
e := CompoundEncoder new. |
|
1712 |
e encoder:ISO8859_5 decoder:KOI8_R. |
|
1713 |
e decode:16rB0. 'CYRILLIC CAPITAL LETTER A; 16rB0 in 8859-5; 16rE1 in KOI8-R'. |
|
1714 |
e encode:16rE1. |
|
7914 | 1715 |
" |
1716 |
! ! |
|
1717 |
||
7915 | 1718 |
!CharacterEncoder::CompoundEncoder methodsFor:'accessing'! |
7914 | 1719 |
|
1720 |
encoder:encoderArg decoder:decoderArg
    "define the two encoders the receiver is composed of"

    encoder := encoderArg.
    decoder := decoderArg.
|
1725 |
! ! |
|
1726 |
||
7915 | 1727 |
!CharacterEncoder::CompoundEncoder methodsFor:'encoding & decoding'! |
7914 | 1728 |
|
22470 | 1729 |
decodeString:anEncodedStringOrByteCollection
    "decode in two steps: let encoder decode the argument first,
     then let decoder encode the intermediate result"

    |intermediate|

    intermediate := encoder decodeString:anEncodedStringOrByteCollection.
    ^ decoder encodeString:intermediate

    "Modified (format): / 17-01-2018 / 13:44:08 / stefan"
|
7956 | 1733 |
! |
1734 |
||
22470 | 1735 |
encodeString:anEncodedStringOrByteCollection
    "encode in two steps: let decoder decode the argument first,
     then let encoder encode the intermediate result"

    |intermediate|

    intermediate := decoder decodeString:anEncodedStringOrByteCollection.
    ^ encoder encodeString:intermediate

    "Modified (format): / 17-01-2018 / 13:46:26 / stefan"
7914 | 1739 |
! ! |
1740 |
||
7972 | 1741 |
!CharacterEncoder::CompoundEncoder methodsFor:'printing'! |
1742 |
||
1743 |
printOn:aStream
    "append a printed representation of the receiver to aStream:
     the name of decoder's encoding, an arrow,
     and whatever encoder prints for itself"

    aStream nextPutAll:(decoder nameOfEncoding).
    aStream nextPutAll:'->'.
    "/ disabled variant, kept from the original source:
    "/  nextPutAll:(decoder nameOfDecodedCode);
    "/  nextPutAll:'->';
    "/  nextPutAll:(encoder nameOfEncoding)
    encoder printOn:aStream
|
1751 |
! ! |
|
1752 |
||
22470 | 1753 |
!CharacterEncoder::CompoundEncoder methodsFor:'queries'! |
1754 |
||
1755 |
characterSize:aCharacterOrCodepoint
    "return the number of bytes required to encode aCharacterOrCodepoint.
     The argument is passed through decoder's #decode: first;
     encoder is then asked for the size of the result."

    |decoded|

    decoded := decoder decode:aCharacterOrCodepoint.
    ^ encoder characterSize:decoded

    "Created: / 16-01-2018 / 17:58:51 / stefan"
|
1761 |
! ! |
|
1762 |
||
1763 |
!CharacterEncoder::CompoundEncoder methodsFor:'stream support'! |
|
1764 |
||
1765 |
readNext:count charactersFrom:aStream
    "let encoder read count characters from aStream,
     convert them via #asString and answer what decoder encodes from that"

    |rawString|

    rawString := (encoder readNext:count charactersFrom:aStream) asString.
    ^ decoder encodeString:rawString

    "Created: / 16-01-2018 / 20:50:56 / stefan"
|
1769 |
! |
|
1770 |
||
1771 |
readNextCharacterFrom:aStream
    "let encoder read one character from aStream, encode it (as a string)
     with decoder and answer the first element of the result.
     Answer nil, if encoder delivered nil (NullEncoder does so when the
     stream delivers nil); nil must not be converted into a string
     and encoded as if it were input."

    |chOrNil|

    chOrNil := encoder readNextCharacterFrom:aStream.
    chOrNil isNil ifTrue:[
        ^ nil
    ].
    ^ (decoder encodeString:chOrNil asString) first

    "Created: / 16-01-2018 / 21:10:28 / stefan"
|
1775 |
! ! |
|
1776 |
||
1777 |
!CharacterEncoder::NullEncoder class methodsFor:'documentation'! |
|
7932 | 1778 |
|
1779 |
documentation |
|
1780 |
" |
|
22470 | 1781 |
A NullEncoder does nothing. |
7932 | 1782 |
" |
1783 |
! ! |
|
1784 |
||
22470 | 1785 |
!CharacterEncoder::NullEncoder methodsFor:'encoding & decoding'! |
1786 |
||
1787 |
decodeString:anEncodedStringOrByteCollection
    "nothing to decode - answer the argument converted via #asString"

    ^ anEncodedStringOrByteCollection asString

    "Modified: / 17-01-2018 / 13:43:42 / stefan"
|
1791 |
! |
|
1792 |
||
1793 |
encodeString:aString
    "nothing to encode - answer the argument unchanged"

    ^ aString
|
1795 |
! ! |
|
1796 |
||
1797 |
!CharacterEncoder::NullEncoder methodsFor:'queries'! |
|
1798 |
||
1799 |
characterSize:charOrCodePoint
    "return the number of bytes required to encode charOrCodePoint;
     this is what the corresponding character answers to #bytesPerCharacter"

    |char|

    char := charOrCodePoint asCharacter.
    ^ char bytesPerCharacter

    "
     NullEncoder basicNew characterSize:$a codePoint
     NullEncoder basicNew characterSize:16r3fe
     NullEncoder basicNew characterSize:16r3ffe
    "

    "Modified (comment): / 16-01-2018 / 21:15:01 / stefan"
|
1811 |
! |
|
1812 |
||
1813 |
isNullEncoder
    "answer true - the receiver is a NullEncoder"

    ^ true
|
1815 |
! ! |
|
1816 |
||
1817 |
!CharacterEncoder::NullEncoder methodsFor:'stream support'! |
|
1818 |
||
1819 |
readNext:count charactersFrom:aStream
    "fetch the next count elements from aStream and answer them
     converted via #asString; no decoding takes place"

    |chunk|

    chunk := aStream next:count.
    ^ chunk asString

    "Created: / 16-01-2018 / 20:19:38 / stefan"
|
1823 |
! |
|
1824 |
||
1825 |
readNextCharacterFrom:aStream
    "fetch the next element from aStream and answer it as a character;
     answer nil, if the stream delivers nil"

    |nextElement|

    nextElement := aStream next.
    nextElement isNil ifTrue:[
        ^ nil
    ].
    ^ nextElement asCharacter

    "Created: / 16-01-2018 / 20:04:01 / stefan"
1835 |
! ! |
|
1836 |
||
7915 | 1837 |
!CharacterEncoder::InverseEncoder class methodsFor:'documentation'! |
7914 | 1838 |
|
1839 |
documentation |
|
1840 |
" |
|
22470 | 1841 |
An InverseEncoder does the inverse - i.e. encode is really a decode |
7914 | 1842 |
and decode is really an encode. |
22470 | 1843 |
|
1844 |
InverseEncoder is always used to encode to unicode and decode from unicode |
|
1845 |
(see CharacterEncoder class >> #encoderToEncodeFrom:into:). |
|
7914 | 1846 |
" |
1847 |
! ! |
|
1848 |
||
7915 | 1849 |
!CharacterEncoder::InverseEncoder methodsFor:'accessing'! |
7914 | 1850 |
|
22584 | 1851 |
decoder:anEncoder
    "define the encoder whose encode/decode operations the receiver swaps"

    decoder := anEncoder.
|
7914 | 1853 |
! ! |
1854 |
||
7915 | 1855 |
!CharacterEncoder::InverseEncoder methodsFor:'encoding & decoding'! |
7914 | 1856 |
|
22470 | 1857 |
decodeString:anEncodedStringOrByteCollection
    "inverse operation: decoding with the receiver means
     encoding with the wrapped decoder"

    ^ decoder encodeString:anEncodedStringOrByteCollection

    "Modified (format): / 17-01-2018 / 13:43:57 / stefan"
|
7914 | 1861 |
! |
1862 |
||
22470 | 1863 |
encodeString:anEncodedStringOrByteCollection
    "inverse operation: encoding with the receiver means
     decoding with the wrapped decoder"

    ^ decoder decodeString:anEncodedStringOrByteCollection

    "Modified (format): / 17-01-2018 / 13:46:47 / stefan"
7914 | 1867 |
! ! |
1868 |
||
7972 | 1869 |
!CharacterEncoder::InverseEncoder methodsFor:'printing'! |
1870 |
||
1871 |
printOn:aStream
    "append a printed representation of the receiver to aStream:
     decoder's encoding name, an arrow, and decoder's decoded code name"

    aStream nextPutAll:(decoder nameOfEncoding).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(decoder nameOfDecodedCode)
|
7972 | 1876 |
! ! |
1877 |
||
12435
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1878 |
!CharacterEncoder::InverseEncoder methodsFor:'queries'! |
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1879 |
|
22426 | 1880 |
characterSize:charOrCodePoint
    "return the number of bytes required to encode charOrCodePoint;
     the question is simply forwarded to the wrapped decoder"

    ^ decoder characterSize:charOrCodePoint
12435
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1884 |
! ! |
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1885 |
|
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1886 |
!CharacterEncoder::InverseEncoder methodsFor:'stream support'! |
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1887 |
|
22470 | 1888 |
readNext:count charactersFrom:aStream
    "decode the next count bytes or characters on aStream from unicode
     to something else, by letting the wrapped decoder encode them"

    |chunk|

    chunk := aStream next:count.
    ^ decoder encodeString:chunk.

    "Created: / 16-01-2018 / 20:53:42 / stefan"
    "Modified (comment): / 17-01-2018 / 13:28:41 / stefan"
|
1895 |
! |
|
1896 |
||
1897 |
readNextCharacterFrom:aStream
    "decode the next byte or character on aStream from unicode to something else:
     the element is wrapped into a one-element String and the result of
     encoding that with the wrapped decoder is answered.
     Answer nil, if aStream delivers nil (NullEncoder treats that case
     the same way), instead of trying to store nil into a String."

    |chOrNil|

    chOrNil := aStream next.
    chOrNil isNil ifTrue:[
        ^ nil
    ].
    ^ decoder encodeString:(String with:chOrNil).

    "Created: / 16-01-2018 / 21:08:11 / stefan"
    "Modified: / 17-01-2018 / 13:29:59 / stefan"
|
12435
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1904 |
! ! |
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
1905 |
|
22470 | 1906 |
!CharacterEncoder::DefaultEncoder class methodsFor:'documentation'! |
7914 | 1907 |
|
1908 |
documentation |
|
1909 |
" |
|
22470 | 1910 |
That is only a dummy for ST80 compatibility |
7914 | 1911 |
" |
1912 |
! ! |
|
1913 |
||
7915 | 1914 |
!CharacterEncoder::OtherEncoding class methodsFor:'private'! |
7892 | 1915 |
|
1916 |
flushCode
    "intentionally empty - there is nothing to do here"

    "Modified (comment): / 16-01-2018 / 17:08:17 / stefan"
|
1920 |
! ! |
|
7892 | 1921 |
|
22470 | 1922 |
!CharacterEncoder::OtherEncoding class methodsFor:'testing'! |
1923 |
||
1924 |
isAbstract
    "answer true for OtherEncoding itself, false for any other receiver
     (i.e. for subclasses inheriting this method)"

    ^ CharacterEncoder::OtherEncoding == self

    "Created: / 17-01-2018 / 16:06:13 / stefan"
    "Modified: / 17-01-2018 / 17:50:37 / stefan"
|
7892 | 1929 |
! ! |
1930 |
||
7919 | 1931 |
!CharacterEncoder::TwoStepEncoder class methodsFor:'documentation'! |
1932 |
||
1933 |
documentation |
|
1934 |
" |
|
1935 |
A twoStepEncoder uses two real encoders; |
|
1936 |
to encode: |
|
14209 | 1937 |
string -> encoder1(encode) -> encoder2(encode) -> result |
7919 | 1938 |
to decode: |
14209 | 1939 |
string -> encoder2(decode) -> encoder1(decode) -> result |
7919 | 1940 |
" |
1941 |
! ! |
|
1942 |
||
1943 |
!CharacterEncoder::TwoStepEncoder methodsFor:'accessing'! |
|
1944 |
||
1945 |
encoder1:encoder1Arg encoder2:encoder2Arg
    "define the two encoders which are applied one after the other"

    encoder2 := encoder2Arg.
    encoder1 := encoder1Arg.
|
1950 |
! ! |
|
1951 |
||
1952 |
!CharacterEncoder::TwoStepEncoder methodsFor:'encoding & decoding'! |
|
1953 |
||
22470 | 1954 |
decodeString:anEncodedStringOrByteCollection
    "decode in two steps: encoder2 decodes the argument,
     encoder1 then decodes that intermediate result"

    |intermediate|

    intermediate := encoder2 decodeString:anEncodedStringOrByteCollection.
    ^ encoder1 decodeString:intermediate

    "Modified (format): / 17-01-2018 / 13:45:20 / stefan"
7919 | 1958 |
! |
1959 |
||
1960 |
encodeString:aString
    "encode in two steps: encoder1 encodes the argument,
     encoder2 then encodes that intermediate result"

    |intermediate|

    intermediate := encoder1 encodeString:aString.
    ^ encoder2 encodeString:intermediate
|
1962 |
! ! |
|
1963 |
||
7972 | 1964 |
!CharacterEncoder::TwoStepEncoder methodsFor:'printing'! |
1965 |
||
1966 |
printOn:aStream
    "append a printed representation of the receiver to aStream:
     encoder1's decoded code name, encoder1's encoding name and
     encoder2's encoding name, separated by arrows"

    aStream nextPutAll:(encoder1 nameOfDecodedCode).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(encoder1 nameOfEncoding).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(encoder2 nameOfEncoding)
|
7972 | 1973 |
! ! |
1974 |
||
11300 | 1975 |
!CharacterEncoder::TwoStepEncoder methodsFor:'queries'! |
1976 |
||
22426 | 1977 |
characterSize:charOrCodePoint
    "return the number of bytes required to encode charOrCodePoint.
     Only encoder2 is asked, with the unmodified argument."

    "/ naive; actually, we have to do a real encoding to get this info proper
    ^ encoder2 characterSize:charOrCodePoint

    "Created: / 22-11-2012 / 13:07:47 / cg"
91746a24d5ad
characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents:
14209
diff
changeset
|
1984 |
! |
91746a24d5ad
characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents:
14209
diff
changeset
|
1985 |
|
11300 | 1986 |
nameOfEncoding
    "answer the name of encoder2's encoding.
     (a disabled variant in the original source prepended
      encoder1's encoding name and a dash)"

    ^ encoder2 nameOfEncoding
|
1988 |
||
1989 |
||
1990 |
||
1991 |
||
1992 |
||
1993 |
||
1994 |
||
1995 |
||
1996 |
||
1997 |
||
1998 |
||
1999 |
||
2000 |
||
2001 |
||
2002 |
||
2003 |
||
2004 |
! ! |
|
2005 |
||
22470 | 2006 |
!CharacterEncoder::TwoStepEncoder methodsFor:'stream support'! |
2007 |
||
2008 |
readNext:count charactersFrom:aStream
    "let encoder2 read count characters from aStream
     and answer what encoder1 decodes from them"

    |intermediate|

    intermediate := encoder2 readNext:count charactersFrom:aStream.
    ^ encoder1 decodeString:intermediate

    "Created: / 16-01-2018 / 20:47:52 / stefan"
|
2012 |
! |
|
2013 |
||
2014 |
readNextCharacterFrom:aStream
    "let encoder2 read one character from aStream, decode it (as a string)
     with encoder1 and answer the first element of the result.
     Answer nil, if encoder2 delivered nil (NullEncoder does so when the
     stream delivers nil); nil must not be converted into a string
     and decoded as if it were input."

    |chOrNil|

    chOrNil := encoder2 readNextCharacterFrom:aStream.
    chOrNil isNil ifTrue:[
        ^ nil
    ].
    ^ (encoder1 decodeString:chOrNil asString) first

    "Created: / 16-01-2018 / 21:06:48 / stefan"
|
2018 |
! ! |
|
2019 |
||
7892 | 2020 |
!CharacterEncoder class methodsFor:'documentation'! |
2021 |
||
2022 |
version
    "answer the version keyword string of this source file"

    ^ '$Header$'
12435
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
2024 |
! |
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
2025 |
|
539c24148e90
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11975
diff
changeset
|
2026 |
version_CVS
    "answer the version keyword string of this source file"

    ^ '$Header$'
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
2028 |
! ! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
2029 |
|
14777
a669080229da
add user friendly name to semaphores
Stefan Vogel <sv@exept.de>
parents:
14559
diff
changeset
|
2030 |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
2031 |
CharacterEncoder initialize! |