author | Claus Gittinger <cg@exept.de> |
Tue, 09 Mar 2004 01:08:33 +0100 | |
changeset 8127 | 7531ed2cdf35 |
parent 8126 | 33f9c4850e84 |
child 8128 | 4f4195d0a720 |
permissions | -rw-r--r-- |
8048 | 1 |
" |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
2 |
COPYRIGHT (c) 2004 by eXept Software AG |
7932 | 3 |
All Rights Reserved |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
4 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
5 |
This software is furnished under a license and may be used |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
6 |
only in accordance with the terms of that license and with the |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
7 |
inclusion of the above copyright notice. This software may not |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
8 |
be provided or otherwise made available to, or used by, any |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
9 |
other person. No title to or ownership of the software is |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
10 |
hereby transferred. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
11 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
12 |
|
8114
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
13 |
"{ Package: 'stx:libbasic' }" |
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
14 |
|
8118 | 15 |
Object subclass:#CharacterEncoder |
7914 | 16 |
instanceVariableNames:'' |
8118 | 17 |
classVariableNames:'EncoderClassesByName EncodersByName CachedEncoders LastEncoder |
8122 | 18 |
AccessLock NullEncoderInstance Jis7KanjiEscapeSequence |
19 |
Jis7RomanEscapeSequence JisISO2022EscapeSequence |
|
20 |
Jis7KanjiOldEscapeSequence' |
|
7915 | 21 |
poolDictionaries:'' |
8114
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
22 |
category:'Collections-Text-Encodings' |
7969 | 23 |
! |
24 |
||
7914 | 25 |
CharacterEncoder subclass:#CompoundEncoder |
26 |
instanceVariableNames:'decoder encoder' |
|
27 |
classVariableNames:'' |
|
28 |
poolDictionaries:'' |
|
7915 | 29 |
privateIn:CharacterEncoder |
30 |
! |
|
31 |
||
7932 | 32 |
CharacterEncoder subclass:#DefaultEncoder |
33 |
instanceVariableNames:'' |
|
34 |
classVariableNames:'' |
|
35 |
poolDictionaries:'' |
|
36 |
privateIn:CharacterEncoder |
|
37 |
! |
|
38 |
||
7914 | 39 |
CharacterEncoder subclass:#InverseEncoder |
40 |
instanceVariableNames:'decoder' |
|
41 |
classVariableNames:'' |
|
42 |
poolDictionaries:'' |
|
7915 | 43 |
privateIn:CharacterEncoder |
44 |
! |
|
45 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
46 |
CharacterEncoder subclass:#NullEncoder |
7893 | 47 |
instanceVariableNames:'' |
48 |
classVariableNames:'' |
|
49 |
poolDictionaries:'' |
|
7915 | 50 |
privateIn:CharacterEncoder |
51 |
! |
|
52 |
||
7892 | 53 |
CharacterEncoder subclass:#OtherEncoding |
54 |
instanceVariableNames:'' |
|
55 |
classVariableNames:'' |
|
56 |
poolDictionaries:'' |
|
7915 | 57 |
privateIn:CharacterEncoder |
58 |
! |
|
59 |
||
7919 | 60 |
CharacterEncoder subclass:#TwoStepEncoder |
61 |
instanceVariableNames:'encoder1 encoder2' |
|
62 |
classVariableNames:'' |
|
63 |
poolDictionaries:'' |
|
64 |
privateIn:CharacterEncoder |
|
65 |
! |
|
66 |
||
7893 | 67 |
!CharacterEncoder class methodsFor:'documentation'! |
68 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
69 |
copyright |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
70 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
71 |
COPYRIGHT (c) 2004 by eXept Software AG |
7932 | 72 |
All Rights Reserved |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
73 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
74 |
This software is furnished under a license and may be used |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
75 |
only in accordance with the terms of that license and with the |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
76 |
inclusion of the above copyright notice. This software may not |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
77 |
be provided or otherwise made available to, or used by, any |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
78 |
other person. No title to or ownership of the software is |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
79 |
hereby transferred. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
80 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
81 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
82 |
|
7893 | 83 |
documentation |
84 |
" |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
85 |
unfinished code - please read howToAddMoreCoders. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
86 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
87 |
Character mappings are based on information in character maps found at either: |
7932 | 88 |
http://std.dkuug.dk/i18n/charmaps |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
89 |
or: |
7932 | 90 |
http://www.unicode.org/Public/MAPPINGS |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
91 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
92 |
No Warranty. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
93 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
94 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
95 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
96 |
examples |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
97 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
98 |
|s1 s2| |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
99 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
100 |
s1 := 'hello'. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
101 |
s2 := CharacterEncoder encode:s1 from:#'iso8859-1' into:#'unicode'. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
102 |
s2 |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
103 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
104 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
105 |
|s1 s2| |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
106 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
107 |
s1 := 'hello'. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
108 |
s2 := CharacterEncoder encode:s1 from:#'iso8859-1' into:#'iso8859-7'. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
109 |
s2 |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
110 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
111 |
! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
112 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
113 |
howToAddMoreCoders |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
114 |
" |
7971 | 115 |
Coders can be hand-written or automagically generated via a mapping table. |
7932 | 116 |
Examples for hand-written coders are ISO10646_to_UTF8 or JIS0208_to_JIS7. |
117 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
118 |
The table driven encode/decode methods can be generated from a character mapping document |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
119 |
as found on the unicode consortium host |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
120 |
(for example: 'http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT') |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
121 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
122 |
or from the i18n character maps: |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
123 |
(for example: 'http://std.dkuug.dk/i18n/charmaps/ISO-8859-1') |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
124 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
125 |
In order to add another coder (for example: for Finnish EBCDIC or ms-codePage 278), |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
126 |
perform the following steps: |
8114
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
127 |
- create a private subclass of CharacterEncoder named (for example) CP267. |
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
128 |
|
05274a80fcc4
separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents:
8105
diff
changeset
|
129 |
- create a public subclass of CharacterEncoderImplementations::CharacterEncoderImplementation named (for example) CharacterEncoderImplementations::CP267. |
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
130 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
131 |
- define the mappingURL1_relativeName (if the table is found on 'www.unicode.org') |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
132 |
or the mappingURL2_relativeName (if it is found on 'std.dkuug.dk') method, which |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
133 |
should return the name of the tables file, relative to the top directory there |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
134 |
(which is '.../Public/MAPPINGS' on 'www.unicode.org' and '.../i18n/charmaps' on 'std.dkuug.dk'). |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
135 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
136 |
In this example, the table from 'std.dkuug.dk' is used, and named 'EBCDIC-CP-FI' there. |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
137 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
138 |
- generate code by evaluating: |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
139 |
CharacterEncoder::CP267 generateCode |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
140 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
141 |
That's all! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
142 |
|
7909 | 143 |
|
144 |
The existing code was generated by: |
|
145 |
||
146 |
CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | Transcript showCR:cls name. cls flushCode; generateCode ] |
|
147 |
CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | cls allSubclassesDo:[:sub | Transcript showCR:sub name. sub flushCode; generateSubclassCode]] |
|
148 |
||
149 |
or individually: |
|
150 |
CharacterEncoder::ASCII flushCode; generateCode. |
|
151 |
CharacterEncoder::ISO8859_1 flushCode; generateCode. |
|
152 |
CharacterEncoder::ISO8859_2 flushCode; generateCode. |
|
153 |
CharacterEncoder::ISO8859_3 flushCode; generateCode. |
|
154 |
CharacterEncoder::ISO8859_4 flushCode; generateCode. |
|
155 |
CharacterEncoder::ISO8859_5 flushCode; generateCode. |
|
156 |
CharacterEncoder::ISO8859_6 flushCode; generateCode. |
|
157 |
CharacterEncoder::ISO8859_7 flushCode; generateCode. |
|
158 |
CharacterEncoder::ISO8859_8 flushCode; generateCode. |
|
159 |
CharacterEncoder::ISO8859_9 flushCode; generateCode. |
|
160 |
CharacterEncoder::ISO8859_10 flushCode; generateCode. |
|
161 |
CharacterEncoder::ISO8859_11 flushCode; generateCode. |
|
162 |
CharacterEncoder::ISO8859_13 flushCode; generateCode. |
|
163 |
CharacterEncoder::ISO8859_14 flushCode; generateCode. |
|
164 |
CharacterEncoder::ISO8859_15 flushCode; generateCode. |
|
165 |
CharacterEncoder::ISO8859_16 flushCode; generateCode. |
|
166 |
CharacterEncoder::KOI8_R flushCode; generateCode. |
|
167 |
CharacterEncoder::GSM0338 flushCode; generateCode. |
|
168 |
||
169 |
CharacterEncoder::KOI8_U flushCode; generateSubclassCode. |
|
7912 | 170 |
|
171 |
CharacterEncoder::JIS0208 flushCode; generateCode. |
|
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
172 |
" |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
173 |
! ! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
174 |
|
7971 | 175 |
!CharacterEncoder class methodsFor:'instance creation'! |
176 |
||
177 |
encoderFor:encodingNameSymbol
    "given the name of an encoding, answer an encoder instance which can map
     it from/into unicode.
     The lookup itself is done by #encoderFor:ifAbsent:. If nothing is found,
     a proceedable error is raised; when proceeded, NullEncoderInstance is answered."

    |notFoundAction|

    notFoundAction := [
        self error:'no encoder for ' , encodingNameSymbol mayProceed:true.
        NullEncoderInstance
    ].
    ^ self encoderFor:encodingNameSymbol ifAbsent:notFoundAction

    "
     CharacterEncoder encoderFor:#'latin1'
     self encoderFor:#'arabic'
     self encoderFor:#'ms-arabic'
     self encoderFor:#'iso8859-5'
     self encoderFor:#'koi8-r'
     self encoderFor:#'koi8-u'
     self encoderFor:#'jis0208'
     self encoderFor:#'jis7'
     self encoderFor:#'utf8'
    "
!
|
199 |
||
200 |
encoderFor:encodingNameSymbol ifAbsent:exceptionValue
    "given the name of an encoding, return an encoder-instance which can map these from/into unicode.
     Answers NullEncoderInstance for a nil name, and the value of exceptionValue
     if no encoder can be found.

     Lookup order:
       - names containing match characters are matched against the aliases of the
         already instantiated unicode encoders, then against the registered
         unicode encoder classes (instances created this way are not cached);
       - otherwise the cached instance for the (lowercased) name is used, or one is
         created from the class registered for that name and cached;
       - finally a TwoStepEncoder (unicode -> any -> name) is tried and cached."

    |enc cls lcName name unicodeEncoders unicodeEncoderClasses|

    encodingNameSymbol isNil ifTrue:[ ^ NullEncoderInstance].

    lcName := encodingNameSymbol asLowercase.
    name := lcName asSymbolIfInterned.
    name isNil ifTrue:[name := lcName].

    name includesMatchCharacters ifTrue:[
        unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil.
        unicodeEncoders notNil ifTrue:[
            unicodeEncoders keysAndValuesDo:[:eachEncodingAlias :eachEncoderInstance |
                (name matches:eachEncodingAlias) ifTrue:[
                    ^ eachEncoderInstance.
                ].
            ].
        ].

        "/ ifAbsent:nil, so that the nil-check below is meaningful
        "/ (a plain at: would raise an error for a missing key)
        unicodeEncoderClasses := EncoderClassesByName at:#unicode ifAbsent:nil.
        unicodeEncoderClasses notNil ifTrue:[
            unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClass |
                (name matches:eachEncodingAlias) ifTrue:[
                    ^ eachEncoderClass new.
                ].
            ].
        ].
        ^ exceptionValue value
    ].

    AccessLock critical:[
        unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil.
        unicodeEncoders isNil ifTrue:[
            EncodersByName at:#unicode put:(unicodeEncoders := Dictionary new).
        ].
        enc := unicodeEncoders at:name ifAbsent:nil.

        enc isNil ifTrue:[
            unicodeEncoderClasses := EncoderClassesByName at:#unicode ifAbsent:nil.
            unicodeEncoderClasses isNil ifTrue:[
                EncoderClassesByName at:#unicode put:(unicodeEncoderClasses := Dictionary new).
            ].
            cls := unicodeEncoderClasses at:name ifAbsent:nil.
            cls notNil ifTrue:[
                enc := cls new.
                unicodeEncoders at:name put:enc.
            ].
        ].
    ].
    enc notNil ifTrue:[
        ^ enc
    ].

    "/ no direct encoder from unicode->name
    "/ search for unicode->any and: any->name
    unicodeEncoderClasses := EncoderClassesByName at:#unicode ifAbsent:nil.
    unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClass |
        |dict2|

        dict2 := EncoderClassesByName at:eachEncodingAlias ifAbsent:nil.
        dict2 notNil ifTrue:[
            cls := dict2 at:name ifAbsent:nil.
            cls notNil ifTrue:[
                enc := TwoStepEncoder new
                            encoder1:(self encoderFor:eachEncodingAlias)
                            encoder2:(cls new).

                AccessLock critical:[
                    unicodeEncoders at:name put:enc.
                ].
                ^ enc.
            ]
        ].
    ].

    ^ exceptionValue value

    "
     CharacterEncoder encoderFor:#'latin1'
     self encoderFor:#'arabic'
     self encoderFor:#'ms-arabic'
     self encoderFor:#'iso8859-5'
     self encoderFor:#'koi8-r'
     self encoderFor:#'koi8-u'
     self encoderFor:#'jis0208'
     self encoderFor:#'jis7'
     self encoderFor:#'unicode'
    "
!
|
291 |
||
292 |
encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
    "answer an encoder which converts from the encoding named oldEncodingArg
     into the encoding named newEncodingArg.
     A nil argument and #'iso10646-1' are both treated as #unicode.
     Non-symbol arguments trigger a halt and are then converted to symbols.
     Answers NullEncoderInstance if both encodings are the same (or match).
     The resulting encoder is cached in EncodersByName under
     (oldEncoding -> newEncoding), using the normalized names."

    |oldEncoding newEncoding encoders encoderClasses encoder decoder cls|

    oldEncoding := oldEncodingArg ? #'unicode'.
    oldEncoding == #'iso10646-1' ifTrue:[ oldEncoding := #'unicode'].
    newEncoding := newEncodingArg ? #'unicode'.
    newEncoding == #'iso10646-1' ifTrue:[ newEncoding := #'unicode'].

    oldEncoding isSymbol ifFalse:[self halt:'symbol argument expected'. oldEncoding := oldEncoding asSymbol].
    newEncoding isSymbol ifFalse:[self halt:'symbol argument expected'. newEncoding := newEncoding asSymbol].

    oldEncoding == newEncoding ifTrue:[^ NullEncoderInstance].
    (oldEncoding match:newEncoding) ifTrue:[^ NullEncoderInstance].

    (oldEncoding == #unicode) ifTrue:[
        "/ unicode -> something
        ^ self encoderFor:newEncoding.
    ].

    AccessLock critical:[
        encoders := EncodersByName at:oldEncoding ifAbsent:nil.
        encoders isNil ifTrue:[
            EncodersByName at:oldEncoding put:(encoders := Dictionary new).
        ].
        "/ look up under the normalized name - this is the key used
        "/ when the encoder is stored at the end of this method
        encoder := encoders at:newEncoding ifAbsent:nil.
        encoder isNil ifTrue:[
            encoderClasses := EncoderClassesByName at:oldEncoding ifAbsent:nil.
            encoderClasses isNil ifTrue:[
                EncoderClassesByName at:oldEncoding put:(encoderClasses := Dictionary new).
            ].
            cls := encoderClasses at:newEncoding ifAbsent:nil.
            cls notNil ifTrue:[
                encoder := cls new.
            ].
        ].
    ].

    encoder isNil ifTrue:[
        (newEncoding == #unicode) ifTrue:[
            "/ something -> unicode
            decoder := self encoderFor:oldEncoding.
            encoder := InverseEncoder new decoder:decoder.
        ] ifFalse:[
            "/ do it as: oldEncoding -> unicode -> newEncoding

            "/ something -> unicode
            decoder := self encoderFor:oldEncoding.

            "/ unicode -> something
            encoder := self encoderFor:newEncoding.
            encoder := CompoundEncoder new encoder:encoder decoder:decoder.
        ].
    ].

    AccessLock critical:[
        (EncodersByName at:oldEncoding) at:newEncoding put:encoder
    ].
    ^ encoder

    " CharacterEncoder initialize
     CharacterEncoder encoderToEncodeFrom:#'latin1' into:#'jis7'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'mac-cyrillic'
     CharacterEncoder encoderToEncodeFrom:#'ms-arabic' into:#'mac-arabic'
     CharacterEncoder encoderToEncodeFrom:#'iso8859-5' into:#'koi8-r'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'koi8-u'
    "
! !
|
359 |
||
7932 | 360 |
!CharacterEncoder class methodsFor:'Compatibility-ST80'! |
361 |
||
362 |
encoderNamed: encoderName
    "/ q & d hack
    "/ only #default is supported (answers a new DefaultEncoder);
    "/ any other name halts and then answers a new instance of the receiver.

    encoderName ~~ #default ifTrue:[
        self halt.
        ^ self new
    ].
    ^ DefaultEncoder new
!
|
371 |
||
372 |
platformName
    "answer the platform name, as reported by OperatingSystem"

    |osName|

    osName := OperatingSystem platformName.
    ^ osName

    "Created: 20.6.1997 / 17:34:03 / cg"
    "Modified: 20.6.1997 / 17:38:40 / cg"
! !
|
378 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
379 |
!CharacterEncoder class methodsFor:'class initialization'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
380 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
381 |
initialize
    "set up the access lock, the shared null encoder and the (empty) caches,
     then register every known encoder class under all of its encoding aliases.
     EncoderClassesByName maps: decodedName -> (alias -> implementation class)."

    AccessLock := Semaphore forMutualExclusion.
    NullEncoderInstance := NullEncoder new.

    EncodersByName := Dictionary new.
    EncoderClassesByName := Dictionary new.
    CachedEncoders := Dictionary new.

    "/ class decoded-name array-of-encodingNames
    #(
        (ASCII unicode ( ascii 'us-ascii' 'iso-ir-6' 'ibm-367' 'ms-cp367' 'cp367' 'iso646-us' 'ibm-cp367' ))

        (BIG5 unicode ( big5 ))

        (CNS11643 unicode ( 'cns11643' ))

        (CP437 unicode ( 'cp437' 'cp-437' 'ibm-437' 'ms-cp437' 'microsoft-cp437' 'ibm-cp437' ))

        (GB2313_1980 unicode ( 'gb2313' 'gb2313-1980' ))

        (HANGUL unicode ( 'hangul' ))

        (ISO10646_1 unicode ( unicode 'iso10646_1' 'iso10646-1' 'iso-10646-1' ))

        (ISO10646_to_UTF8 unicode ( utf8 'utf-8' ))

        (ISO8859_1 unicode ( 'iso8859_1' 'iso8859-1' 'iso-8859-1' 'latin-1' 'latin1' 'iso-ir-100' 'ibm-819' 'ms-cp819' 'ibm-cp819' ))

        (ISO8859_2 unicode ( 'iso8859_2' 'iso8859-2' 'iso-8859-2' 'latin2' 'latin-2' 'iso-ir-101'))

        (ISO8859_3 unicode ( 'iso8859_3' 'iso8859-3' 'iso-8859-3' 'latin3' 'latin-3' 'iso-ir-109'))

        (ISO8859_4 unicode ( 'iso8859_4' 'iso8859-4' 'iso-8859-4' 'latin4' 'latin-4' 'iso-ir-110'))

        (ISO8859_5 unicode ( 'iso8859_5' 'iso8859-5' 'iso-8859-5' 'cyrillic' 'iso-ir-144' ))

        (ISO8859_6 unicode ( 'iso8859_6' 'iso8859-6' 'iso-8859-6' 'arabic' 'asmo-708' 'ecma-114' 'iso-ir-127' ))

        (ISO8859_7 unicode ( 'iso8859_7' 'iso8859-7' 'iso-8859-7' 'greek' 'iso-ir-126' 'ecma-118'))

        (ISO8859_8 unicode ( 'iso8859_8' 'iso8859-8' 'iso-8859-8' 'hebrew' 'iso-ir-138' ))

        (ISO8859_9 unicode ( 'iso8859_9' 'iso8859-9' 'iso-8859-9' 'latin5' 'latin-5' 'iso-ir-148'))

        (ISO8859_10 unicode ( 'iso8859_10' 'iso8859-10' 'iso-8859-10' 'latin6' 'latin-6' 'iso-ir-157'))

        (ISO8859_11 unicode ( 'iso8859_11' 'iso8859-11' 'iso-8859-11' 'thai' ))

        (ISO8859_13 unicode ( 'iso8859_13' 'iso8859-13' 'iso-8859-13' 'latin7' 'latin-7' ))

        (ISO8859_14 unicode ( 'iso8859_14' 'iso8859-14' 'iso-8859-14' 'latin8' 'latin-8' 'latin-celtic' ))

        (ISO8859_15 unicode ( 'iso8859_15' 'iso8859-15' 'iso-8859-15' 'latin9' 'latin-9' 'iso-ir-203'))

        (ISO8859_16 unicode ( 'iso8859_16' 'iso8859-16' 'iso-8859-16' 'latin10' 'latin-10' ))

        (JIS0201 unicode ( 'jis0201' #'jisx0201.1976-0'))

        (JIS0208 unicode ( jis0208 'jisx0208' 'jisx0208.1983-0' 'jisx0208.1990-0'))

        (JIS0208_to_JIS7 jis0208 ( jis7 'jis-7' 'x-jis7' 'x-iso2022-jp' 'iso2022-jp'))

        (JIS0208_to_EUC jis0208 ( euc #'x-euc-jp' ))

        (JIS0212 unicode ( 'jis0212' ))

        (JOHAB unicode ( 'johab' ))

        (KOI7 unicode ( 'koi7' ))

        (KOI8_R unicode ( #'koi8-r' 'cp878' ))

        (KOI8_U unicode ( #'koi8-u' ))

        (KSC5601 unicode ( #'ksc5601' ))

        (MAC_Arabic unicode ( #'mac-arabic' 'macarabic' ))

        (MAC_CentralEuropean unicode ( #'mac-centraleuropean' #'mac-centraleurope' 'maccentraleurope' 'maccentraleuropean' ))

        (MAC_Croatian unicode ( #'mac-croatian' 'maccroatian'))

        (MAC_Cyrillic unicode ( #'mac-cyrillic' 'maccyrillic' ))

        (MAC_Dingbats unicode ( #'mac-dingbats' 'macdingbats' 'macdingbat'))

        (MAC_Farsi unicode ( #'mac-farsi' 'macfarsi' ))

        (MAC_Greek unicode ( #'mac-greek' #'macgreek' ))

        (MAC_Hebrew unicode ( #'mac-hebrew' #'machebrew' ))

        (MAC_Iceland unicode ( #'mac-iceland' #'maciceland' ))

        (MAC_Japanese unicode ( #'mac-japanese' #'macjapanese' ))

        (MAC_Korean unicode ( #'mac-korean' #'mackorean' ))

        (MAC_Roman unicode ( #'mac-roman' #'macroman' ))

        (MAC_Romanian unicode ( #'mac-romanian' #'macromanian' ))

        (MAC_Symbol unicode ( #'mac-symbol' #'macsymbol' ))

        (MAC_Thai unicode ( #'mac-thai' #'macthai' ))

        (MAC_Turkish unicode ( #'mac-turkish' #'macturkish' ))

        (MS_Ansi unicode ( #'ms-ansi' 'ms-cp1252' 'microsoft-cp1252' 'cp1252' 'microsoft-ansi' 'windows-1252' 'windows-latin1'))

        (MS_Arabic unicode ( 'ms-arabic' 'ms-cp1256' 'microsoft-cp1256' 'cp1256' 'microsoft-arabic' 'windows-1256' ))

        (MS_Baltic unicode ( 'ms-baltic' 'ms-cp1257' 'microsoft-cp1257' 'cp1257' 'microsoft-baltic' 'windows-1257' ))

        (MS_Cyrillic unicode ( 'ms-cyrillic' 'ms-cp1251' 'microsoft-cp1251' 'cp1251' 'microsoft-cyrillic' 'windows-1251' ))

        (MS_EastEuropean unicode ( 'ms-easteuropean' 'ms-ee' 'cp1250' 'ms-cp1250' 'microsoft-cp1250' 'microsoft-easteuropean' 'windows-1250' ))

        (MS_Greek unicode ( 'ms-greek' 'ms-cp1253' 'microsoft-cp1253' 'cp1253' 'microsoft-greek' 'windows-1253' ))

        (MS_Hebrew unicode ( 'ms-hebrew' 'ms-cp1255' 'microsoft-cp1255' 'cp1255' 'microsoft-hebrew' 'windows-1255' ))

        "/ (MS_Symbol unicode ( 'ms-symbol' 'microsoft-symbol' ))

        (MS_Turkish unicode ( 'ms-turkish' 'ms-cp1254' 'microsoft-cp1254' 'cp1254' 'microsoft-turkish' 'windows-1254' ))

        (NEXT unicode ( 'next' 'nextstep' ))

        (SJIS unicode ( 'sjis' 'shiftjis' 'x-sjis' #'x-shift-jis' #'shift-jis'))
    ) triplesDo:[:className :decodesTo :encodesTo |
        |encoderClass aliasMap|

        encoderClass := (Smalltalk at:#CharacterEncoderImplementations) at:className.
        encoderClass notNil ifTrue:[
            "/ fetch (or lazily create) the alias table for this decoded-name
            aliasMap := EncoderClassesByName at:decodesTo ifAbsent:nil.
            aliasMap isNil ifTrue:[
                aliasMap := Dictionary new.
                EncoderClassesByName at:decodesTo put:aliasMap.
            ].
            encodesTo do:[:alias |
                "/ an alias may be registered only once per decoded-name
                (aliasMap includesKey:alias) ifTrue:[
                    self halt:'conflicting alias'
                ].
                aliasMap at:alias put:encoderClass.
            ]
        ] ifFalse:[
            self halt:'missing encoder-class'
        ].
    ].

    "
     self initialize
    "
! !
534 |
||
8122 | 535 |
!CharacterEncoder class methodsFor:'constants'! |
536 |
||
537 |
jis7KanjiEscapeSequence
    "answer the escape sequence used to switch to kanji in jis7 encoded strings
     (ESC followed by '$B'). The string is built on first use and then kept in
     Jis7KanjiEscapeSequence.
     This happens to be the same as ISO2022-JP's escape sequence."

    Jis7KanjiEscapeSequence notNil ifTrue:[
        ^ Jis7KanjiEscapeSequence
    ].
    ^ Jis7KanjiEscapeSequence := Character esc asString , '$B'

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
|
549 |
||
550 |
jis7KanjiOldEscapeSequence
    "answer the escape sequence used to switch to kanji in some old jis7 encoded
     strings (ESC followed by '$@'). The string is built on first use and then
     kept in Jis7KanjiOldEscapeSequence."

    Jis7KanjiOldEscapeSequence isNil ifTrue:[
        "/ a stray second period (empty statement) after this assignment has been removed
        Jis7KanjiOldEscapeSequence := Character esc asString , '$@'.
    ].
    ^ Jis7KanjiOldEscapeSequence.
!
|
558 |
||
559 |
jis7RomanEscapeSequence
    "answer the escape sequence used to switch to roman in jis7 encoded strings
     (ESC followed by '(J'). The string is built on first use and then kept in
     Jis7RomanEscapeSequence."

    Jis7RomanEscapeSequence notNil ifTrue:[
        ^ Jis7RomanEscapeSequence
    ].
    ^ Jis7RomanEscapeSequence := Character esc asString , '(J'

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
|
570 |
||
571 |
jisISO2022EscapeSequence
    "answer the escape sequence used to switch to kanji in iso2022 encoded strings:
     ESC '&@' immediately followed by ESC '$B'. The string is built on first use
     and then kept in JisISO2022EscapeSequence."

    |esc|

    JisISO2022EscapeSequence notNil ifTrue:[
        ^ JisISO2022EscapeSequence
    ].
    esc := Character esc asString.
    ^ JisISO2022EscapeSequence := esc , '&@' , esc , '$B'
! !
|
579 |
||
7892 | 580 |
!CharacterEncoder class methodsFor:'encoding & decoding'! |
581 |
||
582 |
decode:aCodePoint
    "convenience: let a fresh instance of the receiver decode aCodePoint
     and answer what that instance answers"

    |coder|

    coder := self new.
    ^ coder decode:aCodePoint
!
|
585 |
||
586 |
decodeString:aString
    "convenience: let a fresh instance of the receiver decode aString
     and answer what that instance answers"

    |coder|

    coder := self new.
    ^ coder decodeString:aString
!
|
589 |
||
7972 | 590 |
decodeString:aString from:oldEncoding
    "answer aString, which is encoded in oldEncoding, translated into unicode.
     Implemented as an encodeString:from:into: with #'unicode' as the target."

    |targetEncoding|

    targetEncoding := #'unicode'.
    ^ self encodeString:aString from:oldEncoding into:targetEncoding
!
593 |
||
7892 | 594 |
encode:aCodePoint
    "convenience: let a fresh instance of the receiver encode aCodePoint
     and answer what that instance answers"

    |coder|

    coder := self new.
    ^ coder encode:aCodePoint

    "
     ISO8859_1 encode:16r00FF
     ISO8859_1 decodeString:'hello'
     ISO8859_1 encodeString:(ISO8859_1 decodeString:'hello')

     ISO8859_5 decodeString:(String
                                with:(Character value:16rE4)
                                with:(Character value:16rE0))
    "
!
|
607 |
||
7994 | 608 |
encode:codePoint from:oldEncodingArg into:newEncodingArg
    "translate the single codePoint from the encoding named by oldEncodingArg
     into the encoding named by newEncodingArg.
     A nil encoding name defaults to #'unicode'; #'iso10646-1' is treated as #'unicode'.
     Shortcuts (codePoint is answered unchanged):
        - both names are identical
        - unicode <-> iso8859-1 for codePoints up to 16rFF.
     Everything else is delegated to the encoder answered by encoderToEncodeFrom:into:"

    |sourceName targetName isLatin1Pair|

    sourceName := oldEncodingArg ? #'unicode'.
    sourceName == #'iso10646-1' ifTrue:[ sourceName := #'unicode' ].
    targetName := newEncodingArg ? #'unicode'.
    targetName == #'iso10646-1' ifTrue:[ targetName := #'unicode' ].

    sourceName == targetName ifTrue:[^ codePoint].

    "/ the two directions were tested separately before; they are mutually exclusive
    isLatin1Pair := (sourceName == #'unicode' and:[targetName == #'iso8859-1'])
                    or:[targetName == #'unicode' and:[sourceName == #'iso8859-1']].
    (isLatin1Pair and:[codePoint <= 16rFF]) ifTrue:[^ codePoint].

    ^ (self encoderToEncodeFrom:sourceName into:targetName) encode:codePoint
!
635 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
636 |
encodeString:aUnicodeString
    "given a string in unicode, answer a string in my encoding for it.
     Convenience: the work is done by a fresh instance of the receiver."

    |coder|

    coder := self new.
    ^ coder encodeString:aUnicodeString

    "
     ISO8859_1 decodeString:'hello'
    "
!
645 |
||
7967 | 646 |
encodeString:aString from:oldEncodingArg into:newEncodingArg
    "translate aString from the encoding named by oldEncodingArg into the
     encoding named by newEncodingArg.
     A nil encoding name defaults to #'unicode'; #'iso10646-1' is treated as #'unicode'.
     Shortcuts (aString itself is answered, no copy is made):
        - both names are identical
        - unicode <-> iso8859-1 if aString has 8 bits per character.
     Everything else is delegated to the encoder answered by encoderToEncodeFrom:into:"

    |sourceName targetName isLatin1Pair|

    sourceName := oldEncodingArg ? #'unicode'.
    sourceName == #'iso10646-1' ifTrue:[ sourceName := #'unicode' ].
    targetName := newEncodingArg ? #'unicode'.
    targetName == #'iso10646-1' ifTrue:[ targetName := #'unicode' ].

    sourceName == targetName ifTrue:[^ aString].

    "/ the two directions were tested separately before; they are mutually exclusive
    isLatin1Pair := (sourceName == #'unicode' and:[targetName == #'iso8859-1'])
                    or:[targetName == #'unicode' and:[sourceName == #'iso8859-1']].
    (isLatin1Pair and:[aString bitsPerCharacter == 8]) ifTrue:[^ aString].

    ^ (self encoderToEncodeFrom:sourceName into:targetName) encodeString:aString
!
674 |
||
675 |
encodeString:aString into:newEncoding
    "answer aString, which is taken to be unicode, translated into newEncoding.
     Implemented as an encodeString:from:into: with #'unicode' as the source."

    |sourceEncoding|

    sourceEncoding := #'unicode'.
    ^ self encodeString:aString from:sourceEncoding into:newEncoding
! !
678 |
||
679 |
!CharacterEncoder class methodsFor:'private'! |
|
680 |
||
681 |
flushCode
    "re-run the class initialization; then, if I am a concrete encoder class
     which names at least one mapping file, remove the #mapping method from
     my class side."

    self initialize.

    self isAbstract ifTrue:[^ self].
    (self mapFileURL1_relativePathName isNil
     and:[self mapFileURL2_relativePathName isNil]) ifTrue:[^ self].

    self class removeSelector:#mapping.

    "
     self flushCode
    "
! !
|
695 |
||
696 |
!CharacterEncoder class methodsFor:'private-mapping setup'! |
|
697 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
698 |
generateCode
    "ask a new CharacterEncoderCodeGenerator, targeted at the receiver, to generate code"

    |generator|

    "/ generateCode goes to whatever targetClass: answers - exactly as before
    generator := CharacterEncoderCodeGenerator new targetClass:self.
    generator generateCode.
!
|
701 |
||
702 |
generateSubclassCode
    "ask a new CharacterEncoderCodeGenerator, targeted at the receiver,
     to generate the subclass code"

    |generator|

    "/ generateSubclassCode goes to whatever targetClass: answers - exactly as before
    generator := CharacterEncoderCodeGenerator new targetClass:self.
    generator generateSubclassCode.
!
705 |
||
7914 | 706 |
mapFileURL1_codeColumn
    "answer 1 here.
     NOTE(review): presumably the index of the column holding my code in the
     first mapping file - confirm against CharacterEncoderCodeGenerator."

    ^ 1
!
|
709 |
||
7912 | 710 |
mapFileURL1_relativePathName
    "answer the path of my first mapping file, relative to the base URL used
     in mappingURL1. No error is raised here: the default is nil, which
     mappingURL1 and flushCode treat as 'no such file'."

    ^ nil
!
|
715 |
||
716 |
mapFileURL2_relativePathName
    "answer the path of my second mapping file, relative to the base URL used
     in mappingURL2. No error is raised here: the default is nil, which
     mappingURL2 and flushCode treat as 'no such file'."

    ^ nil
!
|
721 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
722 |
mappingURL1
    "answer the URL of my first mapping file: the unicode.org MAPPINGS base URL
     followed by mapFileURL1_relativePathName.
     Answer nil if no relative path name is defined."

    |relativePath|

    relativePath := self mapFileURL1_relativePathName.
    relativePath notNil ifTrue:[
        ^ 'http://www.unicode.org/Public/MAPPINGS/' , relativePath
    ].
    ^ nil
!
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
733 |
|
7892 | 734 |
mappingURL2
    "answer the URL of my second mapping file: the std.dkuug.dk charmaps base URL
     followed by mapFileURL2_relativePathName.
     Answer nil if no relative path name is defined."

    |relativePath|

    relativePath := self mapFileURL2_relativePathName.
    relativePath notNil ifTrue:[
        ^ 'http://std.dkuug.dk/i18n/charmaps/' , relativePath
    ].
    ^ nil
! !
745 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
746 |
!CharacterEncoder class methodsFor:'queries'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
747 |
|
7938 | 748 |
isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg
    "answer true, if the superSet encoding includes all characters of the subSet encoding
     (this means: the characters are included - not that they have the same code).
     Both names are compared in lowercase; the subSet name may be a match pattern.
     Rules, in this order:
        - equal names, or the subSet name matching the superSet name
        - iso10646* / unicode as superSet includes ascii, iso8859, jis, koi8, ksc,
          big, cns and gb2312 encodings
        - the patterns 'iso8859*' / 'iso8859-*' as subSet are included in ascii*
        - 'ascii' is included in any iso8859*
     Anything else answers false."

    |subName superName|

    subSetEncodingArg = superSetEncodingArg ifTrue:[^ true].

    subName := subSetEncodingArg asLowercase.
    superName := superSetEncodingArg asLowercase.

    (subName match:superName) ifTrue:[^ true].

    (('iso10646*' match:superName) or:[superName = 'unicode']) ifTrue:[
        #('ascii*' 'iso8859*' 'jis*' 'koi8*' 'ksc*' 'big*' 'cns*' 'gb2312*') do:[:eachPattern |
            (eachPattern match:subName) ifTrue:[^ true].
        ].
    ].

    "/ if the subSet is iso8859-*, that means ascii (i.e. the lower 7 bits of iso8859 only).
    (((subName = 'iso8859*') or:[subName = 'iso8859-*'])
     and:['ascii*' match:superName]) ifTrue:[^ true].

    ((subName = 'ascii')
     and:['iso8859*' match:superName]) ifTrue:[^ true].

    "/ TODO: check the charSets mappingTables...
    ^ false
!
|
783 |
||
7919 | 784 |
nameOfDecodedCode
    "answer the name of the code I decode into (and encode from).
     For most coders this is unicode; the few exceptions must redefine this."

    ^ #'unicode'
!
|
790 |
||
791 |
nameOfEncoding
    "answer my encoding's name as a symbol, derived from my class name:
     nameWithoutPrefix in lowercase, with every underscore replaced by a dash"

    |lowercaseName|

    lowercaseName := self nameWithoutPrefix asLowercase.
    ^ (lowercaseName copyReplaceAll:$_ with:$-) asSymbol
!
794 |
||
7959 | 795 |
supportedExternalEncodings
    "answer a literal array describing the encodings which are supported for
     external resources (i.e. files).
     Each entry is either a two-element array
        (internallyUsedName userReadableDescription)
     or nil (presumably a group separator for menus/lists - confirm against the users
     of this list).
     More than one external name may be mapped onto the same symbolic name.
     NOTE(review): the description 'GB - mainland chin' looks truncated; it is kept
     as is, because the string may be used as a translation key elsewhere."

    ^ #(
        ('utf8'         'Unicode as 8Bit characters')
        ('utf7'         'Unicode as 7Bit characters')
        nil
        ('ascii'        'Common 7bit subset of iso8859')
        ('iso8859-1'    'Latin1')
        ('iso8859-2'    'Latin2')
        ('iso8859-3'    'Latin3')
        ('iso8859-4'    'Latin4')
        ('iso8859-5'    'Cyrillic')
        ('iso8859-6'    'Arabic')
        ('iso8859-7'    'Greek')
        ('iso8859-8'    'Hebrew')
        nil
        ('koi7'         'Cyrillic (Old)')
        ('koi8-r'       'Cyrillic')
        ('koi8-u'       'Cyrillic (Ukraine)')
        nil
        ('cp437'        'msdos US / codepage 437')
        ('cp850'        'msdos Latin1 codepage 850')
        ('mac'          'macintosh 8 bit')
        ('next'         'NeXT 8 bit')
        ('hp'           'hpux 8 bit')
        nil
        ('euc'          'EUC - extended unix code japanese')
        ('jis7'         'JIS7 - jis 7bit escape codes japanese')
        ('iso-2022-jp'  'Same as jis 7bit')
        ('sjis'         'SJIS - shift jis 8bit codes japanese')
        nil
        ('gb'           'GB - mainland chin')
        ('big5'         'BIG5 - taiwan')
"/        ('ksc'          'korean')
       )
!
|
836 |
||
7947 | 837 |
userFriendlyNameOfEncoding
    "answer my encoding name (see nameOfEncoding) with its first character in uppercase"

    |encodingName|

    encodingName := self nameOfEncoding.
    ^ encodingName asUppercaseFirst
! !
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
840 |
|
7912 | 841 |
!CharacterEncoder class methodsFor:'testing'! |
842 |
||
843 |
isAbstract
    "answer true only for CharacterEncoder itself; every other class
     which inherits this method answers false"

    ^ self == CharacterEncoder
! !
|
846 |
||
7892 | 847 |
!CharacterEncoder methodsFor:'encoding & decoding'! |
848 |
||
849 |
decode:anEncoding
    "given an integer code in my encoding, answer the unicode codePoint for it.
     Not implemented here - concrete subclasses must redefine this."

    self subclassResponsibility
!
854 |
||
855 |
decodeString:anEncodedString
    "given a string in my encoding, answer a unicode-string for it.
     Every character is passed through decode:. The result starts out as a String;
     it is replaced by a Unicode16String copy as soon as a decoded codePoint
     above 16rFF has to be stored, and by a Unicode32String copy for a codePoint
     above 16rFFFF."

    |decoded count|

    count := anEncodedString size.
    decoded := String new:count.
    1 to:count do:[:position |
        |unicodeValue requiredBits|

        unicodeValue := self decode:(anEncodedString at:position) codePoint.
        unicodeValue > 16rFF ifTrue:[
            requiredBits := unicodeValue > 16rFFFF ifTrue:[32] ifFalse:[16].
            "/ widen only if the current result string is too narrow
            decoded bitsPerCharacter < requiredBits ifTrue:[
                decoded := requiredBits == 32
                                ifTrue:[Unicode32String fromString:decoded]
                                ifFalse:[Unicode16String fromString:decoded].
            ].
        ].
        decoded at:position put:(Character value:unicodeValue).
    ].
    ^ decoded

    "
     ISO8859_1 decodeString:'hello'
    "
!
|
885 |
||
886 |
encode:aCodePoint
    "given a unicode codePoint, answer the code in my encoding for it.
     Not implemented here - concrete subclasses must redefine this."

    self subclassResponsibility
!
891 |
||
892 |
encodeString:aUnicodeString
    "given a string in unicode, answer a string in my encoding for it.
     The result is created via newString: and filled character by character
     with what encode: answers for the corresponding codePoint."

    |encoded|

    encoded := self newString:(aUnicodeString size).
    1 to:aUnicodeString size do:[:position |
        encoded
            at:position
            put:(Character value:(self encode:(aUnicodeString at:position) codePoint)).
    ].
    ^ encoded
! !
905 |
||
7899
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
906 |
!CharacterEncoder methodsFor:'error handling'! |
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
907 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
908 |
decodingError
    "report that there is no unicode codePoint for a given code in this encoding
     (which is unlikely), or that the encoding is undefined for that value
     (for example, holes in the ISO8859-3 encoding).
     Raises an EncodingError as a request, with defaultDecoderValue as its default
     value and my sender as the suspended context; answers what raiseRequest answers.
     NOTE(review): the offending code is only fetched if my sender is NOT
     #encode: / #decode: - this looks possibly inverted; confirm the intent."

    |callerContext callerSelector offendingCode|

    "/ must stay in the method itself: thisContext sender depends on the call depth
    callerContext := thisContext sender.
    callerSelector := callerContext selector.
    (callerSelector ~~ #encode: and:[callerSelector ~~ #decode:]) ifTrue:[
        offendingCode := callerContext methodHome argAt:1
    ].
    ^ EncodingError new
        defaultValue:(self defaultDecoderValue);
        parameter:offendingCode;
        messageText:'invalid code';
        suspendedContext:callerContext;
        raiseRequest
!
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
926 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
927 |
defaultDecoderValue
    "answer the codePoint which is placed into a decoded string, in case there is
     no unicode codePoint for a given encoded code. Here: 16rFFFF."

    ^ 16rFFFF
!
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
934 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
935 |
defaultEncoderValue
    "answer the code which is placed into an encoded string, in case there is
     no code for a given unicode codePoint. Here: the codePoint of $?."

    ^ $? codePoint
!
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
942 |
|
7919 | 943 |
encodingError
    "report that some unicode codePoint cannot be represented by this encoder.
     Raises an EncodingError as a request, with defaultEncoderValue as its default
     value and my sender as the suspended context; answers what raiseRequest answers.
     NOTE(review): the offending codePoint is only fetched if my sender is NOT
     #encode: / #decode: - this looks possibly inverted; confirm the intent."

    |callerContext callerSelector offendingCode|

    "/ must stay in the method itself: thisContext sender depends on the call depth
    callerContext := thisContext sender.
    callerSelector := callerContext selector.
    (callerSelector ~~ #encode: and:[callerSelector ~~ #decode:]) ifTrue:[
        offendingCode := callerContext methodHome argAt:1
    ].
    ^ EncodingError new
        defaultValue:(self defaultEncoderValue);
        parameter:offendingCode;
        messageText:'unrepresentable unicode';
        suspendedContext:callerContext;
        raiseRequest
! !
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
959 |
|
7972 | 960 |
!CharacterEncoder methodsFor:'printing'! |
961 |
||
962 |
printOn:aStream
    "append a description of the form <nameOfDecodedCode>-><nameOfEncoding> to aStream"

    aStream nextPutAll:(self nameOfDecodedCode).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(self nameOfEncoding)
! !
|
968 |
||
7892 | 969 |
!CharacterEncoder methodsFor:'private'! |
970 |
||
971 |
newString:size
    "answer a new string with room for size characters, to be filled by encodeString:.
     Not implemented here - concrete subclasses must redefine this."

    self subclassResponsibility
! !
|
974 |
||
7917 | 975 |
!CharacterEncoder methodsFor:'queries'! |
976 |
||
977 |
isNullEncoder
    "answer false here; redefined in NullEncoder to answer true"

    ^ false
!
980 |
||
981 |
nameOfDecodedCode
    "answer the name of the code I decode into (and encode from), as defined
     by my class. For most coders this is unicode; the few exceptions must
     redefine this."

    |myClass|

    myClass := self class.
    ^ myClass nameOfDecodedCode
!
|
987 |
||
988 |
nameOfEncoding
    "answer the name of my encoding, as defined by my class"

    |myClass|

    myClass := self class.
    ^ myClass nameOfEncoding
!
! |
|
991 |
||
992 |
userFriendlyNameOfEncoding
    "answer the user friendly name of my encoding, as defined by my class"

    |myClass|

    myClass := self class.
    ^ myClass userFriendlyNameOfEncoding
! !
995 |
||
7915 | 996 |
!CharacterEncoder::CompoundEncoder class methodsFor:'documentation'!

documentation
"
    A CompoundEncoder combines two real encoders (named decoder and encoder):
    to encode:
        code/string -> decoder (decode) -> encoder (encode) -> result
    to decode:
        code/string -> encoder (decode) -> decoder (encode) -> result

    [example:]
        |e|

        e := CompoundEncoder new.
        e encoder:ISO8859_5 decoder:KOI8_R.
        e decode:16rB0.  'CYRILLIC CAPITAL LETTER A; 16rB0 in 8859-5; 16rE1 in KOI8-R'.
        e encode:16rE1.
"
! !

!CharacterEncoder::CompoundEncoder methodsFor:'accessing'!

encoder:encoderArg decoder:decoderArg
    "set both coders I am composed of"

    encoder := encoderArg.
    decoder := decoderArg.
! !

!CharacterEncoder::CompoundEncoder methodsFor:'encoding & decoding'!

decode:aCode
    "let the encoder decode aCode, then let the decoder encode the outcome"

    |intermediate|

    intermediate := encoder decode:aCode.
    ^ decoder encode:intermediate
!

decodeString:aString
    "let the encoder decode aString, then let the decoder encode the outcome"

    |intermediate|

    intermediate := encoder decodeString:aString.
    ^ decoder encodeString:intermediate
!

encode:aCode
    "let the decoder decode aCode, then let the encoder encode the outcome"

    |intermediate|

    intermediate := decoder decode:aCode.
    ^ encoder encode:intermediate
!

encodeString:aString
    "let the decoder decode aString, then let the encoder encode the outcome"

    |intermediate|

    intermediate := decoder decodeString:aString.
    ^ encoder encodeString:intermediate
! !

!CharacterEncoder::CompoundEncoder methodsFor:'printing'!

printOn:aStream
    "append the decoder's encoding name and '->' to aStream,
     followed by the encoder's own printed representation"

    aStream nextPutAll:(decoder nameOfEncoding).
    aStream nextPutAll:'->'.
"/        nextPutAll:(decoder nameOfDecodedCode);
"/        nextPutAll:'->';
"/        nextPutAll:(encoder nameOfEncoding)
    encoder printOn:aStream
! !
|
1053 |
||
7932 | 1054 |
!CharacterEncoder::DefaultEncoder class methodsFor:'documentation'!

documentation
"
    This class is only a dummy; it exists for ST80 compatibility.
"
! !
|
1061 |
||
7915 | 1062 |
!CharacterEncoder::InverseEncoder class methodsFor:'documentation'!

documentation
"
    An InverseEncoder wraps another coder (its decoder) and swaps the directions:
    my encode is really the wrapped coder's decode,
    and my decode is really the wrapped coder's encode.
"
! !

!CharacterEncoder::InverseEncoder methodsFor:'accessing'!

decoder:something
    "set the wrapped coder whose directions I swap"

    decoder := something.
! !

!CharacterEncoder::InverseEncoder methodsFor:'encoding & decoding'!

decode:aCode
    "decoding with me means: encoding with the wrapped coder"

    |wrapped|

    wrapped := decoder.
    ^ wrapped encode:aCode
!

decodeString:aString
    "decoding with me means: encoding with the wrapped coder"

    |wrapped|

    wrapped := decoder.
    ^ wrapped encodeString:aString
!

encode:aCode
    "encoding with me means: decoding with the wrapped coder"

    |wrapped|

    wrapped := decoder.
    ^ wrapped decode:aCode
!

encodeString:aString
    "encoding with me means: decoding with the wrapped coder"

    |wrapped|

    wrapped := decoder.
    ^ wrapped decodeString:aString
! !

!CharacterEncoder::InverseEncoder methodsFor:'printing'!

printOn:aStream
    "append <nameOfEncoding>-><nameOfDecodedCode> of the wrapped coder to aStream
     (i.e. the reverse of what the wrapped coder prints for itself)"

    aStream nextPutAll:(decoder nameOfEncoding).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(decoder nameOfDecodedCode)
! !
|
1103 |
||
7915 | 1104 |
!CharacterEncoder::NullEncoder class methodsFor:'documentation'!

documentation
"
    A NullEncoder does nothing: codes and strings are answered
    unchanged in both directions.
"
! !

!CharacterEncoder::NullEncoder methodsFor:'encoding & decoding'!

decode:aCode
    "nothing to translate - answer aCode unchanged"

    ^ aCode
!

decodeString:aString
    "nothing to translate - answer aString itself (no copy is made)"

    ^ aString
!

encode:aCode
    "nothing to translate - answer aCode unchanged"

    ^ aCode
!

encodeString:aString
    "nothing to translate - answer aString itself (no copy is made)"

    ^ aString
! !

!CharacterEncoder::NullEncoder methodsFor:'queries'!

isNullEncoder
    "answer true - I do not translate anything"

    ^ true
! !
|
1135 |
||
7915 | 1136 |
!CharacterEncoder::OtherEncoding class methodsFor:'private'!

flushCode
    "intentionally empty - nothing is flushed for this class.
     NOTE(review): presumably this overrides an inherited flushCode - confirm
     against the class definition."
!

generateEncoderCode
    "intentionally empty - no encoder code is generated for this class"
! !
|
1143 |
||
7919 | 1144 |
!CharacterEncoder::TwoStepEncoder class methodsFor:'documentation'!

documentation
"
    A TwoStepEncoder runs two real encoders one after the other:
    to encode:
        code/string -> encoder1 (encode) -> encoder2 (encode) -> result
    to decode:
        code/string -> encoder2 (decode) -> encoder1 (decode) -> result
"
! !

!CharacterEncoder::TwoStepEncoder methodsFor:'accessing'!

encoder1:encoder1Arg encoder2:encoder2Arg
    "set both coders; encoder1 is applied first when encoding,
     encoder2 is applied first when decoding"

    encoder1 := encoder1Arg.
    encoder2 := encoder2Arg.
! !

!CharacterEncoder::TwoStepEncoder methodsFor:'encoding & decoding'!

decode:aCode
    "undo encoder2's step first, then encoder1's step"

    |intermediate|

    intermediate := encoder2 decode:aCode.
    ^ encoder1 decode:intermediate
!

decodeString:aString
    "undo encoder2's step first, then encoder1's step"

    |intermediate|

    intermediate := encoder2 decodeString:aString.
    ^ encoder1 decodeString:intermediate
!

encode:aCode
    "apply encoder1's step first, then encoder2's step"

    |intermediate|

    intermediate := encoder1 encode:aCode.
    ^ encoder2 encode:intermediate
!

encodeString:aString
    "apply encoder1's step first, then encoder2's step"

    |intermediate|

    intermediate := encoder1 encodeString:aString.
    ^ encoder2 encodeString:intermediate
! !

!CharacterEncoder::TwoStepEncoder methodsFor:'printing'!

printOn:aStream
    "append the whole chain to aStream:
     <encoder1 nameOfDecodedCode>-><encoder1 nameOfEncoding>-><encoder2 nameOfEncoding>"

    aStream nextPutAll:(encoder1 nameOfDecodedCode).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(encoder1 nameOfEncoding).
    aStream nextPutAll:'->'.
    aStream nextPutAll:(encoder2 nameOfEncoding)
! !
|
1193 |
||
7892 | 1194 |
!CharacterEncoder class methodsFor:'documentation'! |
1195 |
||
1196 |
version
    "answer the CVS header string (file, revision, date, author) of this source file"

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.57 2004-03-09 00:08:33 cg Exp $'
! !
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1199 |
|
7577df77ba95
character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents:
7893
diff
changeset
|
1200 |
CharacterEncoder initialize! |