120 "Modified: / 10-01-2018 / 22:59:20 / stefan" |
121 "Modified: / 10-01-2018 / 22:59:20 / stefan" |
121 ! ! |
122 ! ! |
122 |
123 |
123 !ISO10646_to_UTF8 methodsFor:'encoding & decoding'! |
124 !ISO10646_to_UTF8 methodsFor:'encoding & decoding'! |
124 |
125 |
125 decode:aCode |

126 "given an integer in my encoding, return a unicode codePoint for it. Not supported here: the body only sends #shouldNotImplement" |

127 |

128 self shouldNotImplement "/ no single-byte conversion is possible |

129 |

130 "Modified (comment): / 03-01-2018 / 23:15:37 / stefan" |

131 ! |
|
132 |
|
133 decodeString:aStringOrByteCollection |
126 decodeString:aStringOrByteCollection |
134 "given a string in UTF8 encoding, |
127 "given a string in UTF8 encoding, |
135 return a new string containing the same characters, in Unicode encoding. |
128 return a new string containing the same characters, in Unicode encoding. |
136 Returns either a normal String, a Unicode16String or a Unicode32String instance. |
129 Returns either a normal String, a Unicode16String or a Unicode32String instance. |
137 This is only useful when reading from external sources or communicating with |
130 This is only useful when reading from external sources or communicating with |
140 This only handles up to 30-bit characters." |
133 This only handles up to 30-bit characters." |
141 |
134 |
142 ^ CharacterArray decodeFromUTF8:aStringOrByteCollection. |
135 ^ CharacterArray decodeFromUTF8:aStringOrByteCollection. |
143 ! |
136 ! |
144 |
137 |
145 encode:aCode |

146 "given a codePoint in unicode, return a byte in my encoding for it. Not supported here: the body only sends #shouldNotImplement" |

147 |

148 self shouldNotImplement "/ no conversion to a single byte is possible |

149 |

150 "Modified (comment): / 03-01-2018 / 23:13:58 / stefan" |

151 ! |
|
152 |
|
153 encodeCharacter:aUnicodeCharacter on:aStream |

154 "given a character in unicode, encode it onto aStream (done by sending #nextPutUtf8: to aStream)." |

155 |

156 aStream nextPutUtf8:aUnicodeCharacter. |

157 |

158 "Created: / 16-02-2017 / 16:20:57 / stefan" |

159 ! |
|
160 |
|
161 encodeString:aUnicodeString |
138 encodeString:aUnicodeString |
162 "return the UTF-8 representation of a Unicode string (obtained by sending #utf8Encoded to it). |
139 "return the UTF-8 representation of a Unicode string (obtained by sending #utf8Encoded to it). |
163 The resulting string is only useful to be stored on some external file, |
140 The resulting string is only useful to be stored on some external file, |
164 not for being used inside ST/X." |
141 not for being used inside ST/X." |
165 |
142 |
166 ^ aUnicodeString utf8Encoded. |
143 ^ aUnicodeString utf8Encoded. |
|
144 ! ! |
|
145 |
|
146 !ISO10646_to_UTF8 methodsFor:'queries'! |
|
147 |
|
148 characterSize:charOrCodePoint |

149 "return the number of bytes required to encode charOrCodePoint (converted with #asCharacter, then asked for #utf8BytesPerCharacter)" |

150 |

151 ^ charOrCodePoint asCharacter utf8BytesPerCharacter. |

152 |

153 "Created: / 15-06-2005 / 15:16:22 / janfrog" |

154 "Modified: / 03-01-2018 / 23:05:59 / stefan" |

155 ! |
|
156 |
|
157 nameOfEncoding |
"return the symbol naming this encoding (#utf8)" |
158 ^ #utf8 |

159 ! ! |
|
160 |
|
161 !ISO10646_to_UTF8 methodsFor:'stream support'! |
|
162 |
|
163 encodeCharacter:aUnicodeCharacter on:aStream |

164 "given a character in unicode, encode it onto aStream (done by sending #nextPutUtf8: to aStream)." |

165 |

166 aStream nextPutUtf8:aUnicodeCharacter. |

167 |

168 "Created: / 16-02-2017 / 16:20:57 / stefan" |
167 ! |
169 ! |
168 |
170 |
169 encodeString:aUnicodeString on:aStream |
171 encodeString:aUnicodeString on:aStream |
170 "given a string in unicode, encode it onto aStream (done by sending #nextPutAllUtf8: to aStream)." |
172 "given a string in unicode, encode it onto aStream (done by sending #nextPutAllUtf8: to aStream)." |
171 |
173 |
172 aStream nextPutAllUtf8:aUnicodeString. |
174 aStream nextPutAllUtf8:aUnicodeString. |
173 |
175 |
174 "Created: / 16-02-2017 / 16:27:31 / stefan" |
176 "Created: / 16-02-2017 / 16:27:31 / stefan" |
175 ! ! |
177 ! |
176 |
178 |
177 !ISO10646_to_UTF8 methodsFor:'queries'! |
179 readNext:charactersToReadArg charactersFrom:aStream |
178 |
180 "decode the next charactersToReadArg characters on aStream from utf-8 to unicode" |
179 characterSize:charOrCodePoint |
181 |
180 "return the number of bytes required to encode charOrCodePoint" |
182 |s c cp hasUtf8 charactersToRead "{ Class:SmallInteger }"| |
181 |
183 |
182 ^ charOrCodePoint asCharacter utf8BytesPerCharacter. |
184 charactersToRead := charactersToReadArg. |
183 |

184 "Created: / 15-06-2005 / 15:16:22 / janfrog" |

185 "Modified: / 03-01-2018 / 23:05:59 / stefan" |

186 ! |

187 |

188 nameOfEncoding |

189 ^ #utf8 |

190 ! ! |

191 |

192 !ISO10646_to_UTF8 methodsFor:'stream support'! |

193 |

194 readNext:charactersToRead charactersFrom:stream |

195 | s c cp hasUtf8| |

196 |

197 hasUtf8 := false. |
185 hasUtf8 := false. |
198 "stream may be either text or bytes" |
186 "stream may be either text or bytes" |
199 s := (stream contentsSpecies new:charactersToRead) writeStream. |
187 s := (aStream contentsSpecies new:charactersToRead) writeStream. |
200 charactersToRead timesRepeat:[ |
188 charactersToRead timesRepeat:[ |
201 c := stream next. |
189 c := aStream next. |
202 s nextPut:c. |
190 s nextPut:c. |
203 cp := c codePoint. |
191 cp := c codePoint. |
204 (cp bitTest:16r80) ifTrue:[ |
192 (cp bitTest:16r80) ifTrue:[ |
205 hasUtf8 := true. |
193 hasUtf8 := true. |
206 s nextPutAll:(stream next:(self class bytesToReadFor:cp)-1). |
194 s nextPutAll:(aStream next:(self class bytesToReadFor:cp)-1). |
207 ]. |
195 ]. |
208 ]. |
196 ]. |
209 hasUtf8 ifTrue:[ |
197 hasUtf8 ifTrue:[ |
210 ^ self decodeString:s contents. |
198 ^ self decodeString:s contents. |
211 ]. |
199 ]. |
212 ^ s contents asString |
200 ^ s contents asString |
213 |
201 |
214 "Created: / 16-06-2005 / 11:45:14 / masca" |
202 "Created: / 16-06-2005 / 11:45:14 / masca" |
215 "Modified: / 10-01-2018 / 22:28:39 / stefan" |
203 "Modified (comment): / 17-01-2018 / 13:24:42 / stefan" |
216 ! |
204 ! |
217 |
205 |
218 readNextCharacterFrom:aStream |
206 readNextCharacterFrom:aStream |

207 "decode the next character or byte on aStream from utf-8 to unicode (delegates to Character utf8DecodeFrom:)" |

208 |
219 ^ Character utf8DecodeFrom:aStream. |
209 ^ Character utf8DecodeFrom:aStream. |
220 |
210 |
221 "Created: / 14-06-2005 / 17:03:59 / janfrog" |
211 "Created: / 14-06-2005 / 17:03:59 / janfrog" |
222 "Modified: / 10-01-2018 / 17:35:40 / stefan" |
212 "Modified: / 10-01-2018 / 17:35:40 / stefan" |

213 "Modified (comment): / 17-01-2018 / 13:24:08 / stefan" |
223 ! ! |
214 ! ! |
224 |
215 |
225 !ISO10646_to_UTF8 class methodsFor:'documentation'! |
216 !ISO10646_to_UTF8 class methodsFor:'documentation'! |
226 |
217 |
227 version |
218 version |