189 |
191 |
190 encoder := CharacterEncoder encoderToEncodeFrom:#unicode into:#utf8. |
192 encoder := CharacterEncoder encoderToEncodeFrom:#unicode into:#utf8. |
191 |
193 |
192 "/ reversibility |
194 "/ reversibility |
193 (0 to:16r1FFFF) do:[:eachCodePoint | |
195 (0 to:16r1FFFF) do:[:eachCodePoint | |
194 |s1 s2 s3| |
196 |s1 s2 s3| |
195 |
197 |
196 s1 := (Character value:eachCodePoint) asString. |
198 s1 := (Character value:eachCodePoint) asString. |
197 s2 := encoder encodeString:s1. |
199 s2 := encoder encodeString:s1. |
198 s3 := encoder decodeString:s2. |
200 s3 := encoder decodeString:s2. |
199 self assert:(s1 = s3). |
201 self assert:(s1 = s3). |
200 ]. |
202 ]. |
201 |
203 |
202 "/ 00 .. 7F -> 0xxxxxxx |
204 "/ 00 .. 7F -> 0xxxxxxx |
203 #[16r00 16r01 16r02 16r04 16r08 16r10 16r20 16r40 |
205 #[16r00 16r01 16r02 16r04 16r08 16r10 16r20 16r40 |
204 16r03 16r07 16r0F 16r1F 16r3F 16r7F] |
206 16r03 16r07 16r0F 16r1F 16r3F 16r7F] |
205 do:[:eachCodePoint | |
207 do:[:eachCodePoint | |
206 |s1 s2 s3| |
208 |s1 s2 s3| |
207 |
209 |
208 s1 := (Character value:eachCodePoint) asString. |
210 s1 := (Character value:eachCodePoint) asString. |
209 s2 := encoder encodeString:s1. |
211 s2 := encoder encodeString:s1. |
210 s3 := encoder decodeString:s2. |
212 s3 := encoder decodeString:s2. |
211 self assert:(s1 = s2). |
213 self assert:(s1 = s2). |
212 self assert:(s2 = s3). |
214 self assert:(s2 = s3). |
213 self assert:(s2 size == 1). |
215 self assert:(s2 size == 1). |
214 ]. |
216 ]. |
215 |
217 |
216 "/ 80 .. 7FF -> 110xxxxx 10xxxxxx |
218 "/ 80 .. 7FF -> 110xxxxx 10xxxxxx |
217 #(16r80 16r100 16r200 16r400 |
219 #(16r80 16r100 16r200 16r400 |
218 16r0FF 16r1FF 16r3FF 16r7FF) |
220 16r0FF 16r1FF 16r3FF 16r7FF) |
219 do:[:eachCodePoint | |
221 do:[:eachCodePoint | |
220 |s1 s2 s3| |
222 |s1 s2 s3| |
221 |
223 |
222 s1 := (Character value:eachCodePoint) asString. |
224 s1 := (Character value:eachCodePoint) asString. |
223 s2 := encoder encodeString:s1. |
225 s2 := encoder encodeString:s1. |
224 self assert:(s2 size == 2). |
226 self assert:(s2 size == 2). |
225 self assert:((s2 first codePoint bitAnd:2r11100000) == 2r11000000). |
227 self assert:((s2 first codePoint bitAnd:2r11100000) == 2r11000000). |
226 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
228 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
227 s3 := encoder decodeString:s2. |
229 s3 := encoder decodeString:s2. |
228 self assert:(s1 = s3). |
230 self assert:(s1 = s3). |
229 ]. |
231 ]. |
230 |
232 |
231 "/ 800 .. FFFF -> 1110xxxx 10xxxxxx 10xxxxxx |
233 "/ 800 .. FFFF -> 1110xxxx 10xxxxxx 10xxxxxx |
232 #(16r800 16r1000 16r2000 16r4000 16r8000 |
234 #(16r800 16r1000 16r2000 16r4000 16r8000 |
233 16r0FFF 16r1FFF 16r3FFF 16r7FFF 16rFFFF) |
235 16r0FFF 16r1FFF 16r3FFF 16r7FFF 16rFFFF) |
234 do:[:eachCodePoint | |
236 do:[:eachCodePoint | |
235 |s1 s2 s3| |
237 |s1 s2 s3| |
236 |
238 |
237 s1 := (Character value:eachCodePoint) asString. |
239 s1 := (Character value:eachCodePoint) asString. |
238 s2 := encoder encodeString:s1. |
240 s2 := encoder encodeString:s1. |
239 self assert:(s2 size == 3). |
241 self assert:(s2 size == 3). |
240 self assert:((s2 first codePoint bitAnd:2r11110000) == 2r11100000). |
242 self assert:((s2 first codePoint bitAnd:2r11110000) == 2r11100000). |
241 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
243 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
242 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
244 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
243 s3 := encoder decodeString:s2. |
245 s3 := encoder decodeString:s2. |
244 self assert:(s1 = s3). |
246 self assert:(s1 = s3). |
245 ]. |
247 ]. |
246 |
248 |
247 "/ 10000 .. 1FFFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
249 "/ 10000 .. 1FFFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
248 #(16r10000 16r20000 16r40000 16r80000 16r10000 |
250 #(16r10000 16r20000 16r40000 16r80000 16r10000 |
249 16r1FFFF 16r3FFFF 16r7FFFF 16rFFFFF 16r1FFFFF) |
251 16r1FFFF 16r3FFFF 16r7FFFF 16rFFFFF 16r1FFFFF) |
250 do:[:eachCodePoint | |
252 do:[:eachCodePoint | |
251 |s1 s2 s3| |
253 |s1 s2 s3| |
252 |
254 |
253 s1 := (Character value:eachCodePoint) asString. |
255 s1 := (Character value:eachCodePoint) asString. |
254 s2 := encoder encodeString:s1. |
256 s2 := encoder encodeString:s1. |
255 self assert:(s2 size == 4). |
257 self assert:(s2 size == 4). |
256 self assert:((s2 first codePoint bitAnd:2r11111000) == 2r11110000). |
258 self assert:((s2 first codePoint bitAnd:2r11111000) == 2r11110000). |
257 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
259 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
258 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
260 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
259 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
261 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
260 s3 := encoder decodeString:s2. |
262 s3 := encoder decodeString:s2. |
261 self assert:(s1 = s3). |
263 self assert:(s1 = s3). |
262 ]. |
264 ]. |
263 |
265 |
264 "/ 200000 .. 3FFFFFF -> 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
266 "/ 200000 .. 3FFFFFF -> 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
265 #(16r200000 16r400000 16r800000 16r1000000 16r2000000 |
267 #(16r200000 16r400000 16r800000 16r1000000 16r2000000 |
266 16r3FFFFF 16r7FFFFF 16r0FFFFFF 16r1FFFFFF 16r3FFFFFF) |
268 16r3FFFFF 16r7FFFFF 16r0FFFFFF 16r1FFFFFF 16r3FFFFFF) |
267 do:[:eachCodePoint | |
269 do:[:eachCodePoint | |
268 |s1 s2 s3| |
270 |s1 s2 s3| |
269 |
271 |
270 s1 := (Character value:eachCodePoint) asString. |
272 s1 := (Character value:eachCodePoint) asString. |
271 s2 := encoder encodeString:s1. |
273 s2 := encoder encodeString:s1. |
272 self assert:(s2 size == 5). |
274 self assert:(s2 size == 5). |
273 self assert:((s2 first codePoint bitAnd:2r11111100) == 2r11111000). |
275 self assert:((s2 first codePoint bitAnd:2r11111100) == 2r11111000). |
274 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
276 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
275 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
277 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
276 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
278 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
277 self assert:((s2 fifth codePoint bitAnd:2r11000000) == 2r10000000). |
279 self assert:((s2 fifth codePoint bitAnd:2r11000000) == 2r10000000). |
278 s3 := encoder decodeString:s2. |
280 s3 := encoder decodeString:s2. |
279 self assert:(s1 = s3). |
281 self assert:(s1 = s3). |
280 ]. |
282 ]. |
281 |
283 |
282 "/ ST/X limitation: only 30 bit integers (to avoid largeInteger codePoint) |
284 "/ ST/X limitation: only 30 bit integers (to avoid largeInteger codePoint) |
283 |
285 |
284 "/ 4000000 .. 7FFFFFFF -> 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
286 "/ 4000000 .. 7FFFFFFF -> 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
285 #(16r4000000 16r8000000 16r10000000 16r20000000 "16r40000000" |
287 #(16r4000000 16r8000000 16r10000000 16r20000000 "16r40000000" |
286 16r7FFFFFF 16r0FFFFFFF 16r1FFFFFFF "16r3FFFFFFF 16r7FFFFFFF") |
288 16r7FFFFFF 16r0FFFFFFF 16r1FFFFFFF "16r3FFFFFFF 16r7FFFFFFF") |
287 do:[:eachCodePoint | |
289 do:[:eachCodePoint | |
288 |s1 s2 s3| |
290 |s1 s2 s3| |
289 |
291 |
290 s1 := (Character value:eachCodePoint) asString. |
292 s1 := (Character value:eachCodePoint) asString. |
291 s2 := encoder encodeString:s1. |
293 s2 := encoder encodeString:s1. |
292 self assert:(s2 size == 6). |
294 self assert:(s2 size == 6). |
293 self assert:((s2 first codePoint bitAnd:2r11111110) == 2r11111100). |
295 self assert:((s2 first codePoint bitAnd:2r11111110) == 2r11111100). |
294 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
296 self assert:((s2 second codePoint bitAnd:2r11000000) == 2r10000000). |
295 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
297 self assert:((s2 third codePoint bitAnd:2r11000000) == 2r10000000). |
296 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
298 self assert:((s2 fourth codePoint bitAnd:2r11000000) == 2r10000000). |
297 self assert:((s2 fifth codePoint bitAnd:2r11000000) == 2r10000000). |
299 self assert:((s2 fifth codePoint bitAnd:2r11000000) == 2r10000000). |
298 self assert:((s2 sixth codePoint bitAnd:2r11000000) == 2r10000000). |
300 self assert:((s2 sixth codePoint bitAnd:2r11000000) == 2r10000000). |
299 s3 := encoder decodeString:s2. |
301 s3 := encoder decodeString:s2. |
300 self assert:(s1 = s3). |
302 self assert:(s1 = s3). |
301 ]. |
303 ]. |
302 |
304 |
303 " |
305 " |
304 self new testUTF8 |
306 self new testUTF8 |
|
307 " |
|
308 ! |
|
309 |
|
testUTF8_2
    "Checks the exact UTF-8 byte sequences produced for single code points
     whose encodings are 4, 5 and 6 bytes long. For three of those code points
     the encoded bytes are also decoded again and the first decoded character
     is compared with the original one."

    |utf8 expectBytes expectRoundTrip|

    utf8 := CharacterEncoder encoderToEncodeFrom:#unicode into:#utf8.

    "/ encode one code point, compare the result with the expected bytes and
    "/ answer the encoded result so that the caller can decode it afterwards
    expectBytes := [:codePoint :utf8Bytes |
        |encoded|

        encoded := utf8 encodeString:(Character value:codePoint).
        self assert:(encoded asByteArray = utf8Bytes).
        encoded
    ].

    "/ same as expectBytes, followed by a decode of the encoded result
    expectRoundTrip := [:codePoint :utf8Bytes |
        |encoded decoded|

        encoded := expectBytes value:codePoint value:utf8Bytes.
        decoded := utf8 decodeString:encoded.
        self assert:(decoded first = (Character value:codePoint)).
    ].

    "/ last code point encoded in 4 bytes, first one encoded in 5 bytes
    expectRoundTrip value:16r1FFFFF value:#[16rF7 16rBF 16rBF 16rBF ].
    expectRoundTrip value:16r200000 value:#[16rF8 16r88 16r80 16r80 16r80 ].

    "/ 5 byte encodings; only the second byte changes up to 16r7FFFFF
    expectBytes value:16r2FFFFF value:#[16rF8 16r8B 16rBF 16rBF 16rBF ].
    expectBytes value:16r3FFFFF value:#[16rF8 16r8F 16rBF 16rBF 16rBF ].
    expectBytes value:16r4FFFFF value:#[16rF8 16r93 16rBF 16rBF 16rBF ].
    expectBytes value:16r5FFFFF value:#[16rF8 16r97 16rBF 16rBF 16rBF ].
    expectBytes value:16r6FFFFF value:#[16rF8 16r9B 16rBF 16rBF 16rBF ].
    expectBytes value:16r7FFFFF value:#[16rF8 16r9F 16rBF 16rBF 16rBF ].
    expectBytes value:16r800000 value:#[16rF8 16rA0 16r80 16r80 16r80 ].

    "/ last code point encoded in 5 bytes, then 6 byte encodings
    expectBytes value:16r3FFFFFF value:#[16rFB 16rBF 16rBF 16rBF 16rBF ].
    expectBytes value:16r4000000 value:#[16rFC 16r84 16r80 16r80 16r80 16r80 ].
    expectBytes value:16r7FFFFFF value:#[16rFC 16r87 16rBF 16rBF 16rBF 16rBF ].
    expectBytes value:16rFFFFFFF value:#[16rFC 16r8F 16rBF 16rBF 16rBF 16rBF ].

    "/ 16r3FFFFFFF (30 bits) is the largest code point exercised here
    expectRoundTrip value:16r3FFFFFFF value:#[16rFC 16rBF 16rBF 16rBF 16rBF 16rBF ].

    "/ The check for the 31 bit value 16r7FFFFFFF is kept disabled; the original
    "/ note attributes this to an ST/X limit on the size of character code points.
    "/ expectBytes value:16r7FFFFFFF value:#[16rFD 16rBF 16rBF 16rBF 16rBF 16rBF ].

    "
     self new testUTF8_2
    "
306 ! ! |
344 ! ! |
307 |
345 |
308 !CharacterEncoderTests class methodsFor:'documentation'! |
346 !CharacterEncoderTests class methodsFor:'documentation'! |
309 |
347 |