changeset 18625 | 37d697b9bf8d |
parent 18604 | 54caf7b64994 |
child 18630 | a74d669db937 |
child 19838 | a6ca726d596c |
18624:1f113cce940e | 18625:37d697b9bf8d |
---|---|
1 "{ Encoding: utf8 }" |
|
2 |
|
3 " |
1 " |
4 COPYRIGHT (c) 2004 by eXept Software AG |
2 COPYRIGHT (c) 2004 by eXept Software AG |
5 All Rights Reserved |
3 All Rights Reserved |
6 |
4 |
7 This software is furnished under a license and may be used |
5 This software is furnished under a license and may be used |
52 |
50 |
53 |
51 |
54 Decoding (utf8 to unicode): |
52 Decoding (utf8 to unicode): |
55 |t| |
53 |t| |
56 |
54 |
57 t := ISO10646_to_UTF8 encodeString:'Helloœ'. |
55 t := ISO10646_to_UTF8 encodeString:'Helloœ'. |
58 ISO10646_to_UTF8 decodeString:t. |
56 ISO10646_to_UTF8 decodeString:t. |
59 " |
57 " |
60 ! ! |
58 ! ! |
61 |
59 |
62 !ISO10646_to_UTF8 class methodsFor:'instance creation'! |
60 !ISO10646_to_UTF8 class methodsFor:'instance creation'! |
346 not for being used inside ST/X. |
344 not for being used inside ST/X. |
347 |
345 |
348 If you work a lot with utf8 encoded textFiles, |
346 If you work a lot with utf8 encoded textFiles, |
349 this is a first-class candidate for a primitive." |
347 this is a first-class candidate for a primitive." |
350 |
348 |
351 |s| |
349 |s |
350 stringSize "{ Class: SmallInteger }"| |
|
352 |
351 |
353 "/ avoid creation of new strings if possible |
352 "/ avoid creation of new strings if possible |
354 aUnicodeString containsNon7BitAscii ifFalse:[ |
353 aUnicodeString containsNon7BitAscii ifFalse:[ |
355 ^ aUnicodeString asSingleByteString |
354 ^ aUnicodeString asSingleByteString |
356 ]. |
355 ]. |
357 |
356 |
358 s := WriteStream on:(String uninitializedNew:(aUnicodeString size * 3 // 2)). |
357 stringSize := aUnicodeString size. |
359 aUnicodeString do:[:eachCharacter | |
358 s := WriteStream on:(String uninitializedNew:(stringSize * 3 // 2)). |
360 |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"| |
359 1 to:stringSize do:[:idx | |
361 |
360 |character codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"| |
362 codePoint := eachCharacter codePoint. |
361 |
362 character := aUnicodeString at:idx. |
|
363 codePoint := character codePoint. |
|
363 codePoint <= 16r7F ifTrue:[ |
364 codePoint <= 16r7F ifTrue:[ |
364 s nextPut:eachCharacter. |
365 s nextPut:character. |
365 ] ifFalse:[ |
366 ] ifFalse:[ |
366 b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000). |
367 b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000). |
367 v := codePoint bitShift:-6. |
368 v := codePoint bitShift:-6. |
368 v <= 16r1F ifTrue:[ |
369 v <= 16r1F ifTrue:[ |
369 s nextPut:(Character value:(v bitOr:2r11000000)). |
370 s nextPut:(Character value:(v bitOr:2r11000000)). |
392 v <= 16r01 ifTrue:[ |
393 v <= 16r01 ifTrue:[ |
393 s nextPut:(Character value:(v bitOr:2r11111100)). |
394 s nextPut:(Character value:(v bitOr:2r11111100)). |
394 s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1. |
395 s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1. |
395 ] ifFalse:[ |
396 ] ifFalse:[ |
396 "/ cannot happen - we only support up to 30 bit characters |
397 "/ cannot happen - we only support up to 30 bit characters |
397 EncodingError raiseWith:eachCharacter errorString:'codePoint > 31bit in #utf8Encode'. |
398 EncodingError raiseWith:character errorString:'codePoint > 31bit in #utf8Encode'. |
398 ] |
399 ] |
399 ]. |
400 ]. |
400 ]. |
401 ]. |
401 ]. |
402 ]. |
402 ]. |
403 ]. |