--- a/CharacterEncoderImplementations__ISO10646_to_UTF8.st Thu Jul 23 13:45:44 2015 +0200
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8.st Thu Jul 23 13:46:22 2015 +0200
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
@@ -54,7 +52,7 @@
Decoding (utf8 to unicode):
|t|
- t := ISO10646_to_UTF8 encodeString:'Helloœ'.
+ t := ISO10646_to_UTF8 encodeString:'Helloœ'.
ISO10646_to_UTF8 decodeString:t.
"
! !
@@ -348,20 +346,23 @@
If you work a lot with utf8 encoded textFiles,
this is a first-class candidate for a primitive."
- |s|
+ |s
+ stringSize "{ Class: SmallInteger }"|
"/ avoid creation of new strings if possible
aUnicodeString containsNon7BitAscii ifFalse:[
^ aUnicodeString asSingleByteString
].
- s := WriteStream on:(String uninitializedNew:(aUnicodeString size * 3 // 2)).
- aUnicodeString do:[:eachCharacter |
- |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
+ stringSize := aUnicodeString size.
+ s := WriteStream on:(String uninitializedNew:(stringSize * 3 // 2)).
+ 1 to:stringSize do:[:idx |
+ |character codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
- codePoint := eachCharacter codePoint.
+ character := aUnicodeString at:idx.
+ codePoint := character codePoint.
codePoint <= 16r7F ifTrue:[
- s nextPut:eachCharacter.
+ s nextPut:character.
] ifFalse:[
b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000).
v := codePoint bitShift:-6.
@@ -394,7 +395,7 @@
s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
] ifFalse:[
"/ cannot happen - we only support up to 30 bit characters
- EncodingError raiseWith:eachCharacter errorString:'codePoint > 31bit in #utf8Encode'.
+ EncodingError raiseWith:character errorString:'codePoint > 31bit in #utf8Encode'.
]
].
].