CharacterEncoderImplementations__ISO10646_to_UTF8.st
changeset 18625 37d697b9bf8d
parent 18604 54caf7b64994
child 18630 a74d669db937
child 19838 a6ca726d596c
--- a/CharacterEncoderImplementations__ISO10646_to_UTF8.st	Thu Jul 23 13:45:44 2015 +0200
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8.st	Thu Jul 23 13:46:22 2015 +0200
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
 "
  COPYRIGHT (c) 2004 by eXept Software AG
 	      All Rights Reserved
@@ -54,7 +52,7 @@
   Decoding (utf8 to unicode):
      |t|
 
-     t := ISO10646_to_UTF8 encodeString:'Helloœ'.
+     t := ISO10646_to_UTF8 encodeString:'Helloœ'.
      ISO10646_to_UTF8 decodeString:t.
 "
 ! !
@@ -348,20 +346,23 @@
      If you work a lot with utf8 encoded textFiles,
      this is a first-class candidate for a primitive."
 
-    |s|
+    |s
+     stringSize "{ Class: SmallInteger }"|
 
     "/ avoid creation of new strings if possible
     aUnicodeString containsNon7BitAscii ifFalse:[
         ^ aUnicodeString asSingleByteString
     ].
 
-    s := WriteStream on:(String uninitializedNew:(aUnicodeString size * 3 // 2)).
-    aUnicodeString do:[:eachCharacter |
-        |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
+    stringSize := aUnicodeString size.
+    s := WriteStream on:(String uninitializedNew:(stringSize * 3 // 2)).
+    1 to:stringSize do:[:idx |
+        |character codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
 
-        codePoint := eachCharacter codePoint.
+        character := aUnicodeString at:idx.
+        codePoint := character codePoint.
         codePoint <= 16r7F ifTrue:[
-            s nextPut:eachCharacter.
+            s nextPut:character.
         ] ifFalse:[
             b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000).
             v := codePoint bitShift:-6.
@@ -394,7 +395,7 @@
                                 s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
                             ] ifFalse:[
                                 "/ cannot happen - we only support up to 30 bit characters
-                                EncodingError raiseWith:eachCharacter errorString:'codePoint > 31bit in #utf8Encode'.
+                                EncodingError raiseWith:character errorString:'codePoint > 31bit in #utf8Encode'.
                             ]
                         ].
                     ].