#TUNING by stefan
author Stefan Vogel <sv@exept.de>
Thu, 16 Feb 2017 20:49:01 +0100
changeset 21478 2e63fbcbfa85
parent 21477 3a69148be78e
child 21479 5269eda738b4
#TUNING by stefan class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC added: #encodeCharacter:on: #encodeString:on: changed: #encodeString:
CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st
--- a/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st	Thu Feb 16 20:48:47 2017 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st	Thu Feb 16 20:49:01 2017 +0100
@@ -220,6 +220,33 @@
     ^ true
 !
 
+encodeCharacter:aUnicodeCharacter on:aStream
+    "write the UTF-8-MAC representation of aUnicodeCharacter onto aStream.
+     This is UTF-8 with composed characters decomposed (i.e. written as separate
+     codes - the base code first, then the compose code - not as one combined character).
+
+     For now, here is a limited version, which should work
+     at least for most european countries...
+    "
+
+    |codePoint composeCodePoint needExtra|
+
+    DecomposeMap isNil ifTrue:[    "set up the decompose map on first use"
+        self class initializeDecomposeMap
+    ].
+
+    codePoint := aUnicodeCharacter codePoint.
+    needExtra := self decompositionOf:codePoint into:[:baseCodePointArg :composeCodePointArg | 
+            codePoint := baseCodePointArg. composeCodePoint := composeCodePointArg    "replaces codePoint by its base code"
+        ].
+    aStream nextPutUtf8:codePoint.
+    needExtra ifTrue:[    "composeCodePoint is presumably set by the block above whenever needExtra is true - confirm in decompositionOf:into:"
+        aStream nextPutUtf8:composeCodePoint
+    ].
+
+    "Created: / 16-02-2017 / 17:45:18 / stefan"
+!
+
 encodeString:aUnicodeString
     "return the UTF-8-MAC representation of a aUnicodeString.
      This is UTF-8 with compose-characters decompose (i.e. as separate codes, not as
@@ -229,77 +256,10 @@
      at least for most european countries...
     "
 
-    |gen s decomp codePoint composeCodePoint|
-
-    DecomposeMap isNil ifTrue:[
-        self class initializeDecomposeMap
-    ].
-
-    gen := 
-        [:codePointArg |
-            |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
-
-            codePoint := codePointArg.
-            codePoint <= 16r7F ifTrue:[
-                s nextPut:(Character value:codePoint).
-            ] ifFalse:[
-                b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000).
-                v := codePoint bitShift:-6.
-                v <= 16r1F ifTrue:[
-                    s nextPut:(Character value:(v bitOr:2r11000000)).
-                    s nextPut:b1.
-                ] ifFalse:[
-                    b2 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
-                    v := v bitShift:-6.
-                    v <= 16r0F ifTrue:[
-                        s nextPut:(Character value:(v bitOr:2r11100000)).
-                        s nextPut:b2; nextPut:b1.
-                    ] ifFalse:[
-                        b3 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
-                        v := v bitShift:-6.
-                        v <= 16r07 ifTrue:[
-                            s nextPut:(Character value:(v bitOr:2r11110000)).
-                            s nextPut:b3; nextPut:b2; nextPut:b1.
-                        ] ifFalse:[
-                            b4 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
-                            v := v bitShift:-6.
-                            v <= 16r03 ifTrue:[
-                                s nextPut:(Character value:(v bitOr:2r11111000)).
-                                s nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
-                            ] ifFalse:[
-                                b5 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
-                                v := v bitShift:-6.
-                                v <= 16r01 ifTrue:[
-                                    s nextPut:(Character value:(v bitOr:2r11111100)).
-                                    s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
-                                ] ifFalse:[
-                                    "/ cannot happen - we only support up to 30 bit characters
-                                    self error:'ascii value > 31bit in utf8Encode'.
-                                ]
-                            ].
-                        ].
-                    ].
-                ].
-            ].
-        ].
-
-    decomp := 
-        [:baseCodePointArg :composeCodePointArg | 
-            codePoint := baseCodePointArg. composeCodePoint := composeCodePointArg
-        ].
+    |s|
 
     s := WriteStream on:(String uninitializedNew:aUnicodeString size).
-    aUnicodeString do:[:eachCharacter |
-        |needExtra|
-
-        codePoint := eachCharacter codePoint.
-        needExtra := self decompositionOf: codePoint into:decomp.
-        gen value:codePoint.
-        needExtra ifTrue:[
-            gen value:composeCodePoint
-        ].
-    ].
-
+    self encodeString:aUnicodeString on:s.
     ^ s contents
 
     "
@@ -317,6 +277,36 @@
      ISO10646_to_UTF8_MAC new decodeString:
          (ISO10646_to_UTF8_MAC new encodeString:'Packages aus VSE für Smalltalk_X') asByteArray 
     "
+
+    "Modified (format): / 16-02-2017 / 17:36:14 / stefan"
+!
+
+encodeString:aUnicodeString on:aStream
+    "write the UTF-8-MAC representation of aUnicodeString onto aStream.
+     This is UTF-8 with composed characters decomposed (i.e. written as separate
+     codes - the base code first, then the compose code - not as single combined characters).
+
+     For now, here is a limited version, which should work
+     at least for most european countries...
+    "
+
+    |sz "{Class: SmallInteger}" decomposeBlock codePoint composeCodePoint needExtra|
+    "NOTE(review): unlike encodeCharacter:on:, DecomposeMap is not checked for nil here - confirm that decompositionOf:into: copes with an uninitialized map"
+    decomposeBlock := [:baseCodePointArg :composeCodePointArg | 
+                          codePoint := baseCodePointArg. composeCodePoint := composeCodePointArg    "replaces codePoint by its base code"
+                      ].
+
+    sz := aUnicodeString size.
+    1 to:sz do:[:idx|
+        codePoint := (aUnicodeString at:idx) codePoint.
+        needExtra := self decompositionOf:codePoint into:decomposeBlock.    "one shared block is reused for every character"
+        aStream nextPutUtf8:codePoint.
+        needExtra ifTrue:[    "composeCodePoint is presumably set by decomposeBlock whenever needExtra is true - confirm in decompositionOf:into:"
+            aStream nextPutUtf8:composeCodePoint
+        ].
+    ].
+
+    "Created: / 16-02-2017 / 17:33:04 / stefan"
 ! !
 
 !ISO10646_to_UTF8_MAC methodsFor:'queries'!
@@ -328,10 +318,10 @@
 !ISO10646_to_UTF8_MAC class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
+    ^ '$Header$'
 !
 
 version_CVS
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
+    ^ '$Header$'
 ! !