#TUNING by stefan
class: CharacterEncoderImplementations::ISO10646_to_UTF8_MAC
added:
#encodeCharacter:on:
#encodeString:on:
changed: #encodeString:
--- a/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st Thu Feb 16 20:48:47 2017 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st Thu Feb 16 20:49:01 2017 +0100
@@ -220,6 +220,33 @@
^ true
!
+encodeCharacter:aUnicodeCharacter on:aStream
+    "write the UTF-8-MAC encoding of the single character aUnicodeCharacter onto aStream.
+     This is UTF-8 with compose-characters decomposed: when decompositionOf:into:
+     answers true, the base code point is written first and the compose code
+     point right after it; otherwise only a single code point is written.
+
+     The decompose map is set up on first use.
+    "
+
+    |baseCode combiningCode isDecomposed|
+
+    DecomposeMap isNil ifTrue:[ self class initializeDecomposeMap ].
+
+    baseCode := aUnicodeCharacter codePoint.
+    isDecomposed := self
+        decompositionOf:baseCode
+        into:[:base :combining |
+            baseCode := base.
+            combiningCode := combining
+        ].
+
+    aStream nextPutUtf8:baseCode.
+    isDecomposed ifTrue:[ aStream nextPutUtf8:combiningCode ].
+
+    "Created: / 16-02-2017 / 17:45:18 / stefan"
+!
+
encodeString:aUnicodeString
"return the UTF-8-MAC representation of a aUnicodeString.
This is UTF-8 with compose-characters decompose (i.e. as separate codes, not as
@@ -229,77 +256,10 @@
at least for most european countries...
"
- |gen s decomp codePoint composeCodePoint|
-
- DecomposeMap isNil ifTrue:[
- self class initializeDecomposeMap
- ].
-
- gen :=
- [:codePointArg |
- |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
-
- codePoint := codePointArg.
- codePoint <= 16r7F ifTrue:[
- s nextPut:(Character value:codePoint).
- ] ifFalse:[
- b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000).
- v := codePoint bitShift:-6.
- v <= 16r1F ifTrue:[
- s nextPut:(Character value:(v bitOr:2r11000000)).
- s nextPut:b1.
- ] ifFalse:[
- b2 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
- v := v bitShift:-6.
- v <= 16r0F ifTrue:[
- s nextPut:(Character value:(v bitOr:2r11100000)).
- s nextPut:b2; nextPut:b1.
- ] ifFalse:[
- b3 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
- v := v bitShift:-6.
- v <= 16r07 ifTrue:[
- s nextPut:(Character value:(v bitOr:2r11110000)).
- s nextPut:b3; nextPut:b2; nextPut:b1.
- ] ifFalse:[
- b4 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
- v := v bitShift:-6.
- v <= 16r03 ifTrue:[
- s nextPut:(Character value:(v bitOr:2r11111000)).
- s nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
- ] ifFalse:[
- b5 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
- v := v bitShift:-6.
- v <= 16r01 ifTrue:[
- s nextPut:(Character value:(v bitOr:2r11111100)).
- s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
- ] ifFalse:[
- "/ cannot happen - we only support up to 30 bit characters
- self error:'ascii value > 31bit in utf8Encode'.
- ]
- ].
- ].
- ].
- ].
- ].
- ].
-
- decomp :=
- [:baseCodePointArg :composeCodePointArg |
- codePoint := baseCodePointArg. composeCodePoint := composeCodePointArg
- ].
+ |s|
s := WriteStream on:(String uninitializedNew:aUnicodeString size).
- aUnicodeString do:[:eachCharacter |
- |needExtra|
-
- codePoint := eachCharacter codePoint.
- needExtra := self decompositionOf: codePoint into:decomp.
- gen value:codePoint.
- needExtra ifTrue:[
- gen value:composeCodePoint
- ].
- ].
-
+ self encodeString:aUnicodeString on:s.
^ s contents
"
@@ -317,6 +277,36 @@
ISO10646_to_UTF8_MAC new decodeString:
(ISO10646_to_UTF8_MAC new encodeString:'Packages aus VSE für Smalltalk_X') asByteArray
"
+
+ "Modified (format): / 16-02-2017 / 17:36:14 / stefan"
+!
+
+encodeString:aUnicodeString on:aStream
+    "write the UTF-8-MAC representation of aUnicodeString onto aStream.
+     This is UTF-8 with compose-characters decomposed (i.e. as separate codes, not as
+     single combined characters).
+     For now, here is a limited version, which should work
+     at least for most european countries...
+    "
+
+    |sz "{Class: SmallInteger}" decomposeBlock codePoint composeCodePoint needExtra|
+
+    "make sure the decompose map exists (as encodeCharacter:on: does)"
+    DecomposeMap isNil ifTrue:[ self class initializeDecomposeMap ].
+
+    "invoked by decompositionOf:into: with the base and the compose code point"
+    decomposeBlock := [:baseCodePointArg :composeCodePointArg |
+        codePoint := baseCodePointArg. composeCodePoint := composeCodePointArg
+    ].
+    sz := aUnicodeString size.
+    1 to:sz do:[:idx|
+        codePoint := (aUnicodeString at:idx) codePoint.
+        needExtra := self decompositionOf:codePoint into:decomposeBlock.
+        aStream nextPutUtf8:codePoint.
+        needExtra ifTrue:[ aStream nextPutUtf8:composeCodePoint ].
+    ].
+
+    "Created: / 16-02-2017 / 17:33:04 / stefan"
! !
!ISO10646_to_UTF8_MAC methodsFor:'queries'!
@@ -328,10 +318,10 @@
!ISO10646_to_UTF8_MAC class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
+ ^ '$Header$'    "answers the header string; reset from the expanded CVS 1.8 form to the bare keyword"
!
version_CVS
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
+ ^ '$Header$'    "answers the CVS header string; reset from the expanded 1.8 form to the bare keyword"
! !