#REFACTORING by stefan
author Stefan Vogel <sv@exept.de>
Fri, 19 Jan 2018 14:42:22 +0100
changeset 22472 45940fc5e0ad
parent 22471 1359d953a751
child 22473 35fd10859181
#REFACTORING by stefan; class: CharacterEncoderImplementations::ISO10646_to_UTF16BE; class definition; added: #characterSize:, #readNextCharacterFrom:; removed: #decode:, #nextPutTwoByteValue:to:; changed: #encodeString:; category of: #encodeCharacter:on:, #encodeString:on:; class: CharacterEncoderImplementations::ISO10646_to_UTF16BE class; comment/format in: #examples. Refactor inheritance.
CharacterEncoderImplementations__ISO10646_to_UTF16BE.st
--- a/CharacterEncoderImplementations__ISO10646_to_UTF16BE.st	Fri Jan 19 14:41:53 2018 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF16BE.st	Fri Jan 19 14:42:22 2018 +0100
@@ -15,7 +15,7 @@
 
 "{ NameSpace: CharacterEncoderImplementations }"
 
-TwoByteEncoder subclass:#ISO10646_to_UTF16BE
+VariableBytesEncoder subclass:#ISO10646_to_UTF16BE
 	instanceVariableNames:''
 	classVariableNames:''
 	poolDictionaries:''
@@ -61,23 +61,17 @@
   Decoding (utf16BE to unicode):
      |t|
 
-     t := ISO10646_to_UTF16BE encodeString:''.
+     t := ISO10646_to_UTF16BE encodeString:'ÄÖÜß'.
      ISO10646_to_UTF16BE decodeString:t.
 
   Decoding (utf16LE-Bytes to unicode):
-     |bytes|
-
-     bytes := #[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ].
-     ISO10646_to_UTF16LE decodeString:bytes.
+     ISO10646_to_UTF16LE decodeString:#[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ].
+     ISO10646_to_UTF16BE decodeString:#[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ] copy swapBytes.
 "
 ! !
 
 !ISO10646_to_UTF16BE methodsFor:'encoding & decoding'!
 
-decode:aCode
-    ^ aCode
-!
-
 decodeString:aStringOrByteCollection
     "given a byteArray (2-bytes per character) or unsignedShortArray in UTF16 encoding,
      return a new string containing the same characters, in 8, 16bit (or more) encoding.
@@ -203,47 +197,21 @@
     ^ aCode
 !
 
-encodeCharacter:aUnicodeCharacter on:aStream
-    "given a string in unicode, encode it onto aStream."
-
-     aStream nextPutUtf16Bytes:aUnicodeCharacter MSB:true.
-
-    "Created: / 16-02-2017 / 16:41:25 / stefan"
-!
-
 encodeString:aUnicodeString
     "return the UTF-16 representation of a aUnicodeString.
      The resulting string is only useful to be stored on some external file,
      not for being used inside ST/X."
 
-    |s|
-
-    s := WriteStream on:(ByteArray uninitializedNew:aUnicodeString size).
-    aUnicodeString do:[:eachCharacter |
-        |codePoint t hi low|
+    |stream size "{ Class:SmallInteger }"|
 
-        codePoint := eachCharacter codePoint.
-        (codePoint <= 16rFFFF) ifTrue:[
-            ((codePoint <= 16rD7FF) or:[ codePoint between:16rE000 and:16rFFFF]) ifTrue:[
-                self nextPutTwoByteValue:codePoint to:s.
-            ] ifFalse:[
-                "/ unrepresentable: D800..DFFFF
-                self error:'unrepresentable value (D800..DFFFF) in utf16Encode'.
-            ].
-        ] ifFalse:[
-            t := codePoint - 16r00010000.
-            hi := t bitShift:-10.
-            low := t bitAnd:16r3FF.
-            hi > 16r3FF ifTrue:[
-                "/ unrepresentable: above 110000
-                self error:'unrepresentable value (> 10FFFF) in utf16Encode'.
-            ].
-            self nextPutTwoByteValue:(hi + 16rD800) to:s.
-            self nextPutTwoByteValue:(low + 16rDC00) to:s.
-        ].
+    stream := WriteStream on:(ByteArray uninitializedNew:aUnicodeString size * 2).
+    size := aUnicodeString size.
+
+    1 to:size do:[:idx |
+        stream nextPutUtf16Bytes:(aUnicodeString at:idx) MSB:true.
     ].
 
-    ^ s contents
+    ^ stream contents
 
     "
      (self encodeString:'hello')                                         #[0 104 0 101 0 108 0 108 0 111]
@@ -267,6 +235,38 @@
      (self encodeString:(Character value:16rDFFF) asString) 
      (self encodeString:(Character value:16r110000) asString)   
     "
+
+    "Modified: / 16-01-2018 / 19:38:30 / stefan"
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'private'!
+
+nextTwoByteValueFrom:aStream
+    ^ aStream nextUnsignedInt16MSB:true
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'queries'!
+
+characterSize:charOrCodePoint
+    "return the number of bytes (2 or 4) required to encode charOrCodePoint; 4 are needed for code points above 16rFFFF"
+
+    ^ charOrCodePoint codePoint <= 16rFFFF ifTrue:[2] ifFalse:[4]
+
+    "Created: / 16-01-2018 / 19:21:09 / stefan"
+!
+
+nameOfEncoding
+    ^ #utf16be
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'stream support'!
+
+encodeCharacter:aUnicodeCharacter on:aStream
+    "given a single unicode character, encode it onto aStream as UTF-16 bytes, most significant byte first."
+
+     aStream nextPutUtf16Bytes:aUnicodeCharacter MSB:true.
+
+    "Created: / 16-02-2017 / 16:41:25 / stefan"
 !
 
 encodeString:aUnicodeString on:aStream
@@ -275,22 +275,29 @@
      aStream nextPutAllUtf16Bytes:aUnicodeString MSB:true.
 
     "Created: / 16-02-2017 / 16:40:32 / stefan"
-! !
-
-!ISO10646_to_UTF16BE methodsFor:'private'!
-
-nextPutTwoByteValue:anInteger to:aStream
-    aStream nextPutInt16MSB:anInteger
 !
 
-nextTwoByteValueFrom:aStream
-    ^ aStream nextUnsignedInt16MSB:true
-! !
+readNextCharacterFrom:aStream
+    |codeIn codeIn2|
 
-!ISO10646_to_UTF16BE methodsFor:'queries'!
+    codeIn := self nextTwoByteValueFrom:aStream.
+    codeIn isNil ifTrue:[
+        ^ nil.
+    ].
+    (codeIn between:16rD800 and:16rDBFF) ifTrue:[
+        codeIn2 := self nextTwoByteValueFrom:aStream.
+        codeIn2 isNil ifTrue:[
+            InvalidEncodingError raiseErrorString:' - UTF16 missing followBytes'.
+        ].
+        codeIn :=  ((codeIn - 16rD800) bitShift:10)
+                  + (codeIn2 - 16rDC00)
+                  + 16r00010000.
+    ].
 
-nameOfEncoding
-    ^ #utf16be
+    ^ Character codePoint:codeIn.
+
+    "Created: / 16-01-2018 / 22:31:29 / stefan"
+    "Modified: / 17-01-2018 / 14:41:31 / stefan"
 ! !
 
 !ISO10646_to_UTF16BE class methodsFor:'documentation'!