#REFACTORING by stefan
class: CharacterEncoderImplementations::ISO10646_to_UTF16BE
class definition
added:
#characterSize:
#readNextCharacterFrom:
removed:
#decode:
#nextPutTwoByteValue:to:
changed: #encodeString:
category of:
#encodeCharacter:on:
#encodeString:on:
class: CharacterEncoderImplementations::ISO10646_to_UTF16BE class
comment/format in: #examples
Refactor inheritance.
--- a/CharacterEncoderImplementations__ISO10646_to_UTF16BE.st Fri Jan 19 14:41:53 2018 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF16BE.st Fri Jan 19 14:42:22 2018 +0100
@@ -15,7 +15,7 @@
"{ NameSpace: CharacterEncoderImplementations }"
-TwoByteEncoder subclass:#ISO10646_to_UTF16BE
+VariableBytesEncoder subclass:#ISO10646_to_UTF16BE
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
@@ -61,23 +61,17 @@
Decoding (utf16BE to unicode):
|t|
- t := ISO10646_to_UTF16BE encodeString:''.
+ t := ISO10646_to_UTF16BE encodeString:'ÄÖÜß'.
ISO10646_to_UTF16BE decodeString:t.
Decoding (utf16LE-Bytes to unicode):
- |bytes|
-
- bytes := #[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ].
- ISO10646_to_UTF16LE decodeString:bytes.
+ ISO10646_to_UTF16LE decodeString:#[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ].
+ ISO10646_to_UTF16BE decodeString:#[ 16r40 0 16r41 0 16r42 0 16r43 0 16r44 0 ] copy swapBytes.
"
! !
!ISO10646_to_UTF16BE methodsFor:'encoding & decoding'!
-decode:aCode
- ^ aCode
-!
-
decodeString:aStringOrByteCollection
"given a byteArray (2-bytes per character) or unsignedShortArray in UTF16 encoding,
return a new string containing the same characters, in 8, 16bit (or more) encoding.
@@ -203,47 +197,21 @@
^ aCode
!
-encodeCharacter:aUnicodeCharacter on:aStream
- "given a string in unicode, encode it onto aStream."
-
- aStream nextPutUtf16Bytes:aUnicodeCharacter MSB:true.
-
- "Created: / 16-02-2017 / 16:41:25 / stefan"
-!
-
encodeString:aUnicodeString
"return the UTF-16 representation of a aUnicodeString.
The resulting string is only useful to be stored on some external file,
not for being used inside ST/X."
- |s|
-
- s := WriteStream on:(ByteArray uninitializedNew:aUnicodeString size).
- aUnicodeString do:[:eachCharacter |
- |codePoint t hi low|
+ |stream size "{ Class:SmallInteger }"|
- codePoint := eachCharacter codePoint.
- (codePoint <= 16rFFFF) ifTrue:[
- ((codePoint <= 16rD7FF) or:[ codePoint between:16rE000 and:16rFFFF]) ifTrue:[
- self nextPutTwoByteValue:codePoint to:s.
- ] ifFalse:[
- "/ unrepresentable: D800..DFFFF
- self error:'unrepresentable value (D800..DFFFF) in utf16Encode'.
- ].
- ] ifFalse:[
- t := codePoint - 16r00010000.
- hi := t bitShift:-10.
- low := t bitAnd:16r3FF.
- hi > 16r3FF ifTrue:[
- "/ unrepresentable: above 110000
- self error:'unrepresentable value (> 10FFFF) in utf16Encode'.
- ].
- self nextPutTwoByteValue:(hi + 16rD800) to:s.
- self nextPutTwoByteValue:(low + 16rDC00) to:s.
- ].
+ stream := WriteStream on:(ByteArray uninitializedNew:aUnicodeString size * 2).
+ size := aUnicodeString size.
+
+ 1 to:size do:[:idx |
+ stream nextPutUtf16Bytes:(aUnicodeString at:idx) MSB:true.
].
- ^ s contents
+ ^ stream contents
"
(self encodeString:'hello') #[0 104 0 101 0 108 0 108 0 111]
@@ -267,6 +235,38 @@
(self encodeString:(Character value:16rDFFF) asString)
(self encodeString:(Character value:16r110000) asString)
"
+
+ "Modified: / 16-01-2018 / 19:38:30 / stefan"
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'private'!
+
+nextTwoByteValueFrom:aStream
+ ^ aStream nextUnsignedInt16MSB:true "answer one unsigned 16-bit value read from aStream with MSB:true (presumably most-significant byte first - confirm against the stream protocol)"
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'queries'!
+
+characterSize:charOrCodePoint
+ "return the number of bytes required to encode charOrCodePoint: 2 if its codePoint is <= 16rFFFF, otherwise 4"
+
+ ^ charOrCodePoint codePoint <= 16rFFFF ifTrue:[2] ifFalse:[4]
+
+ "Created: / 16-01-2018 / 19:21:09 / stefan"
+!
+
+nameOfEncoding
+ ^ #utf16be "answer the symbol #utf16be as the name of this encoder's encoding"
+! !
+
+!ISO10646_to_UTF16BE methodsFor:'stream support'!
+
+encodeCharacter:aUnicodeCharacter on:aStream
+ "given a single unicode character, encode it onto aStream (sends nextPutUtf16Bytes:MSB: with MSB true to aStream)."
+
+ aStream nextPutUtf16Bytes:aUnicodeCharacter MSB:true.
+
+ "Created: / 16-02-2017 / 16:41:25 / stefan"
!
encodeString:aUnicodeString on:aStream
@@ -275,22 +275,29 @@
aStream nextPutAllUtf16Bytes:aUnicodeString MSB:true.
"Created: / 16-02-2017 / 16:40:32 / stefan"
-! !
-
-!ISO10646_to_UTF16BE methodsFor:'private'!
-
-nextPutTwoByteValue:anInteger to:aStream
- aStream nextPutInt16MSB:anInteger
!
-nextTwoByteValueFrom:aStream
- ^ aStream nextUnsignedInt16MSB:true
-! !
+readNextCharacterFrom:aStream
+ "read one UTF-16 encoded character from aStream (16-bit units are fetched via #nextTwoByteValueFrom:); answer nil if no unit could be read, raise InvalidEncodingError if a high surrogate is not followed by a low surrogate"
-!ISO10646_to_UTF16BE methodsFor:'queries'!
+ |codeIn codeIn2|
+ codeIn := self nextTwoByteValueFrom:aStream.
+ codeIn isNil ifTrue:[
+ ^ nil.
+ ].
+ (codeIn between:16rD800 and:16rDBFF) ifTrue:[
+ "codeIn is a high surrogate: the next unit must exist and be a low surrogate (16rDC00..16rDFFF), otherwise the pair would silently decode to a wrong code point"
+ codeIn2 := self nextTwoByteValueFrom:aStream.
+ (codeIn2 isNil or:[(codeIn2 between:16rDC00 and:16rDFFF) not]) ifTrue:[
+ ^ InvalidEncodingError raiseErrorString:' - UTF16 missing followBytes'. "returned, so that a proceeding handler cannot fall through into the arithmetic below"
+ ].
+ codeIn := ((codeIn - 16rD800) bitShift:10) + (codeIn2 - 16rDC00) + 16r00010000. "10 high bits + 10 low bits + 16r10000 offset"
+ ].
-nameOfEncoding
- ^ #utf16be
+ ^ Character codePoint:codeIn.
+
+ "Created: / 16-01-2018 / 22:31:29 / stefan"
+ "Modified: / 17-01-2018 / 14:41:31 / stefan"
! !
!ISO10646_to_UTF16BE class methodsFor:'documentation'!