#REFACTORING by stefan
class: CharacterEncoderImplementations::ISO10646_to_SGML
class definition
added:
#characterSize:
#readNextCharacterFrom:
removed:
#decode:
#encode:
comment/format in: #decodeString:
changed: #encodeString:
--- a/CharacterEncoderImplementations__ISO10646_to_SGML.st Fri Jan 19 14:43:55 2018 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_SGML.st Fri Jan 19 14:44:05 2018 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
@@ -13,7 +15,7 @@
"{ NameSpace: CharacterEncoderImplementations }"
-TwoByteEncoder subclass:#ISO10646_to_SGML
+VariableBytesEncoder subclass:#ISO10646_to_SGML
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
@@ -49,10 +51,6 @@
!ISO10646_to_SGML methodsFor:'encoding & decoding'!
-decode:aCode
- self shouldNotImplement "/ no single byte conversion possible
-!
-
decodeString:aStringOrByteCollection
"given a string in SGML encoding (i.e. with SGML escaped characters),
return a new string containing the same characters, in 16bit (or more) encoding.
@@ -61,11 +59,11 @@
This only handles up-to 30bit characters."
|nBits ch
- in out codePoint t|
+ in out codePoint|
nBits := 8.
in := aStringOrByteCollection readStream.
- out := WriteStream on:(String new:10).
+ out := CharacterWriteStream on:(String new:10).
[in atEnd] whileFalse:[
ch := in next.
ch == $& ifTrue:[
@@ -78,24 +76,7 @@
codePoint := (codePoint * 10) + ch digitValue.
in next.
].
- codePoint > 16rFF ifTrue:[
- codePoint > 16rFFFF ifTrue:[
- nBits < 32 ifTrue:[
- t := out contents.
- out := WriteStream on:(Unicode32String fromString:t).
- out position:t size.
- nBits := 32.
- ]
- ] ifFalse:[
- nBits < 16 ifTrue:[
- t := out contents.
- out := WriteStream on:(Unicode16String fromString:t).
- out position:t size.
- nBits := 16.
- ]
- ]
- ].
- out nextPut:(Character value:codePoint).
+ out nextPut:(Character codePoint:codePoint).
in peekOrNil == $; ifTrue:[
in next.
]
@@ -115,10 +96,8 @@
CharacterEncoderImplementations::ISO10646_to_SGML
decodeString:'#197;&bn...'
"
-!
-encode:aCode
- self shouldNotImplement "/ no single byte conversion possible
+ "Modified: / 17-01-2018 / 18:35:52 / stefan"
!
encodeString:aUnicodeString
@@ -126,33 +105,77 @@
The resulting string is only useful to be stored on some external file,
not for being used inside ST/X."
- |ch in out codePoint|
+ |in out|
in := aUnicodeString readStream.
- out := WriteStream on:(String new:10).
+ out := WriteStream on:(String new:aUnicodeString size + 10).
[in atEnd] whileFalse:[
+ |ch codePoint|
+
ch := in next.
codePoint := ch codePoint.
(codePoint between:16r20 and:16r7F) ifTrue:[
out nextPut:ch.
] ifFalse:[
out nextPutAll:'&#'.
- out nextPutAll:(codePoint printString).
- out nextPutAll:';'.
+ codePoint printOn:out.
+ out nextPut:$;.
].
].
^ out contents
"
CharacterEncoderImplementations::ISO10646_to_SGML
- encodeString:'hello äöü'
+ encodeString:'hello äöü'
"
"Modified: / 23-10-2006 / 13:25:27 / cg"
+ "Modified (format): / 17-01-2018 / 18:41:16 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'queries'!
+
+characterSize:aCharacter
+    "answer the size of the encoded form of aCharacter: 1 for code points 16r20..16r7F (copied as-is),
+     otherwise the length of its numeric reference, i.e. the decimal digits plus 3 for '&#' and ';'"
+    |code|
+
+    code := aCharacter codePoint.
+    ^ (code < 16r20 or:[code > 16r7F])
+        ifTrue:[code printString size + 3] ifFalse:[1]    "e.g. &#1234;"
+
+    "Created: / 17-01-2018 / 18:01:40 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'stream support'!
+
+readNextCharacterFrom:aStream
+    "read and answer the next character from aStream.
+     A numeric reference as written by encodeString: (&#NNN; with decimal digits,
+     the trailing semicolon being optional) is consumed and answered as one character."
+    |char codePoint|
+
+    char := aStream next.
+    (char == $& and:[aStream peekOrNil == $#]) ifFalse:[
+        ^ char.    "plain character (or a lone ampersand) - answered unchanged"
+    ].
+    aStream next.    "skip the # following the ampersand"
+    codePoint := 0.
+    [(char := aStream peekOrNil) notNil and:[char isDigit]] whileTrue:[
+        codePoint := (codePoint * 10) + char digitValue.
+        aStream next.
+    ].
+    aStream peekOrNil == $; ifTrue:[
+        aStream next.
+    ].
+    ^ Character codePoint:codePoint.
+
+    "Created: / 17-01-2018 / 18:37:40 / stefan"
! !
!ISO10646_to_SGML class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006-10-23 11:25:11 cg Exp $'
+ ^ '$Header$'
! !
+