#REFACTORING by stefan
author Stefan Vogel <sv@exept.de>
Fri, 19 Jan 2018 14:44:05 +0100
changeset 22477 5b8c1f5f8ffa
parent 22476 b30058f26971
child 22478 e511b09f7a97
#REFACTORING by stefan class: CharacterEncoderImplementations::ISO10646_to_SGML class definition added: #characterSize: #readNextCharacterFrom: removed: #decode: #encode: comment/format in: #decodeString: changed: #encodeString:
CharacterEncoderImplementations__ISO10646_to_SGML.st
--- a/CharacterEncoderImplementations__ISO10646_to_SGML.st	Fri Jan 19 14:43:55 2018 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_SGML.st	Fri Jan 19 14:44:05 2018 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 2004 by eXept Software AG
               All Rights Reserved
@@ -13,7 +15,7 @@
 
 "{ NameSpace: CharacterEncoderImplementations }"
 
-TwoByteEncoder subclass:#ISO10646_to_SGML
+VariableBytesEncoder subclass:#ISO10646_to_SGML
 	instanceVariableNames:''
 	classVariableNames:''
 	poolDictionaries:''
@@ -49,10 +51,6 @@
 
 !ISO10646_to_SGML methodsFor:'encoding & decoding'!
 
-decode:aCode
-    self shouldNotImplement "/ no single byte conversion possible
-!
-
 decodeString:aStringOrByteCollection
     "given a string in SGML encoding (i.e. with SGML escaped characters),
      return a new string containing the same characters, in 16bit (or more) encoding.
@@ -61,11 +59,11 @@
      This only handles up-to 30bit characters."
 
     |nBits ch 
-     in out codePoint t|
+     in out codePoint|
 
     nBits := 8.
     in := aStringOrByteCollection readStream.
-    out := WriteStream on:(String new:10).
+    out := CharacterWriteStream on:(String new:10).
     [in atEnd] whileFalse:[
         ch := in next.
         ch == $& ifTrue:[
@@ -78,24 +76,7 @@
                     codePoint := (codePoint * 10) + ch digitValue.
                     in next.
                 ].
-                codePoint > 16rFF ifTrue:[
-                    codePoint > 16rFFFF ifTrue:[
-                        nBits < 32 ifTrue:[
-                            t := out contents.
-                            out := WriteStream on:(Unicode32String fromString:t).
-                            out position:t size.
-                            nBits := 32.
-                        ]
-                    ] ifFalse:[
-                        nBits < 16 ifTrue:[
-                            t := out contents.
-                            out := WriteStream on:(Unicode16String fromString:t).
-                            out position:t size.
-                            nBits := 16.
-                        ]
-                    ]
-                ].
-                out nextPut:(Character value:codePoint).
+                out nextPut:(Character codePoint:codePoint).
                 in peekOrNil == $; ifTrue:[
                     in next.
                 ]
@@ -115,10 +96,8 @@
      CharacterEncoderImplementations::ISO10646_to_SGML
         decodeString:'#197;&bn...'
     "
-!
 
-encode:aCode
-    self shouldNotImplement "/ no single byte conversion possible
+    "Modified: / 17-01-2018 / 18:35:52 / stefan"
 !
 
 encodeString:aUnicodeString
@@ -126,33 +105,77 @@
      The resulting string is only useful to be stored on some external file,
      not for being used inside ST/X."
 
-    |ch in out codePoint|
+    |in out|
 
     in := aUnicodeString readStream.
-    out := WriteStream on:(String new:10).
+    out := WriteStream on:(String new:aUnicodeString size + 10).
     [in atEnd] whileFalse:[
+        |ch codePoint|
+
         ch := in next.
         codePoint := ch codePoint.
         (codePoint between:16r20 and:16r7F) ifTrue:[
             out nextPut:ch.
         ] ifFalse:[
             out nextPutAll:'&#'.
-            out nextPutAll:(codePoint printString).
-            out nextPutAll:';'.
+            codePoint printOn:out.
+            out nextPut:$;.
         ].
     ].
     ^ out contents
 
     "
      CharacterEncoderImplementations::ISO10646_to_SGML
-        encodeString:'hello äöü' 
+        encodeString:'hello äöü' 
     "
 
     "Modified: / 23-10-2006 / 13:25:27 / cg"
+    "Modified (format): / 17-01-2018 / 18:41:16 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'queries'!
+
+characterSize:aCharacter
+    "answer the number of characters used to encode aCharacter:
+     1 if it is passed through as is, otherwise the size of its '&#<decimal>;' escape"
+
+    |cp|
+
+    cp := aCharacter codePoint.
+    ^ (cp between:16r20 and:16r7F) ifTrue:[1] ifFalse:[cp printString size + 3]  "/ +3 for the & # and ; around the digits
+
+    "Created: / 17-01-2018 / 18:01:40 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'stream support'!
+
+readNextCharacterFrom:aStream
+    "read and return the next character from aStream.
+     An escape of the form '&#<decimal digits>;' is returned as the character
+     with that codePoint; any other character is returned as is."
+
+    |char codePoint|
+
+    char := aStream next.
+    (char ~~ $& or:[aStream peekOrNil ~~ $#]) ifTrue:[^ char].  "/ not the start of an escape
+    aStream next.                                               "/ skip the $#
+
+    codePoint := 0.
+    [char := aStream peekOrNil.
+     char notNil and:[char isDigit]
+    ] whileTrue:[
+        codePoint := (codePoint * 10) + char digitValue.
+        aStream next.
+    ].
+    aStream peekOrNil == $; ifTrue:[aStream next].              "/ the closing $; is optional
+    ^ Character codePoint:codePoint.
+
+    "Created: / 17-01-2018 / 18:37:40 / stefan"
 ! !
 
 !ISO10646_to_SGML class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006-10-23 11:25:11 cg Exp $'
+    ^ '$Header$'
 ! !
+