initial checkin
author Claus Gittinger <cg@exept.de>
Fri, 12 Mar 2004 13:50:27 +0100
changeset 8170 ffa1ed9338ad
parent 8169 354b025a08f9
child 8171 ac837a7ca3a3
initial checkin
CharacterEncoderImplementations__ISO10646_to_SGML.st
Encoder_ISO10646_to_SGML.st
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CharacterEncoderImplementations__ISO10646_to_SGML.st	Fri Mar 12 13:50:27 2004 +0100
@@ -0,0 +1,122 @@
+"{ Package: 'stx:libbasic' }"
+
+"{ NameSpace: CharacterEncoderImplementations }"
+
+TwoByteEncoder subclass:#ISO10646_to_SGML
+	instanceVariableNames:''
+	classVariableNames:''
+	poolDictionaries:''
+	category:'Collections-Text-Encodings' "encoder/decoder between Unicode strings and SGML decimal character references (&#NNN;)"
+!
+
+
+!ISO10646_to_SGML methodsFor:'encoding & decoding'!
+
+decode:aCode
+    self shouldNotImplement "/ not supported: a character reference (&#NNN;) spans several characters - use decodeString:
+!
+
+decodeString:aStringOrByteCollection
+    "given a string in SGML encoding (i.e. with decimal character references
+     of the form &#NNN; - the terminating semicolon is optional), return a new
+     string in which each reference is replaced by the character it denotes.
+     Everything else is copied unchanged, including a lone ampersand and a
+     '&#' which is not followed by a digit (that is no character reference).
+     Returns a String, a Unicode16String or a Unicode32String, depending on
+     the largest referenced code point. This only handles up-to 30bit characters."
+
+    |nBits needBits nDigits ch 
+     in out codePoint t|
+
+    nBits := 8.
+    in := aStringOrByteCollection readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
+            in next.
+            codePoint := 0.
+            nDigits := 0.
+            [ch := in peekOrNil.
+             ch notNil and:[ch isDigit]
+            ] whileTrue:[
+                codePoint := (codePoint * 10) + ch digitValue.
+                nDigits := nDigits + 1.
+                in next.
+            ].
+            nDigits == 0 ifTrue:[
+                "/ no digits followed - not a character reference; keep both characters
+                out nextPutAll:'&#'.
+            ] ifFalse:[
+                codePoint > 16rFF ifTrue:[
+                    needBits := codePoint > 16rFFFF ifTrue:[32] ifFalse:[16].
+                    nBits < needBits ifTrue:[
+                        "/ continue on a wider buffer which holds what was written so far
+                        t := out contents.
+                        needBits == 32 ifTrue:[
+                            out := WriteStream on:(Unicode32String fromString:t).
+                        ] ifFalse:[
+                            out := WriteStream on:(Unicode16String fromString:t).
+                        ].
+                        out position:t size.
+                        nBits := needBits.
+                    ]
+                ].
+                out nextPut:(Character value:codePoint).
+                in peekOrNil == $; ifTrue:[in next]
+            ]
+        ] ifFalse:[
+            out nextPut:ch
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'&#1060;&#1072;&#1081;&#1083;' 
+
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'#197;&bn...'
+    "
+!
+
+encode:aCode
+    self shouldNotImplement "/ not supported: one character may be written as several characters (&#NNN;) - use encodeString:
+!
+
+encodeString:aUnicodeString
+    "return the SGML representation of aUnicodeString as a plain String,
+     meant to be stored on some external file. Characters in 16r20..16r7F
+     are copied as is, all others are written as decimal reference &#NNN;.
+     An ampersand directly followed by $# is written as &#38; as well,
+     because decodeString: would otherwise take it for a reference."
+
+    |ch in out codePoint|
+
+    in := aUnicodeString readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        codePoint := ch codePoint.
+        ((codePoint between:16r20 and:16r7F)
+         and:[(ch == $& and:[in peekOrNil == $#]) not]) ifTrue:[
+            out nextPut:ch.
+        ] ifFalse:[
+            out nextPutAll:'&#'.
+            out nextPutAll:(codePoint printString).
+            out nextPutAll:';'.
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        encodeString:'hello äöü' 
+    "
+! !
+
+!ISO10646_to_SGML class methodsFor:'documentation'!
+
+version "answers the revision identification string of this source file (path, revision, date and author as recorded by CVS)"
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.1 2004-03-12 12:50:27 cg Exp $'
+! !
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Encoder_ISO10646_to_SGML.st	Fri Mar 12 13:50:27 2004 +0100
@@ -0,0 +1,122 @@
+"{ Package: 'stx:libbasic' }"
+
+"{ NameSpace: CharacterEncoderImplementations }"
+
+TwoByteEncoder subclass:#ISO10646_to_SGML
+	instanceVariableNames:''
+	classVariableNames:''
+	poolDictionaries:''
+	category:'Collections-Text-Encodings' "encoder/decoder between Unicode strings and SGML decimal character references (&#NNN;)"
+!
+
+
+!ISO10646_to_SGML methodsFor:'encoding & decoding'!
+
+decode:aCode
+    self shouldNotImplement "/ not supported: a character reference (&#NNN;) spans several characters - use decodeString:
+!
+
+decodeString:aStringOrByteCollection
+    "given a string in SGML encoding (i.e. with decimal character references
+     of the form &#NNN; - the terminating semicolon is optional), return a new
+     string in which each reference is replaced by the character it denotes.
+     Everything else is copied unchanged, including a lone ampersand and a
+     '&#' which is not followed by a digit (that is no character reference).
+     Returns a String, a Unicode16String or a Unicode32String, depending on
+     the largest referenced code point. This only handles up-to 30bit characters."
+
+    |nBits needBits nDigits ch 
+     in out codePoint t|
+
+    nBits := 8.
+    in := aStringOrByteCollection readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
+            in next.
+            codePoint := 0.
+            nDigits := 0.
+            [ch := in peekOrNil.
+             ch notNil and:[ch isDigit]
+            ] whileTrue:[
+                codePoint := (codePoint * 10) + ch digitValue.
+                nDigits := nDigits + 1.
+                in next.
+            ].
+            nDigits == 0 ifTrue:[
+                "/ no digits followed - not a character reference; keep both characters
+                out nextPutAll:'&#'.
+            ] ifFalse:[
+                codePoint > 16rFF ifTrue:[
+                    needBits := codePoint > 16rFFFF ifTrue:[32] ifFalse:[16].
+                    nBits < needBits ifTrue:[
+                        "/ continue on a wider buffer which holds what was written so far
+                        t := out contents.
+                        needBits == 32 ifTrue:[
+                            out := WriteStream on:(Unicode32String fromString:t).
+                        ] ifFalse:[
+                            out := WriteStream on:(Unicode16String fromString:t).
+                        ].
+                        out position:t size.
+                        nBits := needBits.
+                    ]
+                ].
+                out nextPut:(Character value:codePoint).
+                in peekOrNil == $; ifTrue:[in next]
+            ]
+        ] ifFalse:[
+            out nextPut:ch
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'&#1060;&#1072;&#1081;&#1083;' 
+
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'#197;&bn...'
+    "
+!
+
+encode:aCode
+    self shouldNotImplement "/ not supported: one character may be written as several characters (&#NNN;) - use encodeString:
+!
+
+encodeString:aUnicodeString
+    "return the SGML representation of aUnicodeString as a plain String,
+     meant to be stored on some external file. Characters in 16r20..16r7F
+     are copied as is, all others are written as decimal reference &#NNN;.
+     An ampersand directly followed by $# is written as &#38; as well,
+     because decodeString: would otherwise take it for a reference."
+
+    |ch in out codePoint|
+
+    in := aUnicodeString readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        codePoint := ch codePoint.
+        ((codePoint between:16r20 and:16r7F)
+         and:[(ch == $& and:[in peekOrNil == $#]) not]) ifTrue:[
+            out nextPut:ch.
+        ] ifFalse:[
+            out nextPutAll:'&#'.
+            out nextPutAll:(codePoint printString).
+            out nextPutAll:';'.
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        encodeString:'hello äöü' 
+    "
+! !
+
+!ISO10646_to_SGML class methodsFor:'documentation'!
+
+version "answers the revision identification string of this source file (path, revision, date and author as recorded by CVS)"
+    ^ '$Header: /cvs/stx/stx/libbasic/Attic/Encoder_ISO10646_to_SGML.st,v 1.1 2004-03-12 12:50:27 cg Exp $'
+! !