--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CharacterEncoderImplementations__ISO10646_to_SGML.st Fri Mar 12 13:50:27 2004 +0100
@@ -0,0 +1,122 @@
+"{ Package: 'stx:libbasic' }"
+
+"{ NameSpace: CharacterEncoderImplementations }"
+
+"Encoder between Unicode (ISO 10646) strings and their SGML form, in which
+ characters are written as decimal character references of the form '&#NNN;'.
+ Only whole-string conversion is implemented (#encodeString: / #decodeString:);
+ the single-code entry points #encode: / #decode: send #shouldNotImplement.
+ The class declares no instance or class variables."
+TwoByteEncoder subclass:#ISO10646_to_SGML
+ instanceVariableNames:''
+ classVariableNames:''
+ poolDictionaries:''
+ category:'Collections-Text-Encodings'
+!
+
+
+!ISO10646_to_SGML methodsFor:'encoding & decoding'!
+
+decode:aCode
+ "Decoding a single code is not offered by this encoder, because an SGML
+  character reference spans several input characters; sends #shouldNotImplement.
+  Use #decodeString: to convert a whole SGML-escaped string."
+
+ self shouldNotImplement "/ no single byte conversion possible
+!
+
+decodeString:aStringOrByteCollection
+    "given a string in SGML encoding (i.e. with characters escaped as decimal
+     character references of the form '&#NNN;'), return a new string containing
+     the decoded characters.
+     The result starts out as a String and is widened to a Unicode16String or a
+     Unicode32String as soon as a decoded code point exceeds 16rFF or 16rFFFF.
+     The ';' which terminates a reference is optional.
+     An '&' which is not followed by '#', and a '&#' which is not followed by at
+     least one decimal digit, are not references and are copied unchanged.
+     Only useful, when reading from external sources.
+     This only handles up-to 30bit characters."
+
+    |nBits ch in out codePoint digitSeen t|
+
+    nBits := 8.
+    in := aStringOrByteCollection readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
+            in next.    "/ skip the $#
+            codePoint := 0.
+            digitSeen := false.
+            [
+                ch := in peekOrNil.
+                ch notNil and:[ch isDigit]
+            ] whileTrue:[
+                codePoint := (codePoint * 10) + ch digitValue.
+                digitSeen := true.
+                in next.
+            ].
+            digitSeen ifTrue:[
+                "/ widen the output collection before storing a character
+                "/ which does not fit into the current element size
+                codePoint > 16rFF ifTrue:[
+                    codePoint > 16rFFFF ifTrue:[
+                        nBits < 32 ifTrue:[
+                            t := out contents.
+                            out := WriteStream on:(Unicode32String fromString:t).
+                            out position:t size.
+                            nBits := 32.
+                        ]
+                    ] ifFalse:[
+                        nBits < 16 ifTrue:[
+                            t := out contents.
+                            out := WriteStream on:(Unicode16String fromString:t).
+                            out position:t size.
+                            nBits := 16.
+                        ]
+                    ]
+                ].
+                out nextPut:(Character value:codePoint).
+                "/ the terminating semicolon is consumed if present
+                in peekOrNil == $; ifTrue:[
+                    in next.
+                ]
+            ] ifFalse:[
+                "/ no digits followed - this was not a character reference;
+                "/ keep the two characters instead of emitting a NUL character
+                out nextPutAll:'&#'
+            ]
+        ] ifFalse:[
+            out nextPut:ch
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'&#1060;&#1072;&#1081;&#1083;'
+
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'&#197;&bn...'
+
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        decodeString:'AT&#T'
+    "
+!
+
+encode:aCode
+ "Encoding a single code is not offered by this encoder, because one character
+  may expand into a multi-character reference; sends #shouldNotImplement.
+  Use #encodeString: to convert a whole string."
+
+ self shouldNotImplement "/ no single byte conversion possible
+!
+
+encodeString:aUnicodeString
+    "return the SGML representation of aUnicodeString, collected into a String.
+     Characters with a code in 16r20 .. 16r7F are copied as-is; every other
+     character is written as a decimal character reference '&#NNN;'.
+     An '&' which is immediately followed by a '#' is written as a reference
+     too ('&#38;'); otherwise #decodeString: would take the text following it
+     for a character reference, and the encoding could not be reversed.
+     The resulting string is only useful to be stored on some external file,
+     not for being used inside ST/X."
+
+    |ch in out codePoint copyAsIs|
+
+    in := aUnicodeString readStream.
+    out := WriteStream on:(String new:10).
+    [in atEnd] whileFalse:[
+        ch := in next.
+        codePoint := ch codePoint.
+        copyAsIs := (codePoint between:16r20 and:16r7F)
+                    and:[(ch = $& and:[in peekOrNil = $#]) not].
+        copyAsIs ifTrue:[
+            out nextPut:ch.
+        ] ifFalse:[
+            out nextPutAll:'&#'.
+            out nextPutAll:(codePoint printString).
+            out nextPutAll:';'.
+        ].
+    ].
+    ^ out contents
+
+    "
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        encodeString:'hello äöü'
+
+     CharacterEncoderImplementations::ISO10646_to_SGML
+        encodeString:'a literal &#65; stays literal'
+    "
+! !
+
+!ISO10646_to_SGML class methodsFor:'documentation'!
+
+version
+ "Answer the CVS header string (file path, revision, date, author) of this source file."
+
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.1 2004-03-12 12:50:27 cg Exp $'
+! !