"{ Package: 'stx:libbasic' }"
"{ NameSpace: CharacterEncoderImplementations }"
"Encoder which converts between Unicode (ISO 10646) strings and SGML text,
 in which characters outside the range 16r20 .. 16r7F are written as decimal
 numeric character references of the form &#nnn;.
 Only whole strings can be converted (see encodeString: and decodeString:);
 the single code entry points encode: and decode: are not supported.
 The class has no instance, class or pool variables of its own."
TwoByteEncoder subclass:#ISO10646_to_SGML
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
category:'Collections-Text-Encodings'
!
!ISO10646_to_SGML methodsFor:'encoding & decoding'!
decode:aCode
"not supported by this encoder: always sends shouldNotImplement to the receiver.
 A character reference in the SGML text spans several characters,
 so a single code cannot be decoded on its own; use decodeString: instead."
self shouldNotImplement "/ no single byte conversion possible
!
decodeString:aStringOrByteCollection
    "given a string in SGML encoding (i.e. with characters written as decimal
     numeric character references of the form &#nnn;),
     return a new string containing the same characters.
     The result starts out as a String and is widened to a Unicode16String
     or a Unicode32String as soon as a decoded code point does not fit.
     The semicolon which terminates a reference is optional.
     An ampersand which is not followed by '#' and at least one decimal digit
     does not start a reference; such characters are copied to the output
     unchanged (previously, a '&#' without digits was replaced by a NUL character).
     Only useful, when reading from external sources.
     This only handles up-to 30bit characters."

    |nBits ch in out codePoint nDigits t|

    nBits := 8.
    in := aStringOrByteCollection readStream.
    out := WriteStream on:(String new:10).
    [in atEnd] whileFalse:[
        ch := in next.
        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
            in next.    "/ skip the hash character
            codePoint := 0.
            nDigits := 0.
            [
                ch := in peekOrNil.
                ch notNil and:[ch isDigit]
            ] whileTrue:[
                codePoint := (codePoint * 10) + ch digitValue.
                nDigits := nDigits + 1.
                in next.
            ].
            nDigits == 0 ifTrue:[
                "/ not a character reference - keep the two characters as they are
                out nextPutAll:'&#'.
            ] ifFalse:[
                codePoint > 16rFF ifTrue:[
                    "/ the code point does not fit into the current result string;
                    "/ continue on a wider copy of what has been collected so far
                    codePoint > 16rFFFF ifTrue:[
                        nBits < 32 ifTrue:[
                            t := out contents.
                            out := WriteStream on:(Unicode32String fromString:t).
                            out position:t size.
                            nBits := 32.
                        ]
                    ] ifFalse:[
                        nBits < 16 ifTrue:[
                            t := out contents.
                            out := WriteStream on:(Unicode16String fromString:t).
                            out position:t size.
                            nBits := 16.
                        ]
                    ]
                ].
                out nextPut:(Character value:codePoint).
                "/ the terminating semicolon is optional
                in peekOrNil == $; ifTrue:[
                    in next.
                ]
            ]
        ] ifFalse:[
            out nextPut:ch
        ].
    ].
    ^ out contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'&#1060;&#1072;&#1081;&#1083;'

     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'#197;&bn...'

     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'no reference: &#abc'
    "
!
encode:aCode
"not supported by this encoder: always sends shouldNotImplement to the receiver.
 A character may be encoded as a multi-character reference of the form &#nnn;
 so there is no single code to answer; use encodeString: instead."
self shouldNotImplement "/ no single byte conversion possible
!
encodeString:aUnicodeString
    "return the SGML representation of aUnicodeString.
     Characters with a code point in 16r20 .. 16r7F are copied as they are;
     every other character is written as a decimal numeric character
     reference of the form &#nnn;.
     Notice that an ampersand in the input is copied as is and not escaped.
     The result is always a single byte String, built from ASCII characters only."

    |sgml code|

    sgml := WriteStream on:(String new:10).
    aUnicodeString do:[:eachChar |
        code := eachChar codePoint.
        (code < 16r20 or:[code > 16r7F]) ifTrue:[
            "/ outside of the directly representable range - write a reference
            sgml
                nextPutAll:'&#';
                nextPutAll:code printString;
                nextPut:$;
        ] ifFalse:[
            sgml nextPut:eachChar
        ].
    ].
    ^ sgml contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        encodeString:'hello äöü'
    "
! !
!ISO10646_to_SGML class methodsFor:'documentation'!
version
"return the revision header string of the source file which defines this class.
 NOTE(review): the text between the dollar signs looks like a CVS keyword
 expansion and is presumably rewritten by the version control system - do not edit by hand."
^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.1 2004-03-12 12:50:27 cg Exp $'
! !