"{ Encoding: utf8 }"
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
"{ Package: 'stx:libbasic' }"
"{ NameSpace: CharacterEncoderImplementations }"
"Declares ISO10646_to_SGML as a subclass of VariableBytesEncoder.
 The class has no instance variables, class variables or pool dictionaries
 and is filed under the category 'Collections-Text-Encodings'."
VariableBytesEncoder subclass:#ISO10646_to_SGML
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
category:'Collections-Text-Encodings'
!
!ISO10646_to_SGML class methodsFor:'documentation'!
copyright
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
!
documentation
"
    Converts between Unicode (ISO 10646) strings and an SGML-style escaped
    representation in which characters are written as decimal numeric
    character references (i.e. &#nnnn; ).

    Encoding: characters with a code point in 16r20 .. 16r7F are passed
    through unchanged, every other character is written as &#nnnn;
    Decoding: every &#nnnn; sequence is replaced by the character with that
    decimal code point, everything else is passed through unchanged.

    Incomplete - only the decimal form is known. Hexadecimal references
    (&#xhhhh;) and named entities (such as &amp;) are neither generated
    nor decoded.

    TODO:
        add all other characters
        reuse this code in XML and HTML processing code.
"
! !
!ISO10646_to_SGML methodsFor:'encoding & decoding'!
decodeString:aStringOrByteCollection
    "given a string in SGML encoding (i.e. with SGML escaped characters),
     return a new string containing the same characters, in 16bit (or more) encoding.
     Returns either a normal String, a TwoByteString or a FourByteString instance.
     Only useful, when reading from external sources.
     This only handles up-to 30bit characters.

     Only decimal references of the form &#nnnn; are decoded; the terminating
     semicolon is optional. An ampersand which is not followed by a hash is
     copied unchanged.
     NOTE(review): the sequence '&#' without any following digit decodes to
     the character with code point 0 - confirm that this is intended."

    |in out digit codePoint|

    in := aStringOrByteCollection readStream.
    out := CharacterWriteStream on:(String new:10).
    [in atEnd] whileFalse:[
        |ch|

        ch := in next.
        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
            "skip the hash, then accumulate the decimal digits"
            in next.
            codePoint := 0.
            [
                digit := in peekOrNil.
                digit notNil and:[digit isDigit]
            ] whileTrue:[
                codePoint := (codePoint * 10) + digit digitValue.
                in next.
            ].
            out nextPut:(Character codePoint:codePoint).
            "swallow the terminating semicolon, if present"
            in peekOrNil == $; ifTrue:[
                in next.
            ].
        ] ifFalse:[
            "ordinary character, or an ampersand which does not start a reference"
            out nextPut:ch
        ].
    ].
    ^ out contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'&#1060;&#1072;&#1081;&#1083;'

     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'#197;&bn...'
    "

    "Modified: / 17-01-2018 / 18:35:52 / stefan"
!
encodeString:aUnicodeString
    "answer the SGML representation of aUnicodeString.
     Characters with a code point in 16r20 .. 16r7F are copied as-is,
     all others are written as a decimal reference of the form &#nnnn;
     The result is meant to be stored on some external file,
     not for being used inside ST/X.
     NOTE(review): the ampersand itself lies inside the pass-through range and
     is therefore not escaped, so text which already contains '&#' will not
     survive an encode/decode round trip."

    |result|

    result := WriteStream on:(String new:aUnicodeString size + 10).
    aUnicodeString do:[:eachChar |
        |code|

        code := eachChar codePoint.
        (code >= 16r20 and:[code <= 16r7F]) ifTrue:[
            result nextPut:eachChar.
        ] ifFalse:[
            result nextPutAll:'&#'.
            code printOn:result.
            result nextPut:$;.
        ].
    ].
    ^ result contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        encodeString:'hello äöü'
    "

    "Modified: / 23-10-2006 / 13:25:27 / cg"
    "Modified (format): / 17-01-2018 / 18:41:16 / stefan"
! !
!ISO10646_to_SGML methodsFor:'queries'!
characterSize:aCharacter
    "answer the number of characters needed for aCharacter in the
     encoded representation: 1 for code points in 16r20 .. 16r7F,
     otherwise the length of the decimal reference (e.g. &#1234; )."

    |code|

    code := aCharacter codePoint.
    ^ (code >= 16r20 and:[code <= 16r7F])
        ifTrue:[1]
        ifFalse:[code printString size + 3]     "the digits plus '&#' and ';'"

    "Created: / 17-01-2018 / 18:01:40 / stefan"
! !
!ISO10646_to_SGML methodsFor:'stream support'!
readNextCharacterFrom:aStream
    "read and answer the next decoded character from aStream.
     If the next two characters are an ampersand followed by a hash,
     the decimal digits following them are consumed and the character
     with that code point is answered; a terminating semicolon is
     consumed too, if present.
     Any other character is answered unchanged.
     NOTE(review): '&#' without any following digit answers the character
     with code point 0 - confirm that this is intended."

    |char codePoint|

    char := aStream next.
    "a reference starts with an ampersand immediately followed by a hash;
     peekOrNil is used, because the ampersand may be the last character"
    (char == $& and:[aStream peekOrNil == $#]) ifFalse:[
        ^ char.
    ].

    "skip the hash"
    aStream next.
    codePoint := 0.
    [
        char := aStream peekOrNil.
        char notNil and:[char isDigit]
    ] whileTrue:[
        codePoint := (codePoint * 10) + char digitValue.
        aStream next.
    ].
    aStream peekOrNil == $; ifTrue:[
        aStream next.
    ].
    ^ Character codePoint:codePoint.

    "
     |s enc|

     s := 'A&#66;&C' readStream.
     enc := CharacterEncoderImplementations::ISO10646_to_SGML new.
     enc readNextCharacterFrom:s.
     enc readNextCharacterFrom:s.
     enc readNextCharacterFrom:s.
     enc readNextCharacterFrom:s.
    "

    "Created: / 17-01-2018 / 18:37:40 / stefan"
! !
!ISO10646_to_SGML class methodsFor:'documentation'!
version
    "answer the version string of this class.
     NOTE(review): the literal looks like an unexpanded RCS/CVS keyword
     placeholder, presumably filled in by the source code management
     system on checkout - confirm."
^ '$Header$'
! !