CharacterEncoderImplementations__ISO10646_to_SGML.st
author |
Jan Vrany <jan.vrany@labware.com> |
|
Tue, 01 Jun 2021 20:19:13 +0100 |
branch | jv |
changeset 25424 |
51bd8a6b196f |
parent 17940 |
985e22966acb
|
permissions |
-rw-r--r-- |
Cherry-picked `Context`
cherry-picked Context.st from a6b6dda4caff:
* 4aaf30c174e9: #DOCUMENTATION by cg, Claus Gittinger <cg@exept.de>
* c67311afcc6c: #OTHER by cg, Claus Gittinger <cg@exept.de>
* 883f79e7b2a6: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* 716f3fbb09e9: Don't mark contexts with `CATCHMARK`, Jan Vrany <jan.vrany@fit.cvut.cz>
* cff24fa817b0: #REFACTORING by stefan, Stefan Vogel <sv@exept.de>
* 521f0d837330: #UI_ENHANCEMENT by cg, Claus Gittinger <cg@exept.de>
* bf1118f0fcca: #UI_ENHANCEMENT by cg, Claus Gittinger <cg@exept.de>
* e587cdd22868: #BUGFIX by cg, Claus Gittinger <cg@exept.de>
* fe9f9487a3ed: #DOCUMENTATION by cg, Claus Gittinger <cg@exept.de>
* d5b781899274: #BUGFIX by cg, Claus Gittinger <cg@exept.de>
* 8258751a7465: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* 40173e082cbc: Copyright updates, Jan Vrany <jan.vrany@fit.cvut.cz>
* 6db5c28207d5: #UI_ENHANCEMENT by cg, Claus Gittinger <cg@exept.de>
* 871ea64fd5dc: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* 4b544a108e4e: #DOCUMENTATION by cg, Claus Gittinger <cg@exept.de>
* 9a8d8399e566: #FEATURE by cgexept.de, Claus Gittinger <cg@exept.de>
* 170b00be0103: #BUGFIX by stefan, Stefan Vogel <sv@exept.de>
* a6c73965eae8: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* ce2a0e462ff0: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* 46a260a9ca92: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* 46cab49167fb: #UI_ENHANCEMENT by exept, Claus Gittinger <cg@exept.de>
* 7d52dfd3997d: #DOCUMENTATION by exept, Claus Gittinger <cg@exept.de>
* c52eeea62763: Fix `Context >> argAndVarNames` in cases when debug info is not available, Jan Vrany <jan.vrany@labware.com>
* b5d6963fe4a9: Backed out changeset c52eeea62763, Jan Vrany <jan.vrany@labware.com>
* 6fd3896f8703: #FEATURE by exept, Claus Gittinger <cg@exept.de>
* b530ee616256: #REFACTORING by cg, Claus Gittinger <cg@exept.de>
* ef9b481d7498: #FEATURE by cg, Claus Gittinger <cg@exept.de>
* ea663b72bd51: #UI_ENHANCEMENT by cg, Claus Gittinger <cg@exept.de>
* 6179572a733c: #FEATURE by exept, Claus Gittinger <cg@exept.de>
* 84155b1b6622: #DOCUMENTATION by exept, Claus Gittinger <cg@exept.de>
* 37d06602d856: *** empty log message ***, Claus Gittinger <cg@exept.de>
* f927b9022fea: *** empty log message ***, Claus Gittinger <cg@exept.de>
* 427d3be62d97: #UI_ENHANCEMENT by exept, Claus Gittinger <cg@exept.de>
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
"{ Package: 'stx:libbasic' }"
"{ NameSpace: CharacterEncoderImplementations }"
"Encoder class translating between Unicode (ISO 10646) strings and SGML text
 in which characters are written as decimal character references (&#nnnn;).
 Declares no instance or class variables of its own."
TwoByteEncoder subclass:#ISO10646_to_SGML
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
category:'Collections-Text-Encodings'
!
!ISO10646_to_SGML class methodsFor:'documentation'!
copyright
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
!
documentation
"
Translates between Unicode (ISO 10646) strings and SGML text in which
characters are written as decimal numeric character references
(i.e. &#nnnn; ).

Incomplete - only knows how to encode/decode escaped decimal-code characters.
Hexadecimal references (&#xhhhh;) and named entities (such as &amp;) are
not supported: decodeString: does not translate them, and encodeString:
never generates them.

Single codes cannot be converted; decode: and encode: are therefore
not implemented - use decodeString: and encodeString: instead.

TODO:
add all other characters
reuse this code in XML and HTML processing code.
"
! !
!ISO10646_to_SGML methodsFor:'encoding & decoding'!
decode:aCode
"not supported by this encoder - an SGML character reference spans several
 characters, so a single code cannot be decoded on its own.
 Always invokes shouldNotImplement; use decodeString: for whole strings."
self shouldNotImplement "/ no single byte conversion possible
!
decodeString:aStringOrByteCollection
    "given a string in SGML encoding (i.e. with characters escaped as decimal
     character references of the form &#nnnn;), return a new string containing
     the decoded characters.

     The result starts out as a String; it is widened to a Unicode16String as soon
     as a code point above 16rFF is decoded, and to a Unicode32String as soon as a
     code point above 16rFFFF is decoded.

     The terminating semicolon of a reference is optional.
     An ampersand which is not followed by a hash and at least one decimal digit
     does not start a character reference; such text (named entities, hexadecimal
     references, a stray '&#') is copied to the output unchanged.

     Only useful, when reading from external sources.
     This only handles up-to 30bit characters."

    |nBits ch in out codePoint t sawDigit|

    nBits := 8.
    in := aStringOrByteCollection readStream.
    out := WriteStream on:(String new:10).
    [in atEnd] whileFalse:[
        ch := in next.
        (ch == $& and:[in peekOrNil == $#]) ifTrue:[
            in next.    "skip the hash"
            codePoint := 0.
            sawDigit := false.
            [
                ch := in peekOrNil.
                ch notNil and:[ch isDigit]
            ] whileTrue:[
                codePoint := (codePoint * 10) + ch digitValue.
                sawDigit := true.
                in next.
            ].
            sawDigit ifTrue:[
                codePoint > 16rFF ifTrue:[
                    "the collected output must be copied into a wider string
                     before a character of that size can be appended"
                    codePoint > 16rFFFF ifTrue:[
                        nBits < 32 ifTrue:[
                            t := out contents.
                            out := WriteStream on:(Unicode32String fromString:t).
                            out position:t size.
                            nBits := 32.
                        ]
                    ] ifFalse:[
                        nBits < 16 ifTrue:[
                            t := out contents.
                            out := WriteStream on:(Unicode16String fromString:t).
                            out position:t size.
                            nBits := 16.
                        ]
                    ]
                ].
                out nextPut:(Character value:codePoint).
                in peekOrNil == $; ifTrue:[
                    in next.
                ]
            ] ifFalse:[
                "no digits followed - not a character reference.
                 Emit the two consumed characters literally instead of
                 silently replacing them by a character with code 0"
                out nextPutAll:'&#'
            ]
        ] ifFalse:[
            out nextPut:ch
        ].
    ].
    ^ out contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'&#1060;&#1072;&#1081;&#1083;'

     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'&#197;&bn...'

     CharacterEncoderImplementations::ISO10646_to_SGML
        decodeString:'a &# b &#x41; c'
    "
!
encode:aCode
"not supported by this encoder - a character outside the printable ASCII
 range is encoded as a multi-character reference, so it cannot be
 represented by a single code.
 Always invokes shouldNotImplement; use encodeString: for whole strings."
self shouldNotImplement "/ no single byte conversion possible
!
encodeString:aUnicodeString
    "answer the SGML representation of aUnicodeString as a String.
     Characters with a code point in 16r20 .. 16r7F are copied as they are;
     every other character is written as a decimal character reference
     of the form &#nnnn;
     Notice that the ampersand itself lies inside the copied range and is
     therefore passed through without being escaped.
     The resulting string is only useful to be stored on some external file,
     not for being used inside ST/X."

    |sgml|

    sgml := WriteStream on:(String new:10).
    aUnicodeString do:[:eachChar |
        |code|

        code := eachChar codePoint.
        (code >= 16r20 and:[code <= 16r7F]) ifTrue:[
            sgml nextPut:eachChar
        ] ifFalse:[
            sgml
                nextPutAll:'&#';
                nextPutAll:code printString;
                nextPutAll:';'
        ].
    ].
    ^ sgml contents

    "
     CharacterEncoderImplementations::ISO10646_to_SGML
        encodeString:'hello '
    "

    "Modified: / 23-10-2006 / 13:25:27 / cg"
! !
!ISO10646_to_SGML class methodsFor:'documentation'!
version
"answer the revision-control header string recorded for this source file"
^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006/10/23 11:25:11 cg Exp $'
! !