CharacterEncoderImplementations__ISO10646_to_JavaText.st
author Jan Vrany <jan.vrany@labware.com>
Wed, 22 Mar 2023 13:57:18 +0000
branchjv
changeset 25445 1623217d2268
parent 23107 40173e082cbc
permissions -rw-r--r--
Cherry-picked OrderedCollection.st from 0b286fd51da7: * d4c86d7c0bfc: #TUNING by stefan, Stefan Vogel <sv@exept.de> * 692b6497a669: #DOCUMENTATION by stefan, Stefan Vogel <sv@exept.de> * d47bb2912953: #DOCUMENTATION by stefan, Stefan Vogel <sv@exept.de> * abb4316c6bff: #FEATURE by cg, Claus Gittinger <cg@exept.de> * 3a8fce0e8d11: #TUNING by stefan, Stefan Vogel <sv@exept.de> * 03d29bf8c5bb: #REFACTORING by stefan, Stefan Vogel <sv@exept.de> * cccc6c4abcfc: #REFACTORING by stefan, Stefan Vogel <sv@exept.de> * 35d957c7a840: #FEATURE by cg, Claus Gittinger <cg@exept.de> * 6b11890f5f2c: #OTHER by cg, Claus Gittinger <cg@exept.de> * abb6108fb06b: #FEATURE by cg, Claus Gittinger <cg@exept.de> * 2c4768bb2e89: #FEATURE by cg, Claus Gittinger <cg@exept.de> * 4029e964d0f1: #FEATURE by cg, Claus Gittinger <cg@exept.de> * ddcab3a9c2df: #OTHER by cg, Claus Gittinger <cg@exept.de> * 2213eb56e0c7: #REFACTORING by exept, Claus Gittinger <cg@exept.de> * 09ca874a6160: #REFACTORING by exept, Claus Gittinger <cg@exept.de> * 30b332af1f33: #BUGFIX by stefan, Stefan Vogel <sv@exept.de> * 779764ba117b: #REFACTORING by cg, Claus Gittinger <cg@exept.de> * b3d232a613c9: #BUGFIX by stefan, Stefan Vogel <sv@exept.de> * c417f7edaec1: #BUGFIX by stefan, Stefan Vogel <sv@exept.de> * 904b6538f379: #FEATURE by exept, Claus Gittinger <cg@exept.de> * c5887f03e01f: #REFACTORING by stefan, Stefan Vogel <sv@exept.de> * 8912d03aff48: #BUGFIX by exept, Claus Gittinger <cg@exept.de> * de5cd1dab4c3: #DOCUMENTATION by exept, Claus Gittinger <cg@exept.de> * 9bbd26603378: #OTHER by exept, Claus Gittinger <cg@exept.de> * c2c9dc110f42: #FEATURE by stefan, Stefan Vogel <sv@exept.de> * 81d123c6703d: #DOCUMENTATION by stefan, Stefan Vogel <sv@exept.de> * 8aadbb21458a: #BUGFIX by stefan, Stefan Vogel <sv@exept.de> * f210dbb8b2f6: #TUNING by stefan, Stefan Vogel <sv@exept.de> * c2c774fc53c0: #FEATURE by exept, Claus Gittinger <cg@exept.de> * b6f462670875: #DOCUMENTATION by exept, Claus Gittinger <cg@exept.de> * 
27ae4021d5d6: #FEATURE by stefan, Stefan Vogel <sv@exept.de> * 10d9e9d85594: #TUNING by exept, Claus Gittinger <cg@exept.de> * 2653d855dcc7: #DOCUMENTATION by exept, Claus Gittinger <cg@exept.de> * 6ea1698a1a34: #FEATURE by stefan, Stefan Vogel <sv@exept.de> * 28762315e664: #OTHER by exept, Claus Gittinger <cg@exept.de> * 7142ea786f3e: #TUNING by stefan, Stefan Vogel <sv@exept.de> * 7875acb42b53: #BUGFIX by stefan, Stefan Vogel <sv@exept.de> * 163a0eebc97e: #BUGFIX by Maren, matilk

"
 COPYRIGHT (c) 2006 by eXept Software AG
 COPYRIGHT (c) 2009 Jan Vrany
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic' }"

"{ NameSpace: CharacterEncoderImplementations }"

"Encoder for Java style text, in which characters are written as \uXXXX escapes.
 Declared as a subclass of TwoByteEncoder; it adds no instance or class variables."
TwoByteEncoder subclass:#ISO10646_to_JavaText
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Encodings'
!

!ISO10646_to_JavaText class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2006 by eXept Software AG
 COPYRIGHT (c) 2009 Jan Vrany
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Translates \uXXXX-escapes in the text, i.e. the notation which
    Java property and resource files use for characters.

    decodeString: replaces \uXXXX escapes (four hex digits) by the
    characters they denote and copies everything else.
    encodeString: copies characters with a code point in 16r20..16r7F
    and writes every other character as an escape.

    Conversion of a single code (decode: / encode:) is not supported,
    both raise shouldNotImplement.
"
! !

!ISO10646_to_JavaText methodsFor:'encoding & decoding'!

decode:aCode
    "not supported by this encoder; always raises shouldNotImplement.
     Whole strings are converted by decodeString:."

    self shouldNotImplement "/ no single byte conversion possible
!

decodeString:aStringOrByteCollection
    "given a string in JavaText encoding (i.e. with \uXXXX escaped characters),
     return a new string containing the same characters, in 16bit (or more) encoding.
     Returns either a normal String, a Unicode16String or a Unicode32String instance,
     depending on the largest code point produced by an escape.
     Only useful, when reading Java property and resource files.

     An escape is only recognized if \u is followed by four hex digits.
     Everything else (a backslash before any other character, a truncated
     escape at the end of the input, non-hex characters after \u)
     is copied to the result unchanged instead of raising an error
     or producing a bogus character.

     A high surrogate escape (D800..DBFF) directly followed by a low surrogate
     escape (DC00..DFFF) is combined into the single character above 16rFFFF
     which the pair stands for; this is how Java writes such characters.
     Unpaired surrogates are passed through as they are."

    |hexDigits nBits ch in out digits codePoint pendingHigh t
     putCodePoint flushPending|

    hexDigits := '0123456789abcdefABCDEF'.
    nBits := 8.
    in := aStringOrByteCollection readStream.
    out := WriteStream on:(String new:10).

    "append the character for a code point,
     switching to a wider result string first if it would not fit"
    putCodePoint := [:cp |
        cp > 16rFF ifTrue:[
            cp > 16rFFFF ifTrue:[
                nBits < 32 ifTrue:[
                    t := out contents.
                    out := WriteStream on:(Unicode32String fromString:t).
                    out position:t size.
                    nBits := 32.
                ]
            ] ifFalse:[
                nBits < 16 ifTrue:[
                    t := out contents.
                    out := WriteStream on:(Unicode16String fromString:t).
                    out position:t size.
                    nBits := 16.
                ]
            ]
        ].
        out nextPut:(Character value:cp).
    ].

    "a high surrogate is held back until it is known whether a low surrogate
     follows; this writes a held back one which turned out to be unpaired"
    flushPending := [
        pendingHigh notNil ifTrue:[
            putCodePoint value:pendingHigh.
            pendingHigh := nil.
        ].
    ].

    [in atEnd] whileFalse:[
        ch := in next.
        (ch == $\ and:[in peekOrNil == $u]) ifTrue:[
            in next.
            "consume up to four hex digits; only hex digits are ever consumed here,
             so an incomplete escape can be written back literally"
            digits := ''.
            codePoint := 0.
            [digits size < 4
             and:[(ch := in peekOrNil) notNil
             and:[hexDigits includes:ch]]
            ] whileTrue:[
                in next.
                digits := digits copyWith:ch.
                codePoint := (codePoint * 16) + ch digitValue.
            ].
            digits size == 4 ifTrue:[
                (pendingHigh notNil and:[codePoint between:16rDC00 and:16rDFFF]) ifTrue:[
                    putCodePoint value:(16r10000
                                        + ((pendingHigh - 16rD800) * 16r400)
                                        + (codePoint - 16rDC00)).
                    pendingHigh := nil.
                ] ifFalse:[
                    flushPending value.
                    (codePoint between:16rD800 and:16rDBFF) ifTrue:[
                        pendingHigh := codePoint.
                    ] ifFalse:[
                        putCodePoint value:codePoint.
                    ].
                ].
            ] ifFalse:[
                "not a complete escape - keep the text as it was"
                flushPending value.
                out nextPutAll:'\u'.
                out nextPutAll:digits.
            ].
        ] ifFalse:[
            flushPending value.
            out nextPut:ch
        ].
    ].
    flushPending value.
    ^ out contents

    "
     CharacterEncoderImplementations::ISO10646_to_JavaText
        decodeString:'AB\u1234CD' 

     CharacterEncoderImplementations::ISO10646_to_JavaText
        decodeString:'\uD83D\uDE00' 

     CharacterEncoderImplementations::ISO10646_to_JavaText
        decodeString:'truncated \u12' 
    "

    "Modified: / 23-10-2006 / 13:23:18 / cg"
!

encode:aCode
    "not supported by this encoder; always raises shouldNotImplement.
     Whole strings are converted by encodeString:."

    self shouldNotImplement "/ no single byte conversion possible
!

encodeString:aUnicodeString
    "return the JavaText representation of aUnicodeString.
     Characters with a code point in 16r20..16r7F are copied unchanged,
     every other character is written as a \uXXXX escape with four hex digits.

     Characters in 16r10000..16r10FFFF are written as a surrogate pair
     (two escapes), because a Java escape carries exactly one 16bit code unit;
     a single escape with five digits would be read back as a wrong
     character followed by a stray digit.
     Code points above 16r10FFFF have no surrogate representation; they are
     written as one escape with more than four digits, which a Java reader
     will not understand.

     Backslashes are copied unchanged, so text which already contains
     something looking like a \uXXXX escape is not protected against
     being decoded later.

     The resulting string is only useful to be stored on some external file,
     not for being used inside ST/X."

    |ch in out codePoint offset putEscape|

    in := aUnicodeString readStream.
    out := WriteStream on:(String new:10).

    "write one escape, padded to at least four hex digits"
    putEscape := [:codeUnit |
        out nextPutAll:'\u'.
        out nextPutAll:((codeUnit printStringRadix:16) leftPaddedTo:4 with:$0).
    ].

    [in atEnd] whileFalse:[
        ch := in next.
        codePoint := ch codePoint.
        (codePoint between:16r20 and:16r7F) ifTrue:[
            out nextPut:ch.
        ] ifFalse:[
            (codePoint between:16r10000 and:16r10FFFF) ifTrue:[
                "split into high (upper 10 bits) and low (lower 10 bits) surrogate"
                offset := codePoint - 16r10000.
                putEscape value:(16rD800 + (offset // 16r400)).
                putEscape value:(16rDC00 + (offset \\ 16r400)).
            ] ifFalse:[
                putEscape value:codePoint.
            ].
        ].
    ].
    ^ out contents

    "
     CharacterEncoderImplementations::ISO10646_to_JavaText
        encodeString:'hello '  

     CharacterEncoderImplementations::ISO10646_to_JavaText
        encodeString:(String with:(Character value:16r1F600))  

     CharacterEncoderImplementations::ISO10646_to_JavaText
        decodeString:(CharacterEncoderImplementations::ISO10646_to_JavaText encodeString:'hello ') 
    "

    "Modified: / 23-10-2006 / 13:25:03 / cg"
! !

!ISO10646_to_JavaText class methodsFor:'documentation'!

version
    "answer the source code management header string recorded for this file;
     the text between the dollar signs is a revision keyword and must not be edited by hand"

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_JavaText.st,v 1.1 2014-02-05 17:11:06 cg Exp $'
!

version_CVS
    "answer the CVS header string recorded for this file;
     the text between the dollar signs is a revision keyword and must not be edited by hand"

    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_JavaText.st,v 1.1 2014-02-05 17:11:06 cg Exp $'
! !