CharacterEncoderImplementations__ISO10646_to_SGML.st
author Claus Gittinger <cg@exept.de>
Fri, 12 Mar 2004 13:50:27 +0100
changeset 8170 ffa1ed9338ad
child 8171 ac837a7ca3a3
permissions -rw-r--r--
initial checkin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8170
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"{ Package: 'stx:libbasic' }"
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"{ NameSpace: CharacterEncoderImplementations }"
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
TwoByteEncoder subclass:#ISO10646_to_SGML
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
	instanceVariableNames:''
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
	classVariableNames:''
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
	poolDictionaries:''
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
	category:'Collections-Text-Encodings'
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
!ISO10646_to_SGML methodsFor:'encoding & decoding'!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
decode:aCode
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
    self shouldNotImplement "/ no single code conversion possible - an SGML escape spans several characters; use decodeString: instead
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
decodeString:aStringOrByteCollection
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
    "given a string in SGML encoding (i.e. with SGML escaped characters),
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
     return a new string containing the same characters, in 16bit (or more) encoding.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
     Returns either a normal String, a Unicode16String or a Unicode32String instance (the narrowest that fits).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
     Only useful, when reading from external sources.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
     Only decimal escapes of the form &#NNN; are decoded; any other text is copied unchanged."
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
    |nBits ch 
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
     in out codePoint t|
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
    nBits := 8.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
    in := aStringOrByteCollection readStream.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
    out := WriteStream on:(String new:10).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
    [in atEnd] whileFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
        ch := in next.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
        ch == $& ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
            in peekOrNil == $# ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
                in next.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
                codePoint := 0.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
                [ch := in peekOrNil.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
                 ch notNil and:[ch isDigit]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
                ] whileTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
                    codePoint := (codePoint * 10) + ch digitValue.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
                    in next.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
                ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
                codePoint > 16rFF ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
                    codePoint > 16rFFFF ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
                        nBits < 32 ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
                            t := out contents.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    48
                            out := WriteStream on:(Unicode32String fromString:t).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    49
                            out position:t size.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    50
                            nBits := 32.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    51
                        ]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    52
                    ] ifFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    53
                        nBits < 16 ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    54
                            t := out contents.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    55
                            out := WriteStream on:(Unicode16String fromString:t).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    56
                            out position:t size.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    57
                            nBits := 16.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    58
                        ]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    59
                    ]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    60
                ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    61
                out nextPut:(Character value:codePoint).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    62
                in peekOrNil == $; ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    63
                    in next.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    64
                ]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    65
            ] ifFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    66
                out nextPut:ch
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    67
            ]
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    68
        ] ifFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    69
            out nextPut:ch
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    70
        ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    71
    ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    72
    ^ out contents
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    73
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    74
    "
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    75
     CharacterEncoderImplementations::ISO10646_to_SGML
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    76
        decodeString:'&#1060;&#1072;&#1081;&#1083;' 
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    77
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    78
     CharacterEncoderImplementations::ISO10646_to_SGML
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    79
        decodeString:'#197;&bn...'
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    80
    "
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    81
!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    82
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    83
encode:aCode
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    84
    self shouldNotImplement "/ no single code conversion possible - one character may expand to a multi-character &#NNN; escape; use encodeString: instead
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    85
!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    86
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    87
encodeString:aUnicodeString
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
    "return the SGML representation of aUnicodeString: characters with codes outside 16r20..16r7F are written as decimal &#NNN; escapes.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
     The resulting string is only useful to be stored on some external file,
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
     not for being used inside ST/X.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    91
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
     If you work a lot with SGML encoded textFiles, 
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
     this is a first-class candidate for a primitive."
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    94
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
    |ch in out codePoint|
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    97
    in := aUnicodeString readStream.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    98
    out := WriteStream on:(String new:10).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
    [in atEnd] whileFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
        ch := in next.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
        codePoint := ch codePoint.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
        (codePoint between:16r20 and:16r7F) ifTrue:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
            out nextPut:ch.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
        ] ifFalse:[
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
            out nextPutAll:'&#'.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   106
            out nextPutAll:(codePoint printString).
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   107
            out nextPutAll:';'.
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   108
        ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   109
    ].
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   110
    ^ out contents
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   111
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   112
    "
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   113
     CharacterEncoderImplementations::ISO10646_to_SGML
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   114
        encodeString:'hello äöü' 
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   115
    "
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   116
! !
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   117
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   118
!ISO10646_to_SGML class methodsFor:'documentation'!
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   119
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   120
version
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   121
    "answer the CVS header string (path, revision, date, author) recorded for this source file"
    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.1 2004-03-12 12:50:27 cg Exp $'
ffa1ed9338ad initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   122
! !