CharacterEncoderImplementations__ISO10646_to_UTF8.st
author Claus Gittinger <cg@exept.de>
Wed, 17 Mar 2004 17:16:31 +0100
changeset 8221 805e0b61b852
parent 8163 a867b07aa226
child 8297 e7a05a86f280
permissions -rw-r--r--
*** empty log message ***
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8148
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     1
"
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     2
 COPYRIGHT (c) 2004 by eXept Software AG
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     3
              All Rights Reserved
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     4
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     5
 This software is furnished under a license and may be used
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     6
 only in accordance with the terms of that license and with the
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     7
 inclusion of the above copyright notice.   This software may not
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     8
 be provided or otherwise made available to, or used by, any
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
     9
 other person.  No title to or ownership of the software is
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    10
 hereby transferred.
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    11
"
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    12
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"{ Package: 'stx:libbasic' }"
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
"{ NameSpace: CharacterEncoderImplementations }"
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
"Class definition: ISO10646_to_UTF8 is a TwoByteEncoder subclass that converts
 between ISO 10646 (unicode) strings and UTF-8.
 It declares no instance variables, class variables or pool dictionaries."

TwoByteEncoder subclass:#ISO10646_to_UTF8
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text-Encodings'
!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!ISO10646_to_UTF8 class methodsFor:'documentation'!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
8148
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    26
copyright
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    27
"
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    28
 COPYRIGHT (c) 2004 by eXept Software AG
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    29
              All Rights Reserved
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    30
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    31
 This software is furnished under a license and may be used
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    32
 only in accordance with the terms of that license and with the
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    33
 inclusion of the above copyright notice.   This software may not
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    34
 be provided or otherwise made available to, or used by, any
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    35
 other person.  No title to or ownership of the software is
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    36
 hereby transferred.
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    37
"
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    38
!
dbf64e3142d9 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    39
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
examples
"
  Encoding (unicode to utf8):

     ISO10646_to_UTF8 encodeString:'hello'.
     ISO10646_to_UTF8 encodeString:'ÄÖÜ'.

  Decoding (utf8 to unicode):

     |t|

     t := ISO10646_to_UTF8 encodeString:'ÄÖÜ'.
     ISO10646_to_UTF8 decodeString:t.
"
! !
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    55
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    56
!ISO10646_to_UTF8 methodsFor:'encoding & decoding'!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    57
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    58
decode:aCode
    "Not supported: no single byte conversion is possible for UTF-8,
     so this always reports the request via #shouldNotImplement.
     Whole strings are decoded with #decodeString: instead."

    self shouldNotImplement
!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    61
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    62
decodeString:aStringOrByteCollection
    "given a string (or byte collection) in UTF8 encoding,
     return a string containing the same characters, in 8, 16 or 32 bit representation.

     Argument:
        aStringOrByteCollection - the UTF8 encoded input; it is read via
                                  #readStream / #nextByte / #atEnd.

     Returns:
        - the argument itself, if it is a String and contains 7-bit bytes only;
        - otherwise a new String, Unicode16String or Unicode32String,
          the narrowest one able to hold the largest decoded code.

     Errors are reported through DecodingError raiseWith:errorString: with one of:
        'short utf8 string'              - input ends inside a multi-byte sequence
                                           (the method returns the result of the report)
        'overlong utf8 sequence'         - a code was encoded with more bytes than required
        'unicode character out of range' - a 6-byte sequence decodes to more than 16r3FFFFFFF
                                           (if execution continues, the code is clamped to 16r3FFFFFFF)
        'invalid utf8 encoding'          - a lead byte with the high bit set matches
                                           none of the 2..6 byte lead patterns

     Works in two passes over the input: the first pass validates, counts the
     characters and finds the required character width; the second pass fills
     the result string.

     Only useful, when reading from external sources.
     This only handles up-to 30bit characters.

     NOTE(review): follow-up bytes are masked with 2r00111111 but are not checked
     for the 10xxxxxx pattern - confirm whether stricter checking is wanted.

     If you work a lot with utf8 encoded textFiles,
     this is a first-class candidate for a primitive."

    |sz anyAbove7BitAscii nBitsRequired
     ascii "{ Class: SmallInteger }"
     byte  "{ Class: SmallInteger }"
     s newString idx next6Bits last6Bits
     errorReporter|

    errorReporter := [:msg | DecodingError raiseWith:aStringOrByteCollection errorString:msg].

    "/ pass-1 helper: append the low 6 bits of the next input byte to ascii;
    "/ leaves the method (via ^) if the input is exhausted.
    next6Bits := [
                    |byte|

                    byte := s nextByte.
                    byte isNil ifTrue:[^ errorReporter value:'short utf8 string'].
                    ascii := (ascii bitShift:6) bitOr:(byte bitAnd:2r00111111).
                 ].

    "/ helper for the final byte of a 6-byte sequence (used in both passes):
    "/ the combined value is computed in the untyped temporary a and
    "/ range-checked before it is stored into ascii.
    last6Bits := [
                    |byte a|

                    byte := s nextByte.
                    byte isNil ifTrue:[^ errorReporter value:'short utf8 string'].
                    a := (ascii bitShift:6) bitOr:(byte bitAnd:2r00111111).
                    (a > 16r3FFFFFFF) ifTrue:[
                        "/ ST/X can only represent 30 bit unicode characters.
                        errorReporter value:'unicode character out of range'.
                        a := 16r3FFFFFFF.
                    ].
                    ascii := a.
                 ].

    "/ pass 1: validate, count characters (sz) and determine the required width
    nBitsRequired := 8.
    anyAbove7BitAscii := false.
    sz := 0.
    s := aStringOrByteCollection readStream.
    [s atEnd] whileFalse:[
        byte := ascii := s nextByte.
        (byte bitAnd:16r80) ~~ 0 ifTrue:[
            anyAbove7BitAscii := true.
            (byte bitAnd:2r11100000) == 2r11000000 ifTrue:[
                "/ 2-byte form: 80 .. 7FF
                ascii := (byte bitAnd:2r00011111).
                next6Bits value.
                ascii > 16rFF ifTrue:[
                    nBitsRequired := nBitsRequired max:16
                ].
                "/ a strict utf8 decoder does not allow overlong sequences
                ascii < 16r80 ifTrue:[
                    errorReporter value:'overlong utf8 sequence'
                ].
            ] ifFalse:[
                (byte bitAnd:2r11110000) == 2r11100000 ifTrue:[
                    "/ 3-byte form: 800 .. FFFF
                    ascii := (byte bitAnd:2r00001111).
                    next6Bits value.
                    next6Bits value.
                    ascii > 16rFF ifTrue:[
                        nBitsRequired := nBitsRequired max:16
                    ].
                    ascii < 16r800 ifTrue:[
                        errorReporter value:'overlong utf8 sequence'
                    ].
                ] ifFalse:[
                    (byte bitAnd:2r11111000) == 2r11110000 ifTrue:[
                        "/ 4-byte form: 10000 .. 1FFFFF
                        ascii := (byte bitAnd:2r00000111).
                        next6Bits value.
                        next6Bits value.
                        next6Bits value.
                        ascii > 16rFF ifTrue:[
                            ascii > 16rFFFF ifTrue:[
                                nBitsRequired := nBitsRequired max:32
                            ] ifFalse:[
                                nBitsRequired := nBitsRequired max:16
                            ]
                        ].
                        ascii < 16r10000 ifTrue:[
                            errorReporter value:'overlong utf8 sequence'
                        ].
                    ] ifFalse:[
                        (byte bitAnd:2r11111100) == 2r11111000 ifTrue:[
                            "/ 5-byte form: 200000 .. 3FFFFFF
                            ascii := (byte bitAnd:2r00000011).
                            next6Bits value.
                            next6Bits value.
                            next6Bits value.
                            next6Bits value.
                            ascii > 16rFF ifTrue:[
                                ascii > 16rFFFF ifTrue:[
                                    nBitsRequired := nBitsRequired max:32
                                ] ifFalse:[
                                    nBitsRequired := nBitsRequired max:16
                                ]
                            ].
                            "/ the lower bound of this form is hexadecimal 200000;
                            "/ the 16r radix prefix is required here, as in the other branches.
                            ascii < 16r200000 ifTrue:[
                                errorReporter value:'overlong utf8 sequence'
                            ].
                        ] ifFalse:[
                            (byte bitAnd:2r11111110) == 2r11111100 ifTrue:[
                                "/ 6-byte form: 4000000 .. 7FFFFFFF
                                ascii := (byte bitAnd:2r00000001).
                                next6Bits value.
                                next6Bits value.
                                next6Bits value.
                                next6Bits value.
                                last6Bits value.
                                ascii > 16rFF ifTrue:[
                                    ascii > 16rFFFF ifTrue:[
                                        nBitsRequired := nBitsRequired max:32
                                    ] ifFalse:[
                                        nBitsRequired := nBitsRequired max:16
                                    ]
                                ].
                                ascii < 16r4000000 ifTrue:[
                                    errorReporter value:'overlong utf8 sequence'
                                ].
                            ] ifFalse:[
                                errorReporter value:'invalid utf8 encoding'
                            ]
                        ]
                    ]
                ]
            ].
        ].
        sz := sz + 1.
    ].

    "/ allocate the narrowest string class which can hold all decoded codes
    nBitsRequired == 8 ifTrue:[
        anyAbove7BitAscii ifFalse:[
            "/ can return the original string
            aStringOrByteCollection isString ifTrue:[^ aStringOrByteCollection].
        ].
        newString := String uninitializedNew:sz
    ] ifFalse:[
        nBitsRequired <= 16 ifTrue:[
            newString := Unicode16String new:sz
        ] ifFalse:[
            newString := Unicode32String new:sz
        ]
    ].

    "/ pass-2 helper: same as above, but without the end-of-input check
    "/ (a short input has already made pass 1 leave the method).
    next6Bits := [
                    |byte|

                    byte := s nextByte.
                    ascii := (ascii bitShift:6) bitOr:(byte bitAnd:2r00111111).
                 ].

    "/ pass 2: decode again from the start and store the characters
    s := aStringOrByteCollection readStream.
    idx := 1.
    [s atEnd] whileFalse:[
        byte := ascii := s nextByte.
        (byte bitAnd:2r10000000) ~~ 0 ifTrue:[
            (byte bitAnd:2r11100000) == 2r11000000 ifTrue:[
                ascii := (byte bitAnd:2r00011111).
                next6Bits value.
            ] ifFalse:[
                (byte bitAnd:2r11110000) == 2r11100000 ifTrue:[
                    ascii := (byte bitAnd:2r00001111).
                    next6Bits value.
                    next6Bits value.
                ] ifFalse:[
                    (byte bitAnd:2r11111000) == 2r11110000 ifTrue:[
                        ascii := (byte bitAnd:2r00000111).
                        next6Bits value.
                        next6Bits value.
                        next6Bits value.
                    ] ifFalse:[
                        (byte bitAnd:2r11111100) == 2r11111000 ifTrue:[
                            ascii := (byte bitAnd:2r00000011).
                            next6Bits value.
                            next6Bits value.
                            next6Bits value.
                            next6Bits value.
                        ] ifFalse:[
                            (byte bitAnd:2r11111110) == 2r11111100 ifTrue:[
                                ascii := (byte bitAnd:2r00000001).
                                next6Bits value.
                                next6Bits value.
                                next6Bits value.
                                next6Bits value.
                                last6Bits value.
                            ]
                        ]
                    ]
                ]
            ].
        ].
        newString at:idx put:(Character value:ascii).
        idx := idx + 1.
    ].
    ^ newString
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   262
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   263
    "
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   264
     CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   265
     CharacterArray fromUTF8Bytes:#[ 16rC1 16r02 ]       
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   266
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r81 16r02 ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   267
     CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ]  
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   268
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   269
   rfc2279 examples:
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   270
     CharacterArray fromUTF8Bytes:#[ 16r41 16rE2 16r89 16rA2 16rCE 16r91 16r2E ]           
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   271
     CharacterArray fromUTF8Bytes:#[ 16rED 16r95 16r9C 16rEA 16rB5 16rAD 16rEC 16r96 16rB4 ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   272
     CharacterArray fromUTF8Bytes:#[ 16rE6 16r97 16rA5 16rE6 16r9C 16rAC 16rE8 16rAA 16r9E ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
   invalid:
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
     CharacterArray fromUTF8Bytes:#[ 16rC0 16r80 ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r80 16r80 ]      
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
    "
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
encode:aCode
    "Single-code encoding is not provided by this encoder:
     this method only sends #shouldNotImplement to the receiver.
     Whole strings are converted by #encodeString: instead."
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
    self shouldNotImplement "/ no single byte conversion possible
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   282
!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   283
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   284
encodeString:aUnicodeString
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   285
    "Return the UTF-8 representation of aUnicodeString.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   286
     The resulting string is only useful to be stored on some external file,
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   287
     not for being used inside ST/X.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   288
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   289
     If you work a lot with utf8 encoded textFiles,
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   290
     this is a first-class candidate for a primitive.

     If every character has a code point <= 16r7F, aUnicodeString itself is
     returned unchanged; otherwise the contents of a new stream are returned,
     in which each character carries one byte value of the encoding.
     Code points from 16r80 up are written as 2 to 6 byte sequences;
     an error is raised for a code point which does not fit into 6 bytes."
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   291
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   292
    |s anyAbove7BitAscii|
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   293
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   294
    anyAbove7BitAscii := false.
8221
805e0b61b852 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8163
diff changeset
   295
    "/ the initial buffer is sized to the number of input characters
    s := WriteStream on:(String uninitializedNew:aUnicodeString size).
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   296
    aUnicodeString do:[:eachCharacter |
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   297
        |codePoint b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   298
8103
794d8e3f11d8 Use the ANSI-blessed #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents: 8081
diff changeset
   299
        codePoint := eachCharacter codePoint.
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   300
        "/ 7-bit characters are copied as they are
        codePoint <= 16r7F ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   301
            s nextPut:eachCharacter.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   302
        ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   303
            anyAbove7BitAscii := true.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   304
            "/ b1..b5 are continuation bytes (2r10xxxxxx), each holding 6 bits;
            "/ they are computed low-order bits first and written in reverse order,
            "/ after the lead byte. v holds the bits which are not yet consumed.
            b1 := Character value:((codePoint bitAnd:16r3F) bitOr:2r10000000).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   305
            v := codePoint bitShift:-6.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   306
            "/ at most 5 bits left: 2-byte sequence with lead byte 2r110xxxxx
            v <= 16r1F ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   307
                s nextPut:(Character value:(v bitOr:2r11000000)).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   308
                s nextPut:b1.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   309
            ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   310
                b2 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   311
                v := v bitShift:-6.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   312
                "/ at most 4 bits left: 3-byte sequence with lead byte 2r1110xxxx
                v <= 16r0F ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   313
                    s nextPut:(Character value:(v bitOr:2r11100000)).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   314
                    s nextPut:b2; nextPut:b1.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   315
                ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   316
                    b3 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   317
                    v := v bitShift:-6.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   318
                    "/ at most 3 bits left: 4-byte sequence with lead byte 2r11110xxx
                    v <= 16r07 ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   319
                        s nextPut:(Character value:(v bitOr:2r11110000)).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   320
                        s nextPut:b3; nextPut:b2; nextPut:b1.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   321
                    ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   322
                        b4 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   323
                        v := v bitShift:-6.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   324
                        "/ at most 2 bits left: 5-byte sequence with lead byte 2r111110xx
                        v <= 16r03 ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   325
                            s nextPut:(Character value:(v bitOr:2r11111000)).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   326
                            s nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   327
                        ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   328
                            b5 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   329
                            v := v bitShift:-6.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   330
                            "/ at most 1 bit left: 6-byte sequence with lead byte 2r1111110x
                            v <= 16r01 ifTrue:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   331
                                s nextPut:(Character value:(v bitOr:2r11111100)).
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   332
                                s nextPut:b5; nextPut:b4; nextPut:b3; nextPut:b2; nextPut:b1.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   333
                            ] ifFalse:[
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   334
                                "/ cannot happen - we only support up to 30 bit characters
                                "/ NOTE(review): the comment above says 30 bit, the message below says 31bit - confirm which limit is meant
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   335
                                self error:'ascii value > 31bit in utf8Encode'.
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   336
                            ]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   337
                        ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   338
                    ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   339
                ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   340
            ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   341
        ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   342
    ].
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   343
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   344
    anyAbove7BitAscii ifFalse:[^ aUnicodeString].   "/ avoid creation of new strings
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   345
    ^ s contents
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   346
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   347
    "
     usage examples - the expected result is shown to the right of each expression:
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   348
     (self encodeString:'hello') asByteArray                             #[104 101 108 108 111]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   349
     (self encodeString:(Character value:16r40) asString) asByteArray    #[64]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   350
     (self encodeString:(Character value:16r7F) asString) asByteArray    #[127]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   351
     (self encodeString:(Character value:16r80) asString) asByteArray    #[194 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   352
     (self encodeString:(Character value:16rFF) asString) asByteArray    #[195 191]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   353
     (self encodeString:(Character value:16r100) asString) asByteArray   #[196 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   354
     (self encodeString:(Character value:16r200) asString) asByteArray   #[200 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   355
     (self encodeString:(Character value:16r400) asString) asByteArray   #[208 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   356
     (self encodeString:(Character value:16r800) asString) asByteArray   #[224 160 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   357
     (self encodeString:(Character value:16r1000) asString) asByteArray  #[225 128 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   358
     (self encodeString:(Character value:16r2000) asString) asByteArray  #[226 128 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   359
     (self encodeString:(Character value:16r4000) asString) asByteArray  #[228 128 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   360
     (self encodeString:(Character value:16r8000) asString) asByteArray  #[232 128 128]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   361
     (self encodeString:(Character value:16rFFFF) asString) asByteArray  #[239 191 191]
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   362
    "
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   363
! !
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   364
8163
a867b07aa226 name query
ca
parents: 8148
diff changeset
   365
!ISO10646_to_UTF8 methodsFor:'queries'!
a867b07aa226 name query
ca
parents: 8148
diff changeset
   366
a867b07aa226 name query
ca
parents: 8148
diff changeset
   367
nameOfEncoding
    "Return the name of this encoding, the symbol #utf8."
a867b07aa226 name query
ca
parents: 8148
diff changeset
   368
    ^ #'utf8'
a867b07aa226 name query
ca
parents: 8148
diff changeset
   369
! !
a867b07aa226 name query
ca
parents: 8148
diff changeset
   370
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   371
!ISO10646_to_UTF8 class methodsFor:'documentation'!
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   372
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   373
version
    "Return the revision header string recorded for this source file
     (path, revision number, date and author as filled in by the source code manager)."
8221
805e0b61b852 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8163
diff changeset
   374
    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8.st,v 1.6 2004-03-17 16:16:31 cg Exp $'
8081
b468050174a9 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   375
! !