CharacterEncoderImplementations__ISO10646_to_UTF8.st
changeset 18604 54caf7b64994
parent 18601 00dc53dfe54d
child 18610 b9799e74a9c5
child 18625 37d697b9bf8d
equal deleted inserted replaced
18603:907f3fa1c5d7 18604:54caf7b64994
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 2004 by eXept Software AG
     4  COPYRIGHT (c) 2004 by eXept Software AG
     3 	      All Rights Reserved
     5 	      All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
    18 	classVariableNames:''
    20 	classVariableNames:''
    19 	poolDictionaries:''
    21 	poolDictionaries:''
    20 	category:'Collections-Text-Encodings'
    22 	category:'Collections-Text-Encodings'
    21 !
    23 !
    22 
    24 
       
    25 ISO10646_to_UTF8 class instanceVariableNames:'theOneAndOnlyInstance'
       
    26 
       
    27 "
       
    28  No other class instance variables are inherited by this class.
       
    29 "
       
    30 !
       
    31 
    23 !ISO10646_to_UTF8 class methodsFor:'documentation'!
    32 !ISO10646_to_UTF8 class methodsFor:'documentation'!
    24 
    33 
    25 copyright
    34 copyright
    26 "
    35 "
    27  COPYRIGHT (c) 2004 by eXept Software AG
    36  COPYRIGHT (c) 2004 by eXept Software AG
    43 
    52 
    44 
    53 
    45   Decoding (utf8 to unicode):
    54   Decoding (utf8 to unicode):
    46      |t|
    55      |t|
    47 
    56 
    48      t := ISO10646_to_UTF8 encodeString:'Helloœ'.
    57      t := ISO10646_to_UTF8 encodeString:'Helloœ'.
    49      ISO10646_to_UTF8 decodeString:t.
    58      ISO10646_to_UTF8 decodeString:t.
    50 "
    59 "
       
    60 ! !
       
    61 
       
    62 !ISO10646_to_UTF8 class methodsFor:'instance creation'!
       
    63 
       
    64 flushSingleton
       
    65     "discards the cached singleton by resetting theOneAndOnlyInstance to nil"
       
    66 
       
    67     theOneAndOnlyInstance := nil
       
    68 
       
    69     "
       
    70      self flushSingleton
       
    71     "
       
    72 !
       
    73 
       
    74 new
       
    75     "returns the singleton; it is created (basicNew initialize) on first use and cached"
       
    76 
       
    77     theOneAndOnlyInstance isNil ifTrue:[
       
    78         theOneAndOnlyInstance := self basicNew initialize.
       
    79     ].
       
    80     ^ theOneAndOnlyInstance.
       
    81 !
       
    82 
       
    83 theOneAndOnlyInstance
       
    84     "returns the cached singleton, creating and initializing it if it is still nil"
       
    85 
       
    86     theOneAndOnlyInstance isNil ifTrue:[
       
    87         theOneAndOnlyInstance := self basicNew initialize.
       
    88     ].
       
    89     ^ theOneAndOnlyInstance.
    51 ! !
    90 ! !
    52 
    91 
    53 !ISO10646_to_UTF8 methodsFor:'encoding & decoding'!
    92 !ISO10646_to_UTF8 methodsFor:'encoding & decoding'!
    54 
    93 
    55 decode:aCode
    94 decode:aCode
   309      If you work a lot with utf8 encoded textFiles,
   348      If you work a lot with utf8 encoded textFiles,
   310      this is a first-class candidate for a primitive."
   349      this is a first-class candidate for a primitive."
   311 
   350 
   312     |s|
   351     |s|
   313 
   352 
   314     "/ avoid creation of new strings
   353     "/ avoid creation of new strings if possible
   315     aUnicodeString containsNon7BitAscii ifFalse:[
   354     aUnicodeString containsNon7BitAscii ifFalse:[
   316         ^ aUnicodeString asSingleByteString
   355         ^ aUnicodeString asSingleByteString
   317     ].
   356     ].
   318 
   357 
   319     s := WriteStream on:(String uninitializedNew:aUnicodeString size).
   358     s := WriteStream on:(String uninitializedNew:(aUnicodeString size * 3 // 2)).
   320     aUnicodeString do:[:eachCharacter |
   359     aUnicodeString do:[:eachCharacter |
   321         |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
   360         |codePoint "{Class: SmallInteger }" b1 b2 b3 b4 b5 v "{Class: SmallInteger }"|
   322 
   361 
   323         codePoint := eachCharacter codePoint.
   362         codePoint := eachCharacter codePoint.
   324         codePoint <= 16r7F ifTrue:[
   363         codePoint <= 16r7F ifTrue:[