CharacterEncoderImplementations__ISO10646_to_SGML.st
changeset 22477 5b8c1f5f8ffa
parent 10108 8e610353f2fa
equal deleted inserted replaced
22476:b30058f26971 22477:5b8c1f5f8ffa
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 2004 by eXept Software AG
     4  COPYRIGHT (c) 2004 by eXept Software AG
     3               All Rights Reserved
     5               All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
    11 "
    13 "
    12 "{ Package: 'stx:libbasic' }"
    14 "{ Package: 'stx:libbasic' }"
    13 
    15 
    14 "{ NameSpace: CharacterEncoderImplementations }"
    16 "{ NameSpace: CharacterEncoderImplementations }"
    15 
    17 
    16 TwoByteEncoder subclass:#ISO10646_to_SGML
    18 VariableBytesEncoder subclass:#ISO10646_to_SGML
    17 	instanceVariableNames:''
    19 	instanceVariableNames:''
    18 	classVariableNames:''
    20 	classVariableNames:''
    19 	poolDictionaries:''
    21 	poolDictionaries:''
    20 	category:'Collections-Text-Encodings'
    22 	category:'Collections-Text-Encodings'
    21 !
    23 !
    47 "
    49 "
    48 ! !
    50 ! !
    49 
    51 
    50 !ISO10646_to_SGML methodsFor:'encoding & decoding'!
    52 !ISO10646_to_SGML methodsFor:'encoding & decoding'!
    51 
    53 
    52 decode:aCode
       
    53     self shouldNotImplement "/ no single byte conversion possible
       
    54 !
       
    55 
       
    56 decodeString:aStringOrByteCollection
    54 decodeString:aStringOrByteCollection
    57     "given a string in SGML encoding (i.e. with SGML escaped characters),
    55     "given a string in SGML encoding (i.e. with SGML escaped characters),
    58      return a new string containing the same characters, in 16bit (or more) encoding.
    56      return a new string containing the same characters, in 16bit (or more) encoding.
    59      Returns either a normal String, a TwoByteString or a FourByteString instance.
    57      Returns either a normal String, a TwoByteString or a FourByteString instance.
    60      Only useful, when reading from external sources.
    58      Only useful, when reading from external sources.
    61      This only handles up-to 30bit characters."
    59      This only handles up-to 30bit characters."
    62 
    60 
    63     |nBits ch 
    61     |nBits ch 
    64      in out codePoint t|
    62      in out codePoint|
    65 
    63 
    66     nBits := 8.
    64     nBits := 8.
    67     in := aStringOrByteCollection readStream.
    65     in := aStringOrByteCollection readStream.
    68     out := WriteStream on:(String new:10).
    66     out := CharacterWriteStream on:(String new:10).
    69     [in atEnd] whileFalse:[
    67     [in atEnd] whileFalse:[
    70         ch := in next.
    68         ch := in next.
    71         ch == $& ifTrue:[
    69         ch == $& ifTrue:[
    72             in peekOrNil == $# ifTrue:[
    70             in peekOrNil == $# ifTrue:[
    73                 in next.
    71                 in next.
    76                  ch notNil and:[ch isDigit]
    74                  ch notNil and:[ch isDigit]
    77                 ] whileTrue:[
    75                 ] whileTrue:[
    78                     codePoint := (codePoint * 10) + ch digitValue.
    76                     codePoint := (codePoint * 10) + ch digitValue.
    79                     in next.
    77                     in next.
    80                 ].
    78                 ].
    81                 codePoint > 16rFF ifTrue:[
    79                 out nextPut:(Character codePoint:codePoint).
    82                     codePoint > 16rFFFF ifTrue:[
       
    83                         nBits < 32 ifTrue:[
       
    84                             t := out contents.
       
    85                             out := WriteStream on:(Unicode32String fromString:t).
       
    86                             out position:t size.
       
    87                             nBits := 32.
       
    88                         ]
       
    89                     ] ifFalse:[
       
    90                         nBits < 16 ifTrue:[
       
    91                             t := out contents.
       
    92                             out := WriteStream on:(Unicode16String fromString:t).
       
    93                             out position:t size.
       
    94                             nBits := 16.
       
    95                         ]
       
    96                     ]
       
    97                 ].
       
    98                 out nextPut:(Character value:codePoint).
       
    99                 in peekOrNil == $; ifTrue:[
    80                 in peekOrNil == $; ifTrue:[
   100                     in next.
    81                     in next.
   101                 ]
    82                 ]
   102             ] ifFalse:[
    83             ] ifFalse:[
   103                 out nextPut:ch
    84                 out nextPut:ch
   113         decodeString:'&#1060;&#1072;&#1081;&#1083;' 
    94         decodeString:'&#1060;&#1072;&#1081;&#1083;' 
   114 
    95 
   115      CharacterEncoderImplementations::ISO10646_to_SGML
    96      CharacterEncoderImplementations::ISO10646_to_SGML
   116         decodeString:'#197;&bn...'
    97         decodeString:'#197;&bn...'
   117     "
    98     "
   118 !
       
   119 
    99 
   120 encode:aCode
   100     "Modified: / 17-01-2018 / 18:35:52 / stefan"
   121     self shouldNotImplement "/ no single byte conversion possible
       
   122 !
   101 !
   123 
   102 
   124 encodeString:aUnicodeString
   103 encodeString:aUnicodeString
   125     "return the SGML representation of aUnicodeString.
   104     "return the SGML representation of aUnicodeString.
   126      The resulting string is only useful to be stored on some external file,
   105      The resulting string is only useful to be stored on some external file,
   127      not for being used inside ST/X."
   106      not for being used inside ST/X."
   128 
   107 
   129     |ch in out codePoint|
   108     |in out|
   130 
   109 
   131     in := aUnicodeString readStream.
   110     in := aUnicodeString readStream.
   132     out := WriteStream on:(String new:10).
   111     out := WriteStream on:(String new:aUnicodeString size + 10).    "initial capacity derived from the input size"
   133     [in atEnd] whileFalse:[
   112     [in atEnd] whileFalse:[
       
   113         |ch codePoint|
       
   114 
   134         ch := in next.
   115         ch := in next.
   135         codePoint := ch codePoint.
   116         codePoint := ch codePoint.
   136         (codePoint between:16r20 and:16r7F) ifTrue:[    "code points 16r20..16r7F are copied unchanged"
   117         (codePoint between:16r20 and:16r7F) ifTrue:[    "code points 16r20..16r7F are copied unchanged"
   137             out nextPut:ch.    "NOTE(review): $& is inside this range and is not escaped, so a literal '&#' in the input is written as-is - confirm this is intended"
   118             out nextPut:ch.    "NOTE(review): $& is inside this range and is not escaped, so a literal '&#' in the input is written as-is - confirm this is intended"
   138         ] ifFalse:[    "every other code point is written as a decimal reference: '&#', the digits, ';'"
   119         ] ifFalse:[    "every other code point is written as a decimal reference: '&#', the digits, ';'"
   139             out nextPutAll:'&#'.
   120             out nextPutAll:'&#'.
   140             out nextPutAll:(codePoint printString).
   121             codePoint printOn:out.
   141             out nextPutAll:';'.
   122             out nextPut:$;.
   142         ].
   123         ].
   143     ].
   124     ].
   144     ^ out contents
   125     ^ out contents
   145 
   126 
   146     "
   127     "
   147      CharacterEncoderImplementations::ISO10646_to_SGML
   128      CharacterEncoderImplementations::ISO10646_to_SGML
   148         encodeString:'hello äöü' 
   129         encodeString:'hello äöü' 
   149     "
   130     "
   150 
   131 
   151     "Modified: / 23-10-2006 / 13:25:27 / cg"
   132     "Modified: / 23-10-2006 / 13:25:27 / cg"
       
   133     "Modified (format): / 17-01-2018 / 18:41:16 / stefan"
       
   134 ! !
       
   135 
       
   136 !ISO10646_to_SGML methodsFor:'queries'!
       
   137 
       
   138 characterSize:aCharacter
       
   139     |codePoint|
       
   140     "answer the number of characters aCharacter occupies in the encoded (SGML) string"
       
   141     codePoint := aCharacter codePoint.
       
   142     (codePoint between:16r20 and:16r7F) ifTrue:[
       
   143         ^ 1.    "same range that encodeString: copies unescaped"
       
   144     ].
       
   145     ^ codePoint printString size + 3   "&#1234; i.e. the decimal digits plus 3 for '&#' and ';'"
       
   146 
       
   147     "Created: / 17-01-2018 / 18:01:40 / stefan"
       
   148 ! !
       
   149 
       
   150 !ISO10646_to_SGML methodsFor:'stream support'!
       
   151 
       
   152 readNextCharacterFrom:aStream
       
   153     |char codePoint|
       
   154     "read one character from aStream; a decimal reference of the form &#NNN; (the form encodeString: writes) is answered as the single character with code point NNN, anything else is answered unchanged"
       
   155     char := aStream next.
       
   156     (char == $& and:[aStream peekOrNil == $#]) ifFalse:[
       
   157         ^ char.    "plain character, or a lone $& not followed by $#: no reference starts here"
       
   158     ].
       
   159     aStream next.    "skip the $# that follows the $&"
       
   160     "accumulate the decimal digits; if no digit follows, the code point stays 0"
       
   161     codePoint := 0.
       
   162     [char := aStream peekOrNil.
       
   163      char notNil and:[char isDigit]
       
   164     ] whileTrue:[
       
   165         codePoint := (codePoint * 10) + char digitValue.
       
   166         aStream next.
       
   167     ].
       
   168     aStream peekOrNil == $; ifTrue:[    "the terminating $; is consumed when present, but not required"
       
   169         aStream next.
       
   170     ].
       
   171     ^ Character codePoint:codePoint.
       
   172 
       
   173     "Created: / 17-01-2018 / 18:37:40 / stefan"
   152 ! !
   174 ! !
   153 
   175 
   154 !ISO10646_to_SGML class methodsFor:'documentation'!
   176 !ISO10646_to_SGML class methodsFor:'documentation'!
   155 
   177 
   156 version
   178 version
   157     ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006-10-23 11:25:11 cg Exp $'
   179     ^ '$Header$'
   158 ! !
   180 ! !
       
   181