47 " |
49 " |
48 ! ! |
50 ! ! |
49 |
51 |
50 !ISO10646_to_SGML methodsFor:'encoding & decoding'! |
52 !ISO10646_to_SGML methodsFor:'encoding & decoding'! |
51 |
53 |
52 decode:aCode |
|
53 self shouldNotImplement "/ no single byte conversion possible |
|
54 ! |
|
55 |
|
56 decodeString:aStringOrByteCollection |
54 decodeString:aStringOrByteCollection |
57 "given a string in SGML encoding (i.e. with SGML escaped characters), |
55 "given a string in SGML encoding (i.e. with SGML escaped characters), |
58 return a new string containing the same characters, in 16bit (or more) encoding. |
56 return a new string containing the same characters, in 16bit (or more) encoding. |
59 Returns either a normal String, a TwoByteString or a FourByteString instance. |
57 Returns either a normal String, a TwoByteString or a FourByteString instance. |
60 Only useful, when reading from external sources. |
58 Only useful, when reading from external sources. |
61 This only handles up-to 30bit characters." |
59 This only handles up-to 30bit characters." |
62 |
60 |
63 |nBits ch |
61 |nBits ch |
64 in out codePoint t| |
62 in out codePoint| |
65 |
63 |
66 nBits := 8. |
64 nBits := 8. |
67 in := aStringOrByteCollection readStream. |
65 in := aStringOrByteCollection readStream. |
68 out := WriteStream on:(String new:10). |
66 out := CharacterWriteStream on:(String new:10). |
69 [in atEnd] whileFalse:[ |
67 [in atEnd] whileFalse:[ |
70 ch := in next. |
68 ch := in next. |
71 ch == $& ifTrue:[ |
69 ch == $& ifTrue:[ |
72 in peekOrNil == $# ifTrue:[ |
70 in peekOrNil == $# ifTrue:[ |
73 in next. |
71 in next. |
76 ch notNil and:[ch isDigit] |
74 ch notNil and:[ch isDigit] |
77 ] whileTrue:[ |
75 ] whileTrue:[ |
78 codePoint := (codePoint * 10) + ch digitValue. |
76 codePoint := (codePoint * 10) + ch digitValue. |
79 in next. |
77 in next. |
80 ]. |
78 ]. |
81 codePoint > 16rFF ifTrue:[ |
79 out nextPut:(Character codePoint:codePoint). |
82 codePoint > 16rFFFF ifTrue:[ |
|
83 nBits < 32 ifTrue:[ |
|
84 t := out contents. |
|
85 out := WriteStream on:(Unicode32String fromString:t). |
|
86 out position:t size. |
|
87 nBits := 32. |
|
88 ] |
|
89 ] ifFalse:[ |
|
90 nBits < 16 ifTrue:[ |
|
91 t := out contents. |
|
92 out := WriteStream on:(Unicode16String fromString:t). |
|
93 out position:t size. |
|
94 nBits := 16. |
|
95 ] |
|
96 ] |
|
97 ]. |
|
98 out nextPut:(Character value:codePoint). |
|
99 in peekOrNil == $; ifTrue:[ |
80 in peekOrNil == $; ifTrue:[ |
100 in next. |
81 in next. |
101 ] |
82 ] |
102 ] ifFalse:[ |
83 ] ifFalse:[ |
103 out nextPut:ch |
84 out nextPut:ch |
113 decodeString:'Файл' |
94 decodeString:'Файл' |
114 |
95 |
115 CharacterEncoderImplementations::ISO10646_to_SGML |
96 CharacterEncoderImplementations::ISO10646_to_SGML |
116 decodeString:'#197;&bn...' |
97 decodeString:'#197;&bn...' |
117 " |
98 " |
118 ! |
|
119 |
99 |
120 encode:aCode |
100 "Modified: / 17-01-2018 / 18:35:52 / stefan" |
121 self shouldNotImplement "/ no single byte conversion possible |
|
122 ! |
101 ! |
123 |
102 |
124 encodeString:aUnicodeString |
103 encodeString:aUnicodeString |
125 "return the SGML representation of aUnicodeString. |
104 "return the SGML representation of aUnicodeString. |
126 The resulting string is only useful to be stored on some external file, |
105 The resulting string is only useful to be stored on some external file, |
127 not for being used inside ST/X." |
106 not for being used inside ST/X." |
128 |
107 |
129 |ch in out codePoint| |
108 |in out| |
130 |
109 |
131 in := aUnicodeString readStream. |
110 in := aUnicodeString readStream. |
132 out := WriteStream on:(String new:10). |
111 out := WriteStream on:(String new:aUnicodeString size + 10). |
133 [in atEnd] whileFalse:[ |
112 [in atEnd] whileFalse:[ |
|
113 |ch codePoint| |
|
114 |
134 ch := in next. |
115 ch := in next. |
135 codePoint := ch codePoint. |
116 codePoint := ch codePoint. |
136 (codePoint between:16r20 and:16r7F) ifTrue:[ |
117 (codePoint between:16r20 and:16r7F) ifTrue:[ |
137 out nextPut:ch. |
118 out nextPut:ch. |
138 ] ifFalse:[ |
119 ] ifFalse:[ |
139 out nextPutAll:'&#'. |
120 out nextPutAll:'&#'. |
140 out nextPutAll:(codePoint printString). |
121 codePoint printOn:out. |
141 out nextPutAll:';'. |
122 out nextPut:$;. |
142 ]. |
123 ]. |
143 ]. |
124 ]. |
144 ^ out contents |
125 ^ out contents |
145 |
126 |
146 " |
127 " |
147 CharacterEncoderImplementations::ISO10646_to_SGML |
128 CharacterEncoderImplementations::ISO10646_to_SGML |
148 encodeString:'hello äöü' |
129 encodeString:'hello äöü' |
149 " |
130 " |
150 |
131 |
151 "Modified: / 23-10-2006 / 13:25:27 / cg" |
132 "Modified: / 23-10-2006 / 13:25:27 / cg" |
|
133 "Modified (format): / 17-01-2018 / 18:41:16 / stefan" |
|
134 ! ! |
|
135 |
|
136 !ISO10646_to_SGML methodsFor:'queries'! |
|
137 |
|
characterSize:aCharacter
    "answer the number of characters which aCharacter occupies in the SGML encoding:
     1 if its codePoint lies within 16r20 .. 16r7F (such characters are not escaped),
     otherwise the size of its numeric character reference, e.g. 7 for &#1234;"

    |code|

    code := aCharacter codePoint.
    ^ (code between:16r20 and:16r7F)
        ifTrue:[1]
        ifFalse:[code printString size + 3]     "/ 3 extra for the ampersand, hash and semicolon

    "Created: / 17-01-2018 / 18:01:40 / stefan"
|
148 ! ! |
|
149 |
|
150 !ISO10646_to_SGML methodsFor:'stream support'! |
|
151 |
|
readNextCharacterFrom:aStream
    "read and answer the next character from aStream, which delivers SGML encoded text.
     A numeric character reference of the form &#<decimal digits>; is consumed as a whole
     and answered as the character with that codePoint (the trailing semicolon is optional).
     Any other character, including an ampersand which is not followed by a hash,
     is answered as is."

    |char codePoint|

    char := aStream next.
    "/ an escape starts with an ampersand immediately followed by a hash
    "/ (the same test as in decodeString:).
    "/ peekOrNil, because the character just read may have been the last one in the stream.
    (char ~~ $& or:[aStream peekOrNil ~~ $#]) ifTrue:[
        ^ char.
    ].
    aStream next.       "/ skip the hash

    "/ accumulate the decimal digits of the codePoint
    codePoint := 0.
    [
        char := aStream peekOrNil.
        char notNil and:[char isDigit]
    ] whileTrue:[
        codePoint := (codePoint * 10) + char digitValue.
        aStream next.
    ].
    "/ the terminating semicolon is skipped if present, but not required
    aStream peekOrNil == $; ifTrue:[
        aStream next.
    ].
    ^ Character codePoint:codePoint.

    "Created: / 17-01-2018 / 18:37:40 / stefan"
152 ! ! |
174 ! ! |
153 |
175 |
154 !ISO10646_to_SGML class methodsFor:'documentation'! |
176 !ISO10646_to_SGML class methodsFor:'documentation'! |
155 |
177 |
156 version |
178 version |
157 ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006-10-23 11:25:11 cg Exp $' |
179 ^ '$Header$' |
158 ! ! |
180 ! ! |
|
181 |