79 romans := CharacterEncoderImplementations::JIS0208 romanTable. |
95 romans := CharacterEncoderImplementations::JIS0208 romanTable. |
80 |
96 |
81 %{ |
97 %{ |
82 if (__isStringLike(aString) |
98 if (__isStringLike(aString) |
83 && (__Class(newString) == @global(TwoByteString))) { |
99 && (__Class(newString) == @global(TwoByteString))) { |
84 INT _dstIdx = 0, _srcIdx = 0; |
100 INT _dstIdx = 0, _srcIdx = 0; |
85 int _sz = __intVal(sz); |
101 int _sz = __intVal(sz); |
86 unsigned char *_cp = __stringVal(aString); |
102 unsigned char *_cp = __stringVal(aString); |
87 unsigned char _c1, _c2; |
103 unsigned char _c1, _c2; |
88 unsigned short *_jcp = (unsigned short *)__stringVal(newString); |
104 unsigned short *_jcp = (unsigned short *)__stringVal(newString); |
89 |
105 |
90 while (_srcIdx < _sz) { |
106 while (_srcIdx < _sz) { |
91 int _val; |
107 int _val; |
92 |
108 |
93 _c1 = _cp[_srcIdx]; |
109 _c1 = _cp[_srcIdx]; |
94 _srcIdx++; |
110 _srcIdx++; |
95 |
111 |
96 if ((_srcIdx < _sz) |
112 if ((_srcIdx < _sz) |
97 && (((_c1 >= 129) && (_c1 <= 159)) |
113 && (((_c1 >= 129) && (_c1 <= 159)) |
98 || ((_c1 >= 224) && (_c1 <= 239)))) { |
114 || ((_c1 >= 224) && (_c1 <= 239)))) { |
99 _c2 = _cp[_srcIdx]; |
115 _c2 = _cp[_srcIdx]; |
100 _srcIdx++; |
116 _srcIdx++; |
101 if ((_c2 >= 64) && (_c2 <= 252)) { |
117 if ((_c2 >= 64) && (_c2 <= 252)) { |
102 int _adjust, _rowOffs, _cellOffs; |
118 int _adjust, _rowOffs, _cellOffs; |
103 int _b1, _b2; |
119 int _b1, _b2; |
104 |
120 |
105 _adjust = (_c2 < 159) ? 1 : 0; |
121 _adjust = (_c2 < 159) ? 1 : 0; |
106 _rowOffs = (_c1 < 160) ? 112 : 176; |
122 _rowOffs = (_c1 < 160) ? 112 : 176; |
107 if (_adjust) { |
123 if (_adjust) { |
108 _cellOffs = 31 + ((_c2 > 127) ? 1 : 0); |
124 _cellOffs = 31 + ((_c2 > 127) ? 1 : 0); |
109 } else { |
125 } else { |
110 _cellOffs = 126; |
126 _cellOffs = 126; |
111 } |
127 } |
112 _b1 = ((_c1 - _rowOffs) << 1) - _adjust; |
128 _b1 = ((_c1 - _rowOffs) << 1) - _adjust; |
113 _b2 = (_c2 - _cellOffs); |
129 _b2 = (_c2 - _cellOffs); |
114 _val = (_b1<<8) + _b2; |
130 _val = (_b1<<8) + _b2; |
115 if (_val <= 0) { |
131 if (_val <= 0) { |
116 /* decoder error - let smalltalk handle that */ |
132 /* decoder error - let smalltalk handle that */ |
117 _srcIdx -= 2; |
133 _srcIdx -= 2; |
118 goto getOutOfHere; |
134 goto getOutOfHere; |
119 } |
135 } |
120 if (_val > 0xFF) any16bit = true; |
136 if (_val > 0xFF) any16bit = true; |
121 _jcp[_dstIdx] = _val; |
137 _jcp[_dstIdx] = _val; |
122 } else { |
138 } else { |
123 /* mhmh - append untranslated */ |
139 /* mhmh - append untranslated */ |
124 |
140 |
125 _jcp[_dstIdx] = _c1; |
141 _jcp[_dstIdx] = _c1; |
126 _dstIdx++; |
142 _dstIdx++; |
127 _jcp[_dstIdx] = _c2; |
143 _jcp[_dstIdx] = _c2; |
128 } |
144 } |
129 } else { |
145 } else { |
130 if ((_c1 >= 0xA1 /* 161 */) && (_c1 <= 0xDF /* 223 */)) { |
146 if ((_c1 >= 0xA1 /* 161 */) && (_c1 <= 0xDF /* 223 */)) { |
131 /* HALFWIDTH KATAKANA |
147 /* HALFWIDTH KATAKANA |
132 * map half-width katakana to 8E:xx |
148 * map half-width katakana to 8E:xx |
133 */ |
149 */ |
134 _val = _c1 - 128; |
150 _val = _c1 - 128; |
135 _val = _val + 0x8E00; |
151 _val = _val + 0x8E00; |
136 any16bit = true; |
152 any16bit = true; |
137 _jcp[_dstIdx] = _val; |
153 _jcp[_dstIdx] = _val; |
138 } else { |
154 } else { |
139 /* roman characters are translated as per romanTable */ |
155 /* roman characters are translated as per romanTable */ |
140 _jcp[_dstIdx] = _c1; |
156 _jcp[_dstIdx] = _c1; |
141 if ((romans != nil) |
157 if ((romans != nil) |
142 && (__isArrayLike(romans)) |
158 && (__isArrayLike(romans)) |
143 && ((_c1 - 0x20) < __arraySize(romans))) { |
159 && ((_c1 - 0x20) < __arraySize(romans))) { |
144 any16bit = true; |
160 any16bit = true; |
145 _jcp[_dstIdx] = __intVal(__ArrayInstPtr(romans)->a_element[(_c1 - 0x20)]); |
161 _jcp[_dstIdx] = __intVal(__ArrayInstPtr(romans)->a_element[(_c1 - 0x20)]); |
146 } |
162 } |
147 } |
163 } |
148 } |
164 } |
149 _dstIdx++; |
165 _dstIdx++; |
150 } |
166 } |
151 getOutOfHere: ; |
167 getOutOfHere: ; |
152 dstIdx = __mkSmallInteger(_dstIdx+1); |
168 dstIdx = __mkSmallInteger(_dstIdx+1); |
153 srcIdx = __mkSmallInteger(_srcIdx+1); |
169 srcIdx = __mkSmallInteger(_srcIdx+1); |
154 } |
170 } |
155 %}. |
171 %}. |
156 |
172 |
157 [srcIdx <= sz] whileTrue:[ |
173 [srcIdx <= sz] whileTrue:[ |
158 "/ |
174 "/ |
159 "/ scan for next character in 129..159 or 224..239 |
175 "/ scan for next character in 129..159 or 224..239 |
160 "/ |
176 "/ |
161 char1 := aString at:srcIdx. |
177 char1 := aString at:srcIdx. |
162 srcIdx := srcIdx + 1. |
178 srcIdx := srcIdx + 1. |
163 b1 := char1 codePoint. |
179 b1 := char1 codePoint. |
164 |
180 |
165 ((srcIdx <= sz) |
181 ((srcIdx <= sz) |
166 and:[(b1 >= 16r81"129" and:[b1 <= 16r9F"159"]) "/ SJIS1 81 .. 9F |
182 and:[(b1 >= 16r81"129" and:[b1 <= 16r9F"159"]) "/ SJIS1 81 .. 9F |
167 or:[b1 >= 16rE0"224" and:[b1 <= 16rEF"239"]]]) ifTrue:[ "/ E0 .. EF |
183 or:[b1 >= 16rE0"224" and:[b1 <= 16rEF"239"]]]) ifTrue:[ "/ E0 .. EF |
168 char2 := aString at:srcIdx. |
184 char2 := aString at:srcIdx. |
169 srcIdx := srcIdx + 1. |
185 srcIdx := srcIdx + 1. |
170 b2 := char2 codePoint. |
186 b2 := char2 codePoint. |
171 (b2 >= 16r40"64" and:[b2 <= 16rFC"252"]) ifTrue:[ "/ SJIS2 40 .. FC |
187 (b2 >= 16r40"64" and:[b2 <= 16rFC"252"]) ifTrue:[ "/ SJIS2 40 .. FC |
172 |adjust rowOffs cellOffs| |
188 |adjust rowOffs cellOffs| |
173 |
189 |
174 adjust := (b2 < 16r9F"159") ifTrue:[1] ifFalse:[0]. |
190 adjust := (b2 < 16r9F"159") ifTrue:[1] ifFalse:[0]. |
175 rowOffs := b1 < 16rA0"160" ifTrue:[112] ifFalse:[176]. |
191 rowOffs := b1 < 16rA0"160" ifTrue:[112] ifFalse:[176]. |
176 adjust == 1 ifTrue:[ |
192 adjust == 1 ifTrue:[ |
177 cellOffs := 31 + (b2 > 127 ifTrue:[1] ifFalse:[0]). |
193 cellOffs := 31 + (b2 > 127 ifTrue:[1] ifFalse:[0]). |
178 ] ifFalse:[ |
194 ] ifFalse:[ |
179 cellOffs := 126. |
195 cellOffs := 126. |
180 ]. |
196 ]. |
181 b1 := ((b1 - rowOffs) bitShift:1) - adjust. |
197 b1 := ((b1 - rowOffs) bitShift:1) - adjust. |
182 b2 := (b2 - cellOffs). |
198 b2 := (b2 - cellOffs). |
183 val := (b1 bitShift:8) + b2. |
199 val := (b1 bitShift:8) + b2. |
184 val <= 0 ifTrue:[ |
200 val <= 0 ifTrue:[ |
185 DecodingError |
201 DecodingError |
186 raiseWith:aString |
202 raiseWith:aString |
187 errorString:'SJIS decoding failed (not SJIS encoded ?)'. |
203 errorString:'SJIS decoding failed (not SJIS encoded ?)'. |
188 newString at:dstIdx put:char1. |
204 newString at:dstIdx put:char1. |
189 dstIdx := dstIdx + 1. |
205 dstIdx := dstIdx + 1. |
190 newString at:dstIdx put:char2. |
206 newString at:dstIdx put:char2. |
191 ] ifFalse:[ |
207 ] ifFalse:[ |
192 val > 16rFF ifTrue:[any16bit := true]. |
208 val > 16rFF ifTrue:[any16bit := true]. |
193 newString at:dstIdx put:(Character value:val). |
209 newString at:dstIdx put:(Character value:val). |
194 ] |
210 ] |
195 ] ifFalse:[ |
211 ] ifFalse:[ |
196 "/ mhmh - append untranslated |
212 "/ mhmh - append untranslated |
197 |
213 |
198 newString at:dstIdx put:char1. |
214 newString at:dstIdx put:char1. |
199 dstIdx := dstIdx + 1. |
215 dstIdx := dstIdx + 1. |
200 newString at:dstIdx put:char2. |
216 newString at:dstIdx put:char2. |
201 ] |
217 ] |
202 ] ifFalse:[ |
218 ] ifFalse:[ |
203 (b1 >= 16rA1 "161" and:[b1 <= 16rDF "223"]) ifTrue:[ "/ HALFWIDTH KATAKANA |
219 (b1 >= 16rA1 "161" and:[b1 <= 16rDF "223"]) ifTrue:[ "/ HALFWIDTH KATAKANA |
204 "/ map half-width katakana to 8E:xx
220 "/ map half-width katakana to 8E:xx
205 val := b1 - 128. |
221 val := b1 - 128. |
206 val := val + (16r8E"142" bitShift:8). |
222 val := val + (16r8E"142" bitShift:8). |
207 any16bit := true. |
223 any16bit := true. |
208 newString at:dstIdx put:(Character value:val). |
224 newString at:dstIdx put:(Character value:val). |
209 ] ifFalse:[ |
225 ] ifFalse:[ |
210 "/ roman characters translated as per romanTable |
226 "/ roman characters translated as per romanTable |
211 newString at:dstIdx put:char1 |
227 newString at:dstIdx put:char1. |
212 romans isArray ifTrue:[ |
228 romans isArray ifTrue:[ |
213 char1 codePoint < romans size ifTrue:[ |
229 char1 codePoint < romans size ifTrue:[ |
214 any16bit := true. |
230 any16bit := true. |
215 newString at:dstIdx put:(Character value:(romans at:char1 codePoint-32+1)). |
231 newString at:dstIdx put:(Character value:(romans at:char1 codePoint-32+1)). |
216 ] |
232 ] |
217 ] |
233 ] |
218 ] |
234 ] |
219 ]. |
235 ]. |
220 dstIdx := dstIdx + 1. |
236 dstIdx := dstIdx + 1. |
221 ]. |
237 ]. |
222 any16bit ifFalse:[ |
238 any16bit ifFalse:[ |
223 newString := String fromString:newString |
239 newString := String fromString:newString |
224 ]. |
240 ]. |
225 |
241 |
226 (dstIdx-1) ~~ sz ifTrue:[ |
242 (dstIdx-1) ~~ sz ifTrue:[ |
227 newString := newString copyTo:dstIdx - 1. |
243 newString := newString copyTo:dstIdx - 1. |
228 ]. |
244 ]. |
229 |
245 |
230 ^ newString |
246 ^ newString |
231 |
247 |
232 "simple: |
248 "simple: |
233 |
249 |
234 CharacterEncoderImplementations::JIS0208_to_SJIS decodeString:'hello' |
250 CharacterEncoderImplementations::JIS0208_to_SJIS decodeString:'hello' |
235 (CharacterEncoder encoderFor:#sjis) decodeString:'hello' |
251 (CharacterEncoder encoderFor:#sjis) decodeString:'hello' |
236 |
252 |
237 CharacterEncoderImplementations::JIS0208_to_SJIS decodeString:('../../doc/online/japanese/TOP.html' asFilename contents asString) |
253 CharacterEncoderImplementations::JIS0208_to_SJIS decodeString:('../../doc/online/japanese/TOP.html' asFilename contentsAsString) |
238 |
254 |
239 '../../doc/online/japanese/TOP.html' asFilename contents asString |
255 '../../doc/online/japanese/TOP.html' asFilename contentsAsString |
240 decodeFrom:#sjis |
256 decodeFrom:#jis208 |
241 " |
257 " |
|
258 |
|
259 "Modified (comment): / 17-01-2018 / 17:48:08 / stefan" |
242 ! |
260 ! |
243 |
261 |
244 encodeString:aJISString |
262 encodeString:aJISString |
245 "return a new string with aJISString's characters as SJIS encoded 8bit string. |
263 "return a new string with aJISString's characters as SJIS encoded 8bit string. |
246 The resulting string is only useful to be stored on some external file, |
264 The resulting string is only useful to be stored on some external file, |