author | Claus Gittinger <cg@exept.de> |
Tue, 26 Mar 2019 11:36:14 +0100 | |
changeset 23974 | 2601c14688d8 |
parent 22375 | 0baa14a9b02e |
child 23984 | ac00c411f7f6 |
permissions | -rw-r--r-- |
1 | 1 |
" |
2 |
COPYRIGHT (c) 1993 by Claus Gittinger |
|
235 | 3 |
All Rights Reserved |
1 | 4 |
|
5 |
This software is furnished under a license and may be used |
|
6 |
only in accordance with the terms of that license and with the |
|
7 |
inclusion of the above copyright notice. This software may not |
|
8 |
be provided or otherwise made available to, or used by, any |
|
9 |
other person. No title to or ownership of the software is |
|
10 |
hereby transferred. |
|
11 |
" |
|
10223
761e2a050b69
twoByteString moved (req'd in VM)
Claus Gittinger <cg@exept.de>
parents:
8094
diff
changeset
|
12 |
"{ Package: 'stx:libbasic' }" |
8094
d05f69bd0097
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
5761
diff
changeset
|
13 |
|
17621 | 14 |
"{ NameSpace: Smalltalk }" |
15 |
||
5761 | 16 |
"TwoByteString: a CharacterArray whose indexed slots are 16-bit words;
 it declares no named instance variables, class variables or pools."
CharacterArray variableWordSubclass:#TwoByteString
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text'
1 | 21 |
! |
22 |
||
89 | 23 |
!TwoByteString class methodsFor:'documentation'! |
24 |
||
25 |
copyright |
|
26 |
" |
|
27 |
COPYRIGHT (c) 1993 by Claus Gittinger |
|
235 | 28 |
All Rights Reserved |
1 | 29 |
|
89 | 30 |
This software is furnished under a license and may be used |
31 |
only in accordance with the terms of that license and with the |
|
32 |
inclusion of the above copyright notice. This software may not |
|
33 |
be provided or otherwise made available to, or used by, any |
|
34 |
other person. No title to or ownership of the software is |
|
35 |
hereby transferred. |
|
36 |
" |
|
37 |
! |
|
38 |
||
39 |
documentation |
|
40 |
" |
|
41 |
TwoByteStrings are like Strings, but store 16 bits per character. |
|
42 |
The integration of them into the system is not completed .... |
|
1290 | 43 |
|
44 |
[author:] |
|
45 |
Claus Gittinger |
|
1309 | 46 |
|
47 |
[see also:] |
|
1382 | 48 |
Text JISEncodedString |
1309 | 49 |
StringCollection |
89 | 50 |
" |
1214 | 51 |
! ! |
52 |
||
53 |
!TwoByteString class methodsFor:'initialization'! |
|
996 | 54 |
|
55 |
initialize
    "private - class setup: install the flag value answered by
     'Behavior flagWords' as my flags"

    self flags:(Behavior flagWords)

    "to re-run by hand:
        TwoByteString initialize
    "

    "Modified: 22.4.1996 / 16:14:14 / cg"
89 | 65 |
! ! |
1 | 66 |
|
67 |
!TwoByteString class methodsFor:'instance creation'! |
|
68 |
||
69 |
basicNew:anInteger
    "answer a new string of anInteger characters, every one of them
     preset to the space character"

    |newString|

    newString := super basicNew:anInteger.
    ^ newString atAllPut:(Character space)

    "Modified: / 26-02-1996 / 14:38:47 / cg"
    "Modified (comment): / 22-11-2017 / 21:32:49 / cg"
|
17621 | 76 |
! |
77 |
||
78 |
uninitializedNew:anInteger
    "answer a new string of anInteger characters;
     in contrast to #basicNew:, the contents is not filled with spaces here"

    ^ super basicNew:anInteger

    "
     self uninitializedNew:10
    "
|
1 | 86 |
! ! |
87 |
||
88 |
!TwoByteString methodsFor:'accessing'! |
|
89 |
||
90 |
basicAt:index
    "answer the character stored at position index (an Integer).
     The inherited accessor answers the raw 16-bit value, which is
     converted to a Character here"

    ^ Character value:(super basicAt:index)

    "Modified: 26.2.1996 / 17:02:16 / cg"
|
1 | 100 |
! |
101 |
||
102 |
basicAt:index put:aCharacter
    "store aCharacter at position index (an Integer) and answer aCharacter
     (not the receiver).
     The character's codePoint is what actually gets stored via the
     inherited accessor"

    super basicAt:index put:(aCharacter codePoint).
    ^ aCharacter

    "Modified: 19.4.1996 / 11:16:22 / cg"
14123 | 111 |
! |
112 |
||
113 |
unsignedShortAt:index
    "answer the raw 16-bit value (an Integer, not a Character)
     stored at position index"

    ^ super basicAt:index
|
1014 | 117 |
! ! |
118 |
||
16750 | 119 |
!TwoByteString methodsFor:'filling and replacing'! |
120 |
||
23974 | 121 |
from:start to:stop put:aCharacter
    "fill the elements start..stop (1-based, inclusive) of the receiver
     with aCharacter; the receiver is returned by the primitive.
     - reimplemented here for speed.

     The primitive only handles the case where aCharacter is a Character
     with a code of at most 16rFFFF, start and stop are SmallIntegers and
     0 < start <= stop <= size. Everything else is passed on to the
     inherited implementation."

%{  /* NOCONTEXT */

    REGISTER unsigned short *dstp;
    REGISTER INT count;
    REGISTER int charValue;
    INT len, index1, index2;
    OBJ cls;

    if (__isCharacter(aCharacter)
     && __bothSmallInteger(start, stop)) {
        /* INT (not int): a huge SmallInteger index must not wrap into range */
        len = __twoByteStringSize(self);
        index1 = __intVal(start);
        index2 = __intVal(stop);

        dstp = __twoByteStringVal(self) + index1 - 1;
        if ((cls = __qClass(self)) != @global(Unicode16String)) {
            INT nInstChars;

            /*
             * skip the named instance variables of the class.
             * __OBJS2BYTES__ yields a byte count, whereas dstp and len count
             * 16-bit units - hence the division
             * (same as in #characterSize and #containsNon7BitAscii)
             */
            nInstChars = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
            dstp += nInstChars;
            len -= nInstChars;
        }

        charValue = __intVal(__characterVal(aCharacter));
        if (((unsigned)charValue <= 0xFFFF)
         && (index1 <= index2)
         && (index1 > 0)) {
            if (index2 <= len) {
                count = index2 - index1 + 1;

#if (__POINTER_SIZE__ == 8)
                {
                    unsigned INT v4;

                    /*
                     * replicate the 16-bit code into all four lanes of a word.
                     * Must be done unsigned: with signed arithmetic, codes
                     * >= 0x8000 get sign-extended and the two upper lanes
                     * end up as 0xFFFF.
                     */
                    v4 = (unsigned INT)charValue & 0xFFFF;
                    v4 |= (v4 << 16);
                    v4 |= (v4 << 32);

                    /* fill unaligned part */
                    while ((count > 0) && (((unsigned INT)dstp & 7) != 0)) {
                        *dstp++ = charValue;
                        count--;
                    }

                    /* fill aligned part - 4 characters per store */
                    while (count >= 4) {
                        ((unsigned INT *)dstp)[0] = v4;
                        dstp += 4;
                        count -= 4;
                    }

                    /* fill rest */
                    while (count > 0) {
                        *dstp++ = charValue;
                        count--;
                    }
                    RETURN (self);
                }
#else
                while (count >= 8) {
                    dstp[0] = dstp[1] = dstp[2] = dstp[3] =
                    dstp[4] = dstp[5] = dstp[6] = dstp[7] = charValue;
                    dstp += 8;
                    count -= 8;
                }
                while (count-- > 0) {
                    *dstp++ = charValue;
                }
                RETURN (self);
#endif /* 64bit */
            }
        }
    }
%}.
    "
     fall back in case of non-integer index or out-of-bound index/value;
     will eventually lead to an out-of-bound signal raise
    "
    ^ super from:start to:stop put:aCharacter

    "
     (Unicode16String new:10) from:1 to:10 put:$a
     (Unicode16String new:20) from:10 to:20 put:$b
     (Unicode16String new:20) from:1 to:10 put:$c
     (Unicode16String new:100) from:2 to:99 put:$c

     codes with the high bit set (must not leak 16rFFFF into the result):
     (Unicode16String new:10) from:1 to:10 put:(Character value:16r8000)

     out of bounds:
     (Unicode16String new:10) from:0 to:9 put:$a
     (Unicode16String new:10) from:1 to:11 put:$a
    "

    "Created: / 26-03-2019 / 11:20:14 / Claus Gittinger"
|
216 |
! |
|
217 |
||
16750 | 218 |
replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Return the receiver.

     - reimplemented here for speed.

     The primitive handles SmallInteger indices which are within the bounds
     of both the receiver and aString, with aString being either a
     single-byte string or a two-byte string. An empty range (stop < start)
     returns the receiver immediately. Everything else is passed on to the
     inherited implementation."

%{  /* NOCONTEXT */

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)) {
        /* INT (not int): a huge SmallInteger index must not wrap into range */
        INT index1 = __intVal(start);
        INT index2 = __intVal(stop);
        INT count = index2 - index1 + 1;

        if (count <= 0) {
            RETURN (self);
        }

        /* repStart must be checked before its value may be extracted */
        if (__isSmallInteger(repStart)) {
            INT repIndex = __intVal(repStart);
            INT nInstChars = 0;
            INT len;
            OBJ cls;

            if ((cls = __qClass(self)) != @global(Unicode16String)) {
                /*
                 * skip the named instance variables of the class
                 * (byte count converted to 16-bit units, same as in
                 * #characterSize and #containsNon7BitAscii);
                 * otherwise they would be overwritten by the copy below
                 */
                nInstChars = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
            }
            len = __twoByteStringSize(self) - nInstChars;

            if ((index2 <= len) && (index1 > 0) && (repIndex > 0)) {
                REGISTER unsigned short *dstp = __twoByteStringVal(self) + nInstChars + index1 - 1;

                if (__isStringLike(aString)) {
                    INT repLen = __stringSize(aString);

                    /* written as a subtraction, so that it cannot overflow */
                    if (count <= (repLen - repIndex + 1)) {
                        REGISTER unsigned char *srcp = __stringVal(aString) + repIndex - 1;

                        /* widen each byte to 16 bits */
                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                        RETURN (self);
                    }
                } else if (__isTwoByteString(aString) || __isUnicode16String(aString)) {
                    INT repLen = __twoByteStringSize(aString);

                    if (count <= (repLen - repIndex + 1)) {
                        REGISTER unsigned short *srcp = __twoByteStringVal(aString) + repIndex - 1;

                        if (aString == self) {
                            /* take care of overlapping copy */
                            memmove(dstp, srcp, count*sizeof(short));
                            RETURN (self);
                        }
                        if (count > 5) {
                            memcpy(dstp, srcp, count*sizeof(short));
                        } else {
                            while (count-- > 0) {
                                *dstp++ = *srcp++;
                            }
                        }
                        RETURN (self);
                    }
                }
            }
        }
    }
#endif
%}.
    "/ arrive here if any index arg is out of range or not a SmallInteger,
    "/ or the source is neither a string nor a two-byte string.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

    "
     'hello world' asUnicode16String replaceFrom:1 to:5 with:'123456' startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:5 with:'123456' asUnicode16String startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:0 with:'123456' startingAt:2
     'hello' asUnicode16String replaceFrom:1 to:6 with:'123456' startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:1 with:'123456' startingAt:2
    "
|
288 |
! ! |
|
289 |
||
608 | 290 |
!TwoByteString methodsFor:'queries'! |
291 |
||
1017 | 292 |
bitsPerCharacter
    "answer how many bits are used to store a single character;
     always 16 here, as my characters are kept as double bytes"

    ^ 16

    "Modified: 20.4.1996 / 23:08:38 / cg"
|
14557 | 299 |
! |
300 |
||
19758 | 301 |
bytesPerCharacter
    "answer how many bytes are used to store a single character;
     always 2 here, as my characters are kept as double bytes"

    ^ 2
|
306 |
! |
|
307 |
||
18593 | 308 |
characterSize
    "answer the number of bits required by my widest character:
     7 if no code has any of the bits 16rFF80 set,
     8 if no code has any of the bits 16rFF00 set,
     16 otherwise"

%{  /* NOCONTEXT */

    REGISTER unsigned short *cursor = __twoByteStringVal(self);
    REGISTER unsigned short *limit = cursor + __twoByteStringSize(self);
    OBJ myClass = __qClass(self);
    int sawNon7Bit = 0;

    if ((myClass != Unicode16String) && (myClass != TwoByteString)) {
        /* step over the named instance variables (byte count -> 16-bit units) */
        cursor += __OBJS2BYTES__(__intVal(__ClassInstPtr(myClass)->c_ninstvars)) / 2;
    }

    /*
     * the scan runs in up to three granularities (4, 2, 1 characters per step);
     * each one first searches for a non-7-bit code and - continuing at the same
     * position - for a code which needs more than 8 bits.
     */
#if __POINTER_SIZE__ == 8
    if (sizeof(unsigned INT) == 8) {
        /* nothing seen yet at this point - look for the first non-7-bit code */
        while ((cursor + 4) <= limit) {
            if (*(unsigned INT *)cursor & 0xFF80FF80FF80FF80) {
                /* there are at least 8-bit chars - check for more */
                sawNon7Bit = 1;
                break;
            }
            cursor += 4;
        }
        while ((cursor + 4) <= limit) {
            if (*(unsigned INT *)cursor & 0xFF00FF00FF00FF00) {
                RETURN (__mkSmallInteger(16));
            }
            cursor += 4;
        }
    }
#endif
    if (sizeof(unsigned int) == 4) {
        if (! sawNon7Bit) {
            while ((cursor + 2) <= limit) {
                if (*(unsigned int *)cursor & 0xFF80FF80) {
                    /* there are at least 8-bit chars - check for more */
                    sawNon7Bit = 1;
                    break;
                }
                cursor += 2;
            }
        }
        while ((cursor + 2) <= limit) {
            if (*(unsigned int *)cursor & 0xFF00FF00) {
                RETURN (__mkSmallInteger(16));
            }
            cursor += 2;
        }
    }

    /* whatever is left is checked one character at a time */
    if (! sawNon7Bit) {
        while (cursor < limit) {
            if (*cursor & 0xFF80) {
                /* there are at least 8-bit chars - check for more */
                sawNon7Bit = 1;
                break;
            }
            cursor++;
        }
    }
    while (cursor < limit) {
        if (*cursor & 0xFF00) {
            RETURN (__mkSmallInteger(16));
        }
        cursor++;
    }
    RETURN (__mkSmallInteger(sawNon7Bit ? 8 : 7));
%}.

    "
     'hello world' asUnicode16String characterSize
     'hello worldüäö' asUnicode16String characterSize
     'a' asUnicode16String characterSize
     'ü' asUnicode16String characterSize
     'aa' asUnicode16String characterSize
     'aü' asUnicode16String characterSize
     'aaa' asUnicode16String characterSize
     'aaü' asUnicode16String characterSize
     'aaaü' asUnicode16String characterSize
     'aaaa' asUnicode16String characterSize
     'aaaaü' asUnicode16String characterSize
    "
18586 | 386 |
! |
387 |
||
18593 | 388 |
containsNon7BitAscii
    "answer true, if any of my characters is not 7-bit ascii
     (i.e. has a code with any of the bits 16rFF80 set); false otherwise"

%{  /* NOCONTEXT */

    REGISTER unsigned short *cursor = __twoByteStringVal(self);
    REGISTER unsigned short *limit = cursor + __twoByteStringSize(self);
    OBJ myClass = __qClass(self);

    if ((myClass != Unicode16String) && (myClass != TwoByteString)) {
        /* step over the named instance variables (byte count -> 16-bit units) */
        cursor += __OBJS2BYTES__(__intVal(__ClassInstPtr(myClass)->c_ninstvars)) / 2;
    }

#if __POINTER_SIZE__ == 8
    if (sizeof(unsigned INT) == 8) {
        /* test four characters per step */
        while ((cursor + 4) <= limit) {
            if (*(unsigned INT *)cursor & 0xFF80FF80FF80FF80) {
                RETURN ( true );
            }
            cursor += 4;
        }
    }
#endif
    if (sizeof(unsigned int) == 4) {
        /* test two characters per step */
        while ((cursor + 2) <= limit) {
            if (*(unsigned int *)cursor & 0xFF80FF80) {
                RETURN ( true );
            }
            cursor += 2;
        }
    }

    /* whatever is left is tested one character at a time */
    while (cursor < limit) {
        if (*cursor & 0xFF80) {
            RETURN ( true );
        }
        cursor++;
    }
    RETURN (false);
%}.

    "
     'hello world' asUnicode16String containsNon7BitAscii
     'hello worldüäö' asUnicode16String containsNon7BitAscii
     'ü' asUnicode16String containsNon7BitAscii
     'aü' asUnicode16String containsNon7BitAscii
     'aaü' asUnicode16String containsNon7BitAscii
     'aaaü' asUnicode16String containsNon7BitAscii
     'aaaaü' asUnicode16String containsNon7BitAscii
     'aaaaa' asUnicode16String containsNon7BitAscii
    "
435 |
! |
|
436 |
||
14557 | 437 |
isWideString
    "answer true - my characters need more than a single byte each"

    ^ true
608 | 441 |
! ! |
442 |
||
22226 | 443 |
!TwoByteString methodsFor:'testing'! |
444 |
||
22228 | 445 |
isSingleByteCollection
    "answer true, if the receiver's element access is byte access;
     i.e. if #at: and #at:put: are equivalent to #byteAt: and #byteAt:put:,
     and #replaceFrom:to: is equivalent to #replaceBytesFrom:to:.
     Here, false is answered, because #at: gives 2-byte characters, not bytes
     - redefined from UninterpretedBytes."

    ^ false

    "Created: / 30-08-2017 / 23:30:36 / cg"
|
455 |
! ! |
|
456 |
||
631 | 457 |
!TwoByteString class methodsFor:'documentation'! |
458 |
||
459 |
version
    "answer the version string; the keyword below is a placeholder
     which is meant to be expanded by the source code management system"

    ^ '$Header$'
631 | 461 |
! ! |
8094
d05f69bd0097
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
5761
diff
changeset
|
462 |
|
16750 | 463 |
|
996 | 464 |
"class initialization: this expression sends #initialize to TwoByteString when the chunk is evaluated (see the class-side #initialize method)"
TwoByteString initialize!