author | Claus Gittinger <cg@exept.de> |
Mon, 15 Jul 2019 15:39:26 +0200 | |
changeset 5054 | b01df68cba0a |
parent 4923 | 6e6fea06fff6 |
child 5090 | 33d4825d883d |
permissions | -rw-r--r-- |
1404 | 1 |
" |
2 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
3 |
All Rights Reserved |
|
4 |
||
5 |
This software is furnished under a license and may be used |
|
6 |
only in accordance with the terms of that license and with the |
|
7 |
inclusion of the above copyright notice. This software may not |
|
8 |
be provided or otherwise made available to, or used by, any |
|
9 |
other person. No title to or ownership of the software is |
|
10 |
hereby transferred. |
|
11 |
" |
|
1415
3ef6a2c42611
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
1404
diff
changeset
|
12 |
"{ Package: 'stx:libbasic2' }" |
1404 | 13 |
|
3533 | 14 |
"{ NameSpace: Smalltalk }" |
15 |
||
1404 | 16 |
"FourByteString: an indexed (variable long) subclass of CharacterArray
 without named instance variables or class variables."
CharacterArray
    variableLongSubclass:#FourByteString
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text'
|
21 |
! |
|
22 |
||
23 |
!FourByteString class methodsFor:'documentation'! |
|
24 |
||
25 |
copyright |
|
26 |
" |
|
27 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
28 |
All Rights Reserved |
|
29 |
||
30 |
This software is furnished under a license and may be used |
|
31 |
only in accordance with the terms of that license and with the |
|
32 |
inclusion of the above copyright notice. This software may not |
|
33 |
be provided or otherwise made available to, or used by, any |
|
34 |
other person. No title to or ownership of the software is |
|
35 |
hereby transferred. |
|
36 |
" |
|
37 |
! |
|
38 |
||
39 |
documentation |
|
40 |
" |
|
41 |
FourByteStrings are like strings, but store 32 bits per character. |

42 |
Their integration into the system is not yet complete. |
|
43 |
||
44 |
[author:] |
|
45 |
Claus Gittinger |
|
46 |
||
47 |
[see also:] |
|
48 |
Text TwoByteString UnicodeEncodedString |
|
49 |
StringCollection |
|
50 |
" |
|
51 |
! ! |
|
52 |
||
53 |
!FourByteString class methodsFor:'initialization'! |
|
54 |
||
55 |
initialize
    "class initialization (private):
     install the value answered by 'Behavior flagLongs' as the flags of this class"

    |longsFlag|

    longsFlag := Behavior flagLongs.
    self flags:longsFlag

    "
     FourByteString initialize
    "

    "Modified: 22.4.1996 / 16:14:14 / cg"
|
65 |
! ! |
|
66 |
||
67 |
!FourByteString class methodsFor:'instance creation'! |
|
68 |
||
69 |
basicNew:anInteger
    "create a string with room for anInteger characters,
     with every character preset to a space"

    |newString|

    newString := super basicNew:anInteger.
    ^ newString atAllPut:(Character space)

    "Modified: 26.2.1996 / 14:38:47 / cg"
|
3533 | 75 |
! |
76 |
||
77 |
uninitializedNew:anInteger
    "create a string with room for anInteger characters.
     In contrast to #basicNew:, the characters are NOT preset to spaces here;
     the contents are whatever the inherited #basicNew: leaves behind."

    |rawString|

    rawString := super basicNew:anInteger.
    ^ rawString

    "
     self uninitializedNew:10
    "
|
1404 | 85 |
! ! |
86 |
||
87 |
!FourByteString methodsFor:'accessing'! |
|
88 |
||
89 |
basicAt:index
    "answer the character stored at position index, an Integer.
     - reimplemented here, because the inherited accessor answers the stored
       integer, which has to be wrapped into a (32-bit) Character"

    ^ Character value:(super basicAt:index)
|
97 |
! |
|
98 |
||
99 |
basicAt:index put:aCharacter
    "store aCharacter at position index, an Integer, and answer aCharacter
     (not the receiver).
     - reimplemented here, because the code point of the (32-bit) character
       is what gets stored into the indexed slot"

    super basicAt:index put:aCharacter codePoint.
    ^ aCharacter
|
109 |
! ! |
|
110 |
||
4920 | 111 |
!FourByteString methodsFor:'filling and replacing'! |
112 |
||
113 |
from:start to:stop put:aCharacter
    "fill the elements start..stop (1-based, inclusive) of the receiver
     with aCharacter and return the receiver.
     - reimplemented here for speed.

     The primitive only handles the case where
        - start and stop are SmallIntegers with 0 < start <= stop <= size,
        - aCharacter is a Character with a code point <= 16r0FFFFFFF,
        - the receiver is a Unicode32String, or its class has no named
          instance variables (c_ninstvars is 0).
     Everything else is passed on to the inherited implementation."

%{  /* NOCONTEXT */

    if (__isCharacter(aCharacter)
     && __bothSmallInteger(start, stop)) {
        OBJ cls;
        /* INT instead of int: a huge SmallInteger index must not be
         * truncated into a seemingly valid small one */
        INT len = __unicode32StringSize(self);
        INT index1 = __intVal(start);
        INT index2 = __intVal(stop);

        if (((cls = __qClass(self)) == Unicode32String)
         || (__OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) == 0)) {
            REGISTER unsigned charValue = __intVal(__characterVal(aCharacter));

            if ((charValue <= 0x0FFFFFFF)
             && (index1 <= index2)
             && (index1 > 0)
             && (index2 <= len)) {
                /* the destination is only computed after the range was validated */
                REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;
                REGISTER INT count = index2 - index1 + 1;

#if (__POINTER_SIZE__ == 8)
                {
                    /* the character replicated into both halves of a 64-bit word.
                     * The value must be widened BEFORE shifting: shifting a 32-bit
                     * operand by 32 is undefined and typically leaves the upper
                     * half zero, which would zero every second character. */
                    unsigned INT v2 = ((unsigned INT)charValue << 32) | (unsigned INT)charValue;

                    /* fill unaligned part (until dstp is 8-byte aligned) */
                    while ((count > 0) && ((unsigned INT)dstp & 7)) {
                        *dstp++ = charValue;
                        count--;
                    }

                    /* fill aligned part; each 64-bit store writes 2 characters */
                    // TODO: use SSE instructions, if possible
                    while (count >= 16) {
                        ((unsigned INT *)dstp)[0] = v2;
                        ((unsigned INT *)dstp)[1] = v2;
                        ((unsigned INT *)dstp)[2] = v2;
                        ((unsigned INT *)dstp)[3] = v2;
                        ((unsigned INT *)dstp)[4] = v2;
                        ((unsigned INT *)dstp)[5] = v2;
                        ((unsigned INT *)dstp)[6] = v2;
                        ((unsigned INT *)dstp)[7] = v2;
                        dstp += 16;
                        count -= 16;
                    }
                    if (count >= 8) {
                        ((unsigned INT *)dstp)[0] = v2;
                        ((unsigned INT *)dstp)[1] = v2;
                        ((unsigned INT *)dstp)[2] = v2;
                        ((unsigned INT *)dstp)[3] = v2;
                        dstp += 8;
                        count -= 8;
                    }
                    if (count >= 4) {
                        ((unsigned INT *)dstp)[0] = v2;
                        ((unsigned INT *)dstp)[1] = v2;
                        dstp += 4;
                        count -= 4;
                    }
                    if (count >= 2) {
                        ((unsigned INT *)dstp)[0] = v2;
                        dstp += 2;
                        count -= 2;
                    }
                    /* at most one character is left */
                    if (count > 0) {
                        *dstp = charValue;
                    }
                }
#else // not 64bit
                while (count >= 8) {
                    dstp[0] = dstp[1] = dstp[2] = dstp[3] =
                    dstp[4] = dstp[5] = dstp[6] = dstp[7] = charValue;
                    dstp += 8;
                    count -= 8;
                }
                while (count-- > 0) {
                    *dstp++ = charValue;
                }
#endif /* 64bit */
                RETURN (self);
            }
        }
    }
%}.
    "
     fall back in case of non-integer index or out-of-bound index/value;
     will eventually lead to an out-of-bound signal raise
    "
    ^ super from:start to:stop put:aCharacter

    "
     (Unicode32String new:10) from:1 to:10 put:$a
     (Unicode32String new:20) from:10 to:20 put:$b
     (Unicode32String new:20) from:1 to:10 put:$c
     (Unicode32String new:100) from:2 to:99 put:$c
     ((Unicode32String new:100) from:2 to:99 put:$c) occurrencesOf:$c    - should answer 98

     (Unicode32String new:10) from:0 to:9 put:$a
     (Unicode32String new:10) from:1 to:11 put:$a
    "

    "Created: / 26-03-2019 / 11:30:51 / Claus Gittinger"
    "Modified: / 27-03-2019 / 14:10:18 / Claus Gittinger"
4922 | 227 |
! |
228 |
||
229 |
replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Return the receiver.

     - reimplemented here for speed.
     If start and stop are SmallIntegers describing an empty range (stop < start),
     the receiver is returned at once without looking at the other arguments.
     The primitive copies from single-byte, two-byte and four-byte strings;
     any other source, a non-SmallInteger index or an index which is out of
     range is passed on to the inherited implementation."

%{  /* NOCONTEXT */

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)) {
        /* INT instead of int: a huge SmallInteger index must not be
         * truncated into a seemingly valid small one */
        INT index1 = __intVal(start);
        INT index2 = __intVal(stop);
        INT len;

        if (index2 < index1) {
            /* empty range - nothing to copy */
            RETURN (self);
        }
        len = __unicode32StringSize(self);
        /* repStart has to be a SmallInteger as well;
         * __intVal of any other object would yield a garbage index */
        if ((index1 > 0) && (index2 <= len) && __isSmallInteger(repStart)) {
            /* computed after the bounds check, so it cannot overflow */
            INT count = index2 - index1 + 1;
            INT repIndex = __intVal(repStart);
            OBJ cls;

            if (((cls = __qClass(self)) == Unicode32String)
             || (__OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) == 0)) {
                if (__isStringLike(aString)) {
                    /* source has 8-bit characters: widen each one */
                    INT repLen = __stringSize(aString);

                    if ((repIndex > 0) && ((count - 1) <= (repLen - repIndex))) {
                        REGISTER unsigned char *srcp = __stringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                        RETURN (self);
                    }
                } else if (__isTwoByteString(aString) || __isUnicode16String(aString)) {
                    /* source has 16-bit characters: widen each one */
                    INT repLen = __twoByteStringSize(aString);

                    if ((repIndex > 0) && ((count - 1) <= (repLen - repIndex))) {
                        REGISTER unsigned short *srcp = __twoByteStringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                        RETURN (self);
                    }
                } else if (__isUnicode32String(aString)) {
                    /* same element size: plain block copy */
                    INT repLen = __unicode32StringSize(aString);

                    if ((repIndex > 0) && ((count - 1) <= (repLen - repIndex))) {
                        REGISTER unsigned int *srcp = __unicode32StringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        if (aString == self) {
                            /* take care of overlapping copy */
                            memmove(dstp, srcp, count * sizeof(*dstp));
                            RETURN (self);
                        }
                        if (count > 5) {
                            memcpy(dstp, srcp, count * sizeof(*dstp));
                        } else {
                            while (count-- > 0) {
                                *dstp++ = *srcp++;
                            }
                        }
                        RETURN (self);
                    }
                }
            }
        }
    }
#endif
%}.
    "/ arrive here if any index arg is out of range or not a SmallInteger,
    "/ or if the source is not a single-byte, two-byte or four-byte string.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

    "
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' asUnicode16String startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' asUnicode32String startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:0 with:'123456' startingAt:2
     'hello' asUnicode32String replaceFrom:1 to:6 with:'123456' startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:1 with:'123456' startingAt:2
    "

    "Created: / 26-03-2019 / 12:10:26 / Claus Gittinger"
    "Modified: / 27-03-2019 / 14:11:27 / Claus Gittinger"
4920 | 318 |
! ! |
319 |
||
1404 | 320 |
!FourByteString methodsFor:'queries'! |
321 |
||
322 |
bitsPerCharacter
    "answer how many bits are used to store one character;
     always 32 here (quad byte characters)."

    ^ 32
|
2865 | 327 |
! |
328 |
||
3826 | 329 |
bytesPerCharacter
    "answer how many bytes are used to store one character;
     always 4 here (quad byte characters)."

    ^ 4
|
334 |
! |
|
335 |
||
2865 | 336 |
isWideString
    "answer true, because my characters need more than one byte each"

    ^ true
4923 | 340 |
! |
341 |
||
342 |
occurrencesOf:aCharacter
    "count the occurrences of the argument, aCharacter in myself
     - reimplemented here for speed.

     The primitive only handles a Character argument with a code point
     <= 16r3FFFFFFF, and a receiver which is a Unicode32String or whose class
     has no named instance variables (c_ninstvars is 0).
     Everything else is passed on to the inherited implementation."

%{  /* NOCONTEXT */

    if (__isCharacter(aCharacter)) {
        REGISTER unsigned charValue = __intVal(__characterVal(aCharacter));

        if (charValue <= 0x3FFFFFFF) {
            OBJ cls;

            if (((cls = __qClass(self)) == Unicode32String)
             || (__OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) == 0)) {
                REGISTER unsigned int *cp = __unicode32StringVal(self);
                REGISTER INT limit = __unicode32StringSize(self);
                REGISTER INT count = 0;

                /* loop unrolled and software-pipelined
                 * (gives 30-40% speedup on Intel-DUO using borland bcc55)
                 */
                while (limit >= 4) {
                    register unsigned int c1, c2;

                    c1 = cp[0];
                    limit -= 4;
                    c2 = cp[1];
                    if (c1 == charValue) count++;
                    c1 = cp[2];
                    if (c2 == charValue) count++;
                    c2 = cp[3];
                    if (c1 == charValue) count++;
                    cp += 4;
                    if (c2 == charValue) count++;
                }
                /* the remaining (at most 3) characters */
                while (limit > 0) {
                    register unsigned int c;

                    c = cp[0];
                    limit--;
                    if (c == charValue) count++;
                    cp++;
                }
                /* only answer from here if the scan was really done;
                 * with a different object layout, the inherited
                 * implementation below has to do the counting */
                RETURN ( __mkSmallInteger(count) );
            }
        }
    }
%}.
    ^ super occurrencesOf:aCharacter

    "
     'hello world' asUnicode32String occurrencesOf:$a
     'hello world' asUnicode32String occurrencesOf:$w
     'hello world' asUnicode32String occurrencesOf:$l
     'hello world' asUnicode32String occurrencesOf:$x
     'hello world' asUnicode32String occurrencesOf:1

     Time millisecondsToRun:[
        |s|

        s := 'abcdefghijklmn' asUnicode32String.
        1000000 timesRepeat:[ s occurrencesOf:$x ]
     ]. 60 60 60 70 (untuned: 690 760 670)
    "

    "Created: / 27-03-2019 / 14:13:43 / Claus Gittinger"
|
1404 | 410 |
! ! |
411 |
||
4511 | 412 |
!FourByteString methodsFor:'testing'! |
413 |
||
4512 | 414 |
isSingleByteCollection
    "answer true, if the receiver's element accessors work on bytes;
     i.e. if #at: and #at:put: are equivalent to #byteAt: and #byteAt:put:,
     and #replaceFrom:to: is equivalent to #replaceBytesFrom:to:.
     Here, false is answered, because #at: answers 4-byte characters, not bytes
     - the method is redefined from UninterpretedBytes."

    ^ false

    "Created: / 30-08-2017 / 23:31:02 / cg"
|
424 |
! ! |
|
425 |
||
1404 | 426 |
!FourByteString class methodsFor:'documentation'! |
427 |
||
428 |
version
    "answer the revision keyword string of this source file
     (presumably filled in by the source code management system)"

    ^ '$Header$'
4922 | 430 |
! |
431 |
||
432 |
version_CVS
    "answer the CVS revision keyword string of this source file
     (presumably filled in by the source code management system)"

    ^ '$Header$'
|
1404 | 434 |
! ! |
435 |
||
3533 | 436 |
|
1404 | 437 |
FourByteString initialize! |