author | Claus Gittinger <cg@exept.de> |
Tue, 26 Mar 2019 12:12:05 +0100 | |
changeset 4922 | ff7afd151379 |
parent 4920 | 8c94a1a0ed25 |
child 4923 | 6e6fea06fff6 |
permissions | -rw-r--r-- |
1404 | 1 |
" |
2 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
3 |
All Rights Reserved |
|
4 |
||
5 |
This software is furnished under a license and may be used |
|
6 |
only in accordance with the terms of that license and with the |
|
7 |
inclusion of the above copyright notice. This software may not |
|
8 |
be provided or otherwise made available to, or used by, any |
|
9 |
other person. No title to or ownership of the software is |
|
10 |
hereby transferred. |
|
11 |
" |
|
1415
3ef6a2c42611
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
1404
diff
changeset
|
12 |
"{ Package: 'stx:libbasic2' }" |
1404 | 13 |
|
3533 | 14 |
"{ NameSpace: Smalltalk }" |
15 |
||
1404 | 16 |
"FourByteString is defined as a variableLongSubclass of CharacterArray;
 it declares no named instance variables, class variables or pool dictionaries."
CharacterArray variableLongSubclass:#FourByteString
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text'
|
21 |
! |
|
22 |
||
23 |
!FourByteString class methodsFor:'documentation'! |
|
24 |
||
25 |
copyright
"
 COPYRIGHT (c) 2004 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
|
37 |
! |
|
38 |
||
39 |
documentation
"
    FourByteStrings are like strings, but store 32 bits per character.
    Their integration into the system is not yet complete.

    [author:]
        Claus Gittinger

    [see also:]
        Text TwoByteString UnicodeEncodedString
        StringCollection
"
|
51 |
! ! |
|
52 |
||
53 |
!FourByteString class methodsFor:'initialization'! |
|
54 |
||
55 |
initialize
    "class initialization (private):
     set the receiver's flags to the value answered by 'Behavior flagLongs'"

    self flags:Behavior flagLongs

    "
     FourByteString initialize
    "

    "Modified: 22.4.1996 / 16:14:14 / cg"
|
65 |
! ! |
|
66 |
||
67 |
!FourByteString class methodsFor:'instance creation'! |
|
68 |
||
69 |
basicNew:anInteger
    "return a new string with anInteger characters,
     every one of them preset to the space character"

    |newString|

    newString := super basicNew:anInteger.
    ^ newString atAllPut:(Character space)

    "Modified: 26.2.1996 / 14:38:47 / cg"
|
3533 | 75 |
! |
76 |
||
77 |
uninitializedNew:anInteger
    "return a new string with anInteger characters.
     In contrast to #basicNew:, the characters are NOT preset to spaces;
     the instance is passed on as created by the inherited #basicNew:"

    ^ super basicNew:anInteger

    "
     self uninitializedNew:10
    "
|
1404 | 85 |
! ! |
86 |
||
87 |
!FourByteString methodsFor:'accessing'! |
|
88 |
||
89 |
basicAt:index
    "return the character stored at position index (an Integer).
     - reimplemented here, because the inherited method answers the raw
       32-bit element, which is wrapped into a Character here"

    ^ Character value:(super basicAt:index)
|
97 |
! |
|
98 |
||
99 |
basicAt:index put:aCharacter
    "store aCharacter at position index (an Integer)
     and return aCharacter (sigh).
     - reimplemented here, because the character's codePoint is what gets
       stored into the 32-bit element"

    super basicAt:index put:(aCharacter codePoint).
    ^ aCharacter
|
109 |
! ! |
|
110 |
||
4920 | 111 |
!FourByteString methodsFor:'filling and replacing'! |
112 |
||
113 |
from:start to:stop put:aCharacter
    "fill the elements start..stop (1-based, both inclusive) of the receiver
     with aCharacter and return the receiver.
     - reimplemented here for speed.

     The primitive handles the common case: aCharacter is a Character whose
     code fits the range check below, and start/stop are SmallIntegers
     denoting a non-empty range inside the receiver.
     Everything else is passed on to the inherited implementation."

%{  /* NOCONTEXT */

    REGISTER unsigned int *dstp;
    REGISTER INT count;
    unsigned int charValue;
    INT len, index1, index2;
    OBJ cls;

    if (__isCharacter(aCharacter)
     && __bothSmallInteger(start, stop)) {
        /*
         * indices are kept in INT (not int): narrowing a large SmallInteger
         * to int could wrap it into a seemingly valid index.
         */
        len = __unicode32StringSize(self);
        index1 = __intVal(start);
        index2 = __intVal(stop);

        dstp = __unicode32StringVal(self);
        if ((cls = __qClass(self)) != @global(Unicode32String)) {
            INT nInstBytes;

            /*
             * skip the named instance variables of the receiver's class.
             * __OBJS2BYTES__ yields a BYTE count, so the pointer has to be
             * advanced as a byte pointer and len reduced in 32-bit units.
             * NOTE(review): assumes __unicode32StringSize counts 32-bit
             * elements of the whole object body - confirm against stc.h.
             */
            nInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
            dstp = (unsigned int *)((char *)dstp + nInstBytes);
            len -= nInstBytes / (INT)sizeof(unsigned int);
        }

        charValue = (unsigned int)__intVal(__characterVal(aCharacter));
        if ((charValue <= 0x0FFFFFFF)
         && (index1 <= index2)
         && (index1 > 0)
         && (index2 <= len)) {
            count = index2 - index1 + 1;
            /* only form the element address after the bounds check */
            dstp += index1 - 1;

#if (__POINTER_SIZE__ == 8)
            {
                unsigned INT v2;

                /*
                 * two copies of the character in one 64-bit word.
                 * The value must be widened BEFORE shifting: shifting a
                 * 32-bit int by 32 is undefined behavior.
                 */
                v2 = ((unsigned INT)charValue << 32) | (unsigned INT)charValue;

                /* fill unaligned part (until dstp is 8-byte aligned) */
                while ((count > 0) && (((unsigned INT)dstp & 7) != 0)) {
                    *dstp++ = charValue;
                    count--;
                }

                /* fill aligned part, 8 characters (4 words) per round */
                while (count >= 8) {
                    ((unsigned INT *)dstp)[0] = v2;
                    ((unsigned INT *)dstp)[1] = v2;
                    ((unsigned INT *)dstp)[2] = v2;
                    ((unsigned INT *)dstp)[3] = v2;
                    dstp += 8;
                    count -= 8;
                }
                /* then 2 characters (1 word) per round */
                while (count >= 2) {
                    ((unsigned INT *)dstp)[0] = v2;
                    dstp += 2;
                    count -= 2;
                }

                /* fill rest */
                while (count > 0) {
                    *dstp++ = charValue;
                    count--;
                }
            }
#else /* not 64bit */
            while (count >= 8) {
                dstp[0] = dstp[1] = dstp[2] = dstp[3] =
                dstp[4] = dstp[5] = dstp[6] = dstp[7] = charValue;
                dstp += 8;
                count -= 8;
            }
            while (count > 0) {
                *dstp++ = charValue;
                count--;
            }
#endif /* 64bit */
            RETURN (self);
        }
    }
%}.
    "
     fall back in case of non-integer index or out-of-bound index/value;
     will eventually lead to an out-of-bound signal raise
    "
    ^ super from:start to:stop put:aCharacter

    "
     (Unicode32String new:10) from:1 to:10 put:$a
     (Unicode32String new:20) from:10 to:20 put:$b
     (Unicode32String new:20) from:1 to:10 put:$c
     (Unicode32String new:100) from:2 to:99 put:$c

     (Unicode32String new:10) from:0 to:9 put:$a
     (Unicode32String new:10) from:1 to:11 put:$a
    "

    "Created: / 26-03-2019 / 11:30:51 / Claus Gittinger"
|
4922 | 213 |
! |
214 |
||
215 |
replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters from index start to index stop (both inclusive)
     by characters taken from aString, beginning there at index repStart.
     Return the receiver.

     - reimplemented here for speed.
     The primitive covers single-byte, two-byte and four-byte source strings
     with SmallInteger indices inside both strings; every other case is
     handed to the inherited implementation."

%{  /* NOCONTEXT */

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)) {
        /* INT (not int): a large SmallInteger must not wrap into a valid index */
        INT index1 = __intVal(start);
        INT index2 = __intVal(stop);
        INT count = index2 - index1 + 1;

        if (count <= 0) {
            /* empty range - nothing to replace */
            RETURN (self);
        }

        /*
         * repStart must be a SmallInteger before __intVal may be applied.
         * The fast path also addresses the characters from the start of the
         * object body, so it is only taken if the receiver's class has no
         * named instance variables (compare the offset handling in #from:to:put:).
         */
        if (__isSmallInteger(repStart)
         && (__intVal(__ClassInstPtr(__qClass(self))->c_ninstvars) == 0)) {
            INT len = __unicode32StringSize(self);

            if ((index2 <= len) && (index1 > 0)) {
                INT repIndex = __intVal(repStart);

                if (__isStringLike(aString)) {
                    /* widen 8-bit characters */
                    INT repLen = __stringSize(aString);

                    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
                        REGISTER unsigned char *srcp = __stringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                        RETURN (self);
                    }
                } else if (__isTwoByteString(aString) || __isUnicode16String(aString)) {
                    /* widen 16-bit characters */
                    INT repLen = __twoByteStringSize(aString);

                    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
                        REGISTER unsigned short *srcp = __twoByteStringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                        RETURN (self);
                    }
                } else if (__isUnicode32String(aString)) {
                    /* same element width - plain copy */
                    INT repLen = __unicode32StringSize(aString);

                    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
                        REGISTER unsigned int *srcp = __unicode32StringVal(aString) + repIndex - 1;
                        REGISTER unsigned int *dstp = __unicode32StringVal(self) + index1 - 1;

                        if (aString == self) {
                            /* take care of overlapping copy */
                            memmove(dstp, srcp, (size_t)count * sizeof(unsigned int));
                            RETURN (self);
                        }
                        if (count > 5) {
                            memcpy(dstp, srcp, (size_t)count * sizeof(unsigned int));
                        } else {
                            while (count-- > 0) {
                                *dstp++ = *srcp++;
                            }
                        }
                        RETURN (self);
                    }
                }
            }
        }
    }
#endif
%}.
    "/ arrive here if any index arg is not a SmallInteger or is out of range,
    "/ if the receiver's class has named instance variables,
    "/ or if the source is neither a single-byte, a two-byte nor a four-byte string.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

    "
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' asUnicode16String startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:5 with:'123456' asUnicode32String startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:0 with:'123456' startingAt:2
     'hello' asUnicode32String replaceFrom:1 to:6 with:'123456' startingAt:2
     'hello world' asUnicode32String replaceFrom:1 to:1 with:'123456' startingAt:2
    "

    "Created: / 26-03-2019 / 12:10:26 / Claus Gittinger"
|
4920 | 299 |
! ! |
300 |
||
1404 | 301 |
!FourByteString methodsFor:'queries'! |
302 |
||
303 |
bitsPerCharacter
    "return the number of bits used to store a single character.
     For the receiver this is 32 (four bytes per character)."

    ^ 32
|
2865 | 308 |
! |
309 |
||
3826 | 310 |
bytesPerCharacter
    "return the number of bytes used to store a single character.
     For the receiver this is 4 (i.e. 32 bits per character)."

    ^ 4
|
315 |
! |
|
316 |
||
2865 | 317 |
isWideString
    "return true, since each of my characters occupies more than one byte"

    ^ true
1404 | 321 |
! ! |
322 |
||
4511 | 323 |
!FourByteString methodsFor:'testing'! |
324 |
||
4512 | 325 |
isSingleByteCollection
    "return true, if the receiver's element accessors operate on bytes;
     i.e. if #at: and #at:put: are equivalent to #byteAt: and #byteAt:put:,
     and #replaceFrom:to: is equivalent to #replaceBytesFrom:to:.
     Here, false is returned, because #at: answers 4-byte characters, not bytes
     - the method is redefined from UninterpretedBytes."

    ^ false

    "Created: / 30-08-2017 / 23:31:02 / cg"
|
335 |
! ! |
|
336 |
||
1404 | 337 |
!FourByteString class methodsFor:'documentation'! |
338 |
||
339 |
version
    "return the version-control header string of this class's source"

    ^ '$Header$'
4922 | 341 |
! |
342 |
||
343 |
version_CVS
    "return the CVS header string of this class's source"

    ^ '$Header$'
|
1404 | 345 |
! ! |
346 |
||
3533 | 347 |
|
1404 | 348 |
"run the class-side #initialize (which sets the class flags) - presumably evaluated when this source is filed in"
FourByteString initialize!