author | Merge Script |
Sun, 07 Jun 2015 06:38:49 +0200 | |
branch | jv |
changeset 18457 | 214d760f8247 |
parent 18120 | e3a375d5f6a8 |
child 18608 | 7d521f25267c |
permissions | -rw-r--r-- |
17621 | 1 |
"{ Encoding: utf8 }" |
2 |
||
1 | 3 |
" |
4 |
COPYRIGHT (c) 1993 by Claus Gittinger |
|
235 | 5 |
All Rights Reserved |
1 | 6 |
|
7 |
This software is furnished under a license and may be used |
|
8 |
only in accordance with the terms of that license and with the |
|
9 |
inclusion of the above copyright notice. This software may not |
|
10 |
be provided or otherwise made available to, or used by, any |
|
11 |
other person. No title to or ownership of the software is |
|
12 |
hereby transferred. |
|
13 |
" |
|
10223
761e2a050b69
twoByteString moved (req'd in VM)
Claus Gittinger <cg@exept.de>
parents:
8094
diff
changeset
|
14 |
"{ Package: 'stx:libbasic' }" |
8094
d05f69bd0097
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
5761
diff
changeset
|
15 |
|
17621 | 16 |
"{ NameSpace: Smalltalk }" |
17 |
||
5761 | 18 |
"TwoByteString: a CharacterArray whose indexed elements are words
 (16 bits per character); it declares no named instance variables,
 class variables or pools of its own."
CharacterArray variableWordSubclass:#TwoByteString
        instanceVariableNames:''
        classVariableNames:''
        poolDictionaries:''
        category:'Collections-Text'
1 | 23 |
! |
24 |
||
89 | 25 |
!TwoByteString class methodsFor:'documentation'! |
26 |
||
27 |
copyright |
|
28 |
" |
|
29 |
COPYRIGHT (c) 1993 by Claus Gittinger |
|
235 | 30 |
All Rights Reserved |
1 | 31 |
|
89 | 32 |
This software is furnished under a license and may be used |
33 |
only in accordance with the terms of that license and with the |
|
34 |
inclusion of the above copyright notice. This software may not |
|
35 |
be provided or otherwise made available to, or used by, any |
|
36 |
other person. No title to or ownership of the software is |
|
37 |
hereby transferred. |
|
38 |
" |
|
39 |
! |
|
40 |
||
41 |
documentation
"
    TwoByteStrings are like strings, but store 16 bits per character.
    Their integration into the system is not yet complete.

    [author:]
        Claus Gittinger

    [see also:]
        Text JISEncodedString
        StringCollection
"
1214 | 53 |
! ! |
54 |
||
55 |
!TwoByteString class methodsFor:'initialization'! |
|
996 | 56 |
|
57 |
initialize
    "class initialization (private):
     set my class flags to the value answered by 'Behavior flagWords'"

    |wordFlags|

    wordFlags := Behavior flagWords.
    self flags:wordFlags

    "
     TwoByteString initialize
    "

    "Modified: 22.4.1996 / 16:14:14 / cg"
89 | 67 |
! ! |
1 | 68 |
|
69 |
!TwoByteString class methodsFor:'instance creation'! |
|
70 |
||
71 |
basicNew:anInteger
    "answer a new instance with room for anInteger characters,
     every element preset to the space character.
     The value answered is whatever #atAllPut: answers for the fresh instance."

    |freshString|

    freshString := super basicNew:anInteger.
    ^ freshString atAllPut:(Character space)

    "Modified: 26.2.1996 / 14:38:47 / cg"
|
17621 | 77 |
! |
78 |
||
79 |
uninitializedNew:anInteger
    "answer a new instance with room for anInteger characters.
     In contrast to #basicNew:, the elements are NOT filled with spaces here;
     the instance is answered exactly as the inherited basicNew: created it."

    |rawString|

    rawString := super basicNew:anInteger.
    ^ rawString

    "
     self uninitializedNew:10
    "
|
1 | 87 |
! ! |
88 |
||
89 |
!TwoByteString methodsFor:'accessing'! |
|
90 |
||
91 |
basicAt:index
    "answer the character stored at position index (an Integer).
     The inherited basicAt: delivers the raw element value; it is wrapped
     into a Character here, because we hold 16-bit characters."

    ^ Character value:(super basicAt:index)

    "Modified: 26.2.1996 / 17:02:16 / cg"
|
1 | 101 |
! |
102 |
||
103 |
basicAt:index put:aCharacter
    "store aCharacter at position index (an Integer).
     The character's codePoint is what actually gets written via the
     inherited basicAt:put:, because we hold 16-bit characters.
     Answers aCharacter (not the receiver)."

    |code|

    code := aCharacter codePoint.
    super basicAt:index put:code.
    ^ aCharacter

    "Modified: 19.4.1996 / 11:16:22 / cg"
14123 | 112 |
! |
113 |
||
114 |
unsignedShortAt:index
    "answer the raw element value at position index (an Integer),
     as delivered by the inherited basicAt: - i.e. without wrapping it
     into a Character as my own #basicAt: does"

    |rawValue|

    rawValue := super basicAt:index.
    ^ rawValue
|
1014 | 118 |
! ! |
119 |
||
17621 | 120 |
!TwoByteString methodsFor:'encoding'! |
121 |
||
122 |
utf8Encoded
    "answer my UTF-8 representation as a new String.
     If #contains8BitCharacters answers false, the single-byte copy of the
     receiver is answered directly; otherwise #basicUtf8Encoded does the work."

    ^ self contains8BitCharacters
        ifTrue:[ self basicUtf8Encoded ]
        ifFalse:[ self asSingleByteString ]

    "
     'abcdef' asUnicode16String utf8Encoded
     'abcdefäöü' asUnicode16String utf8Encoded
    "
|
136 |
! |
|
137 |
||
138 |
utf8EncodedOn:aStream
    "append my contents to aStream in UTF-8 encoding.
     If #contains8BitCharacters answers true, the stream's #nextPutAllUtf8:
     does the encoding; otherwise my characters are written one by one
     with #nextPut:."

    |numChars "{Class: SmallInteger}"|

    self contains8BitCharacters ifTrue:[
        aStream nextPutAllUtf8:self.
        ^ self
    ].

    numChars := self size.
    1 to:numChars do:[:charIndex |
        aStream nextPut:(self basicAt:charIndex).
    ].

    "
     |s|
     s := '' writeStream.
     'abcdef' asUnicode16String utf8EncodedOn:s.
     s contents
    "

    "
     |s|
     s := '' writeStream.
     'abcdefäöü' asUnicode16String utf8EncodedOn:s.
     s contents
    "
|
165 |
! ! |
|
166 |
||
16750 | 167 |
!TwoByteString methodsFor:'filling and replacing'! |
168 |
||
169 |
replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Return the receiver.

     - reimplemented here for speed.
     The primitive handles single-byte strings and two-byte strings as source;
     everything else (non-SmallInteger or out-of-range indices, other source
     collections) falls through to the inherited implementation.
     An empty range (stop < start) is a no-op answering the receiver."

%{  /* NOCONTEXT */

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)) {
        REGISTER int count;
        int len, index1, index2;

        index1 = __intVal(start);
        index2 = __intVal(stop);
        count = index2 - index1 + 1;
        if (count <= 0) {
            RETURN (self);
        }
        len = __twoByteStringSize(self);
        /*
         * repStart must be a SmallInteger as well - otherwise __intVal()
         * would interpret an object pointer as a number.
         */
        if ((index2 <= len) && (index1 > 0) && __isSmallInteger(repStart)) {
            int repLen, repIndex;

            repIndex = __intVal(repStart);

            if (__isStringLike(aString)) {
                repLen = __stringSize(aString);
                /*
                 * same condition as (repIndex + count - 1) <= repLen,
                 * but written so that the addition cannot overflow
                 */
                if ((repIndex > 0) && (count <= repLen) && (repIndex <= (repLen - count + 1))) {
                    REGISTER unsigned char *srcp;
                    REGISTER unsigned short *dstp;

                    /* widen each source byte to a 16-bit element */
                    srcp = __stringVal(aString) + repIndex - 1;
                    dstp = __twoByteStringVal(self) + index1 - 1;
                    while (count-- > 0) {
                        *dstp++ = *srcp++;
                    }
                    RETURN (self);
                }
            } else if (__isTwoByteString(aString) || __isUnicode16String(aString)) {
                repLen = __twoByteStringSize(aString);
                if ((repIndex > 0) && (count <= repLen) && (repIndex <= (repLen - count + 1))) {
                    REGISTER unsigned short *srcp;
                    REGISTER unsigned short *dstp;

                    srcp = __twoByteStringVal(aString) + repIndex - 1;
                    dstp = __twoByteStringVal(self) + index1 - 1;
                    if (aString == self) {
                        /* take care of overlapping copy */
                        if (srcp < dstp) {
                            /* must do a reverse copy */
                            srcp += count;
                            dstp += count;
                            while (count-- > 0) {
                                *--dstp = *--srcp;
                            }
                            RETURN (self);
                        }
                    }
                    if (count > 5) {
                        /*
                         * memmove instead of memcpy: source and destination
                         * may still overlap here (aString == self, srcp > dstp),
                         * for which memcpy is undefined.
                         */
                        memmove(dstp, srcp, count*sizeof(short));
                    } else {
                        /* a forward element copy is safe for srcp >= dstp */
                        while (count-- > 0) {
                            *dstp++ = *srcp++;
                        }
                    }
                    RETURN (self);
                }
            }
        }
    }
#endif
%}.
    "/ arrive here if any index arg is out of range (or not a SmallInteger),
    "/ or the source is neither a string, nor a two-byte string.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

    "
     'hello world' asUnicode16String replaceFrom:1 to:5 with:'123456' startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:5 with:'123456' asUnicode16String startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:0 with:'123456' startingAt:2
     'hello' asUnicode16String replaceFrom:1 to:6 with:'123456' startingAt:2
     'hello world' asUnicode16String replaceFrom:1 to:1 with:'123456' startingAt:2
    "
|
253 |
! ! |
|
254 |
||
608 | 255 |
!TwoByteString methodsFor:'queries'! |
256 |
||
1017 | 257 |
bitsPerCharacter
    "answer the number of bits used to store each of my characters;
     that is 16 here, as double byte characters are stored."

    ^ 16

    "Modified: 20.4.1996 / 23:08:38 / cg"
|
14557 | 264 |
! |
265 |
||
17621 | 266 |
contains8BitCharacters
    "return true, if the receiver contains any character which needs 8 bits
     or more (i.e. a codePoint above 127, so the string is not pure 7-bit ascii);
     false otherwise.
     Each 16-bit element is tested against the mask 0xFF80; where possible,
     several elements are tested at once with a wider mask."

%{  /* NOCONTEXT */

    REGISTER unsigned short *sp, *last;
    OBJ cls;

    sp = __twoByteStringVal(self);
    /* last is an exclusive end: the element at *last is not part of the string */
    last = sp + __twoByteStringSize(self);
    if ((cls = __qClass(self)) != TwoByteString && cls != Unicode16String) {
        /* some other class: advance past its named instance variable slots */
        sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
    }
#if __POINTER_SIZE__ == 8
    /* assume sizeof(long) == 4
     * if __POINTER_SIZE__ == 4
     */
    if (sizeof(long) == 8) {
        /* four elements per test */
        while ((sp+4) <= last) {
            if (*(unsigned long *)sp & 0xFF80FF80FF80FF80) {
                RETURN ( true );
            }
            sp += 4;
        }
    }
#endif
    if (sizeof(int) == 4) {
        /* two elements per test */
        while ((sp+2) <= last) {
            if (*(unsigned int *)sp & 0xFF80FF80) {
                RETURN ( true );
            }
            sp += 2;
        }
    }
    /*
     * remaining elements, one at a time.
     * Must be (sp < last): with (sp <= last) the element behind the string
     * would be inspected too, which could lead to a wrong true answer.
     */
    while (sp < last) {
        if (*sp & 0xFF80) {
            RETURN ( true );
        }
        sp++;
    }
    RETURN (false);
%}.

    "
     'hello world' asUnicode16String contains8BitCharacters
     'hello worldüäö' asUnicode16String contains8BitCharacters
     'ü' asUnicode16String contains8BitCharacters
     'aü' asUnicode16String contains8BitCharacters
     'aaü' asUnicode16String contains8BitCharacters
     'aaaü' asUnicode16String contains8BitCharacters
     'aaaaü' asUnicode16String contains8BitCharacters
    "
|
319 |
! |
|
320 |
||
14557 | 321 |
isWideString
    "answer true here - unconditionally"

    ^ true
|
608 | 323 |
! ! |
324 |
||
631 | 325 |
!TwoByteString class methodsFor:'documentation'! |
326 |
||
327 |
version
    "answer the CVS header string of this source file"

    ^ '$Header: /cvs/stx/stx/libbasic/TwoByteString.st,v 1.36 2015-03-14 21:30:22 stefan Exp $'
631 | 329 |
! ! |
8094
d05f69bd0097
Use #codePoint instead of deprecated #asciiValue
Stefan Vogel <sv@exept.de>
parents:
5761
diff
changeset
|
330 |
|
16750 | 331 |
|
996 | 332 |
TwoByteString initialize! |