2526
|
1 |
"
|
|
2 |
COPYRIGHT (c) 2011 by eXept Software AG
|
|
3 |
All Rights Reserved
|
|
4 |
|
|
5 |
This software is furnished under a license and may be used
|
|
6 |
only in accordance with the terms of that license and with the
|
|
7 |
inclusion of the above copyright notice. This software may not
|
|
8 |
be provided or otherwise made available to, or used by, any
|
|
9 |
other person. No title to or ownership of the software is
|
|
10 |
hereby transferred.
|
|
11 |
"
|
|
12 |
"{ Package: 'stx:libbasic2' }"
|
|
13 |
|
4957
|
14 |
"{ NameSpace: Smalltalk }"
|
|
15 |
|
2526
|
16 |
"CharacterSet is a Collection subclass with a single instance variable, 'bits'."
Collection subclass:#CharacterSet
    instanceVariableNames:'bits'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Unordered'
!
|
|
22 |
|
|
23 |
!CharacterSet class methodsFor:'documentation'!
|
|
24 |
|
|
25 |
copyright
"
 COPYRIGHT (c) 2011 by eXept Software AG
 All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice. This software may not
 be provided or otherwise made available to, or used by, any
 other person. No title to or ownership of the software is
 hereby transferred.
"
!
|
|
38 |
|
|
39 |
documentation
"
    Used to represent 8-bit character sets (for now) as a bitmap.
    Bit[n] is set, if Character codePoint:(n-1) is included in the set.

    The bitmap is kept in the 'bits' instance variable. It is nil for a
    freshly initialized (empty) set; add: replaces it by a larger ByteArray
    whenever the byte for the added character lies beyond the current size.
    Each byte covers 8 consecutive codepoints: the character with codepoint cp
    is found in byte (cp // 8) + 1 at the (1-based) bit position (cp \\ 8) + 1.

    Only codepoints 0..255 are supported: add: and remove:ifAbsent: assert
    this, whereas includes: simply answers false for larger codepoints.

    [author:]
        Claus Gittinger
"
! !
|
|
48 |
|
|
49 |
!CharacterSet class methodsFor:'instance creation'!
|
|
50 |
|
3459
|
51 |
allSingleByteCharacters
    "answer a freshly created character set holding every
     single-byte character (codepoints 0 .. 255)"

    |fullSet|

    fullSet := self new.
    ^ fullSet addAllSingleByteCharacters
!
|
|
56 |
|
|
57 |
empty
    "answer a freshly created character set without any element"

    |emptySet|

    emptySet := self new.
    ^ emptySet
!
|
|
62 |
|
2526
|
63 |
new
    "answer a freshly created, initialized (i.e. empty) character set.
     The value answered is whatever the instance's initialize method returns."

    |instance|

    instance := self basicNew.
    ^ instance initialize

    "Created: / 28-01-2011 / 17:29:24 / cg"
!
|
|
70 |
|
|
71 |
nonSeparators
    "answer a freshly created character set holding every single-byte
     character, except for those listed in Character separators (whitespace)"

    |result|

    result := self new.
    result addAllSingleByteCharacters.
    result removeAll:(Character separators).
    ^ result
!
|
|
79 |
|
|
80 |
separators
    "answer a freshly created character set holding exactly the
     characters listed in Character separators (whitespace)"

    |result|

    result := self new.
    result addAll:(Character separators).
    ^ result
! !
|
|
87 |
|
3458
|
88 |
!CharacterSet methodsFor:'accessing'!
|
|
89 |
|
|
90 |
byteArrayMap
    "answer the bitmap which represents the receiver's contents.
     This is the internal object itself (not a copy);
     it is nil as long as the set has only been initialized."

    ^ bits
! !
|
|
93 |
|
2526
|
94 |
!CharacterSet methodsFor:'adding & removing'!
|
|
95 |
|
|
96 |
add:aCharacter
    "include aCharacter in the set; its codepoint is asserted to be <= 255.
     The bitmap is enlarged if the character's byte lies beyond its current end.
     Answers the argument, aCharacter (not the receiver - sigh)"

    |codePoint byteNr bitNr|

    codePoint := aCharacter codePoint.
    self assert:(codePoint <= 255).

    "each byte of the bitmap covers 8 codepoints; both indices are 1-based"
    byteNr := (codePoint // 8) + 1.
    bitNr := (codePoint \\ 8) + 1.

    (bits size < byteNr) ifTrue:[
        "too short - replace by a longer bitmap, taking over the old contents"
        bits := (ByteArray new:byteNr) replaceFrom:1 with:bits.
    ].
    bits at:byteNr put:((bits at:byteNr) setBit:bitNr).
    ^ aCharacter "/ sigh

    "Created: / 28-01-2011 / 17:44:21 / cg"
!
|
|
114 |
|
|
115 |
remove:aCharacter ifAbsent:exceptionValue
    "exclude aCharacter from the set and answer it.
     If it is not an element, answer the value of exceptionValue instead.
     The character's codepoint is asserted to be <= 255."

    |codePoint byteNr bitNr oldByte|

    codePoint := aCharacter codePoint.
    self assert:(codePoint <= 255).

    byteNr := (codePoint // 8) + 1.
    bitNr := (codePoint \\ 8) + 1.

    "beyond the end of the bitmap - cannot be an element"
    byteNr > bits size ifTrue:[^ exceptionValue value].

    oldByte := bits at:byteNr.
    (oldByte isBitSet:bitNr) ifFalse:[^ exceptionValue value].

    bits at:byteNr put:(oldByte clearBit:bitNr).
    ^ aCharacter

    "Created: / 28-01-2011 / 17:51:22 / cg"
! !
|
|
132 |
|
3458
|
133 |
!CharacterSet methodsFor:'comparing'!
|
|
134 |
|
|
135 |
= something
    "answer true, if something is of the same species and contains the same
     characters as the receiver.
     The comparison is made on the contents, not on the physical bitmaps:
     a missing (nil) or shorter bitmap is treated as if padded with zero bytes.
     Therefore, a set from which all characters have been removed again
     is equal to a freshly created one."

    |otherBits mySize otherSize myByte otherByte|

    self == something ifTrue:[^ true].
    (self species = something species) ifFalse:[^ false].

    otherBits := something byteArrayMap.
    mySize := bits isNil ifTrue:[0] ifFalse:[bits size].
    otherSize := otherBits isNil ifTrue:[0] ifFalse:[otherBits size].

    1 to:(mySize max:otherSize) do:[:index |
        myByte := (index <= mySize) ifTrue:[bits at:index] ifFalse:[0].
        otherByte := (index <= otherSize) ifTrue:[otherBits at:index] ifFalse:[0].
        myByte = otherByte ifFalse:[^ false].
    ].
    ^ true
!

hash
    "answer a hash value which is consistent with =.
     Only non-zero bytes (together with their position) contribute,
     so that a missing bitmap or trailing zero bytes make no difference."

    |hashValue eachByte|

    hashValue := 0.
    bits notNil ifTrue:[
        1 to:bits size do:[:index |
            eachByte := bits at:index.
            eachByte ~~ 0 ifTrue:[
                "masked, to keep the value in the SmallInteger range"
                hashValue := ((hashValue * 31) + (index * 256) + eachByte) bitAnd:16r3FFFFFFF.
            ].
        ].
    ].
    ^ hashValue
! !
|
|
143 |
|
4957
|
144 |
!CharacterSet methodsFor:'copying-private'!
|
3458
|
145 |
|
|
146 |
postCopy
    "invoked on a fresh copy: give it a bitmap of its own, so that
     later changes to the copy do not affect the original (and vice versa)"

    "no bitmap yet - nothing is shared"
    bits isNil ifTrue:[^ self].

    bits := bits copy
! !
|
|
151 |
|
2526
|
152 |
!CharacterSet methodsFor:'initialization'!
|
|
153 |
|
3459
|
154 |
addAllSingleByteCharacters
    "make the receiver contain all single-byte characters (codepoints 0 .. 255),
     by replacing the bitmap with 32 bytes (256 bits) which are all set"

    bits := ByteArray new:"(256 / 8)"32 withAll:16rFF.

    "
     self assert:(
        CharacterSet new addAllSingleByteCharacters
            includesAll:((Character value:0) to:(Character value:255)))

     self assert:(
        CharacterSet allSingleByteCharacters
            includesAll:((Character value:0) to:(Character value:255)))
    "
!
|
|
167 |
|
2526
|
168 |
initialize
    "set up the receiver as an empty set;
     the absence of a bitmap (nil) stands for 'no characters'"

    bits := nil.

    "Created: / 28-01-2011 / 17:29:48 / cg"
!
|
|
173 |
|
|
174 |
setByteArrayMap:aByteArray
    "install aByteArray as the receiver's bitmap.
     The argument is stored as-is (no copy is made)"

    bits := aByteArray.
! !
|
|
177 |
|
|
178 |
!CharacterSet methodsFor:'queries'!
|
|
179 |
|
|
180 |
do:aBlock
    "evaluate aBlock for each character in the set,
     in ascending order of their codepoints"

    |codePoint|

    "nothing to enumerate, if there is no bitmap"
    bits isNil ifTrue:[^ self].

    codePoint := 0.
    bits do:[:eachByte |
        eachByte == 0 ifTrue:[
            "none of this byte's characters is present - skip all 8 at once"
            codePoint := codePoint + 8.
        ] ifFalse:[
            1 to:8 do:[:bitNr |
                (eachByte isBitSet:bitNr) ifTrue:[
                    aBlock value:(Character codePoint:codePoint).
                ].
                codePoint := codePoint + 1.
            ].
        ].
    ].

    "Created: / 28-01-2011 / 17:39:16 / cg"
!
|
|
201 |
|
3459
|
202 |
includes:aCharacter
    "answer true, if aCharacter is an element of the set.
     Characters with a codepoint above 255 are never included."

    |codePoint byteNr|

    codePoint := aCharacter codePoint.
    codePoint > 255 ifTrue:[^ false].

    byteNr := (codePoint // 8) + 1.
    "beyond the end of the bitmap - cannot be an element"
    byteNr > bits size ifTrue:[^ false].

    ^ (bits at:byteNr) isBitSet:((codePoint \\ 8) + 1)
!
|
|
215 |
|
2526
|
216 |
size
    "answer the number of characters in the set,
     which is the total number of bits set in the bitmap"

    bits isNil ifTrue:[^ 0].

    ^ bits inject:0 into:[:count :eachByte | count + eachByte bitCount]

    "Created: / 28-01-2011 / 17:35:21 / cg"
! !
|
|
229 |
|
3459
|
230 |
!CharacterSet methodsFor:'set operations'!
|
|
231 |
|
|
232 |
complement
    "answer a new character set, holding exactly those characters
     in the codepoint range 0 .. 255, which are NOT elements of the receiver"

    |result|

    "start with everything, then take out what the receiver contains"
    result := self class allSingleByteCharacters.
    result removeAll:self.
    ^ result
! !
|
|
240 |
|
2526
|
241 |
!CharacterSet class methodsFor:'documentation'!
|
|
242 |
|
3459
|
243 |
version
    "answer the version string of this class's source;
     here, that is the literal '$Header$' string"

    ^ '$Header$'
!
|
|
246 |
|
2526
|
247 |
version_CVS
    "answer the CVS version string of this class's source;
     here, that is the literal '$Header$' string"

    ^ '$Header$'
! !
|
2933
|
250 |
|