2526
|
1 |
"
|
|
2 |
COPYRIGHT (c) 2011 by eXept Software AG
|
|
3 |
All Rights Reserved
|
|
4 |
|
|
5 |
This software is furnished under a license and may be used
|
|
6 |
only in accordance with the terms of that license and with the
|
|
7 |
inclusion of the above copyright notice. This software may not
|
|
8 |
be provided or otherwise made available to, or used by, any
|
|
9 |
other person. No title to or ownership of the software is
|
|
10 |
hereby transferred.
|
|
11 |
"
|
|
12 |
"{ Package: 'stx:libbasic2' }"
|
|
13 |
|
|
14 |
"CharacterSet is a Collection of single-byte characters, stored as a bitmap
 in the single instance variable 'bits' (nil or a ByteArray)."

Collection subclass:#CharacterSet
    instanceVariableNames:'bits'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Unordered'
|
|
19 |
!
|
|
20 |
|
|
21 |
!CharacterSet class methodsFor:'documentation'!
|
|
22 |
|
|
23 |
copyright
|
|
24 |
"
|
|
25 |
COPYRIGHT (c) 2011 by eXept Software AG
|
|
26 |
All Rights Reserved
|
|
27 |
|
|
28 |
This software is furnished under a license and may be used
|
|
29 |
only in accordance with the terms of that license and with the
|
|
30 |
inclusion of the above copyright notice. This software may not
|
|
31 |
be provided or otherwise made available to, or used by, any
|
|
32 |
other person. No title to or ownership of the software is
|
|
33 |
hereby transferred.
|
|
34 |
"
|
|
35 |
!
|
|
36 |
|
|
37 |
documentation
|
|
38 |
"
|
|
39 |
Used to represent 8-bit character sets (for now) as a bitmap.
|
|
40 |
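    Bit[n] (counted from 1 across the whole bitmap, starting with the least
    significant bit of the first byte) is set, if Character codePoint:(n-1)
    is included in the set.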
|
|
41 |
|
|
42 |
[author:]
|
|
43 |
Claus Gittinger
|
|
44 |
"
|
|
45 |
! !
|
|
46 |
|
|
47 |
!CharacterSet class methodsFor:'instance creation'!
|
|
48 |
|
3459
|
49 |
allSingleByteCharacters
    "answer a freshly created character set which holds every
     single-byte character (code points 0 to 255)"

    |fullSet|

    fullSet := self new.
    ^ fullSet addAllSingleByteCharacters
|
|
53 |
!
|
|
54 |
|
|
55 |
empty
    "answer a freshly created character set without any elements;
     this is the same as sending #new"

    |emptySet|

    emptySet := self new.
    ^ emptySet
|
|
59 |
!
|
|
60 |
|
2526
|
61 |
new
    "answer a freshly created character set.
     The instance is allocated with #basicNew and then sent #initialize,
     which leaves it empty."

    |inst|

    inst := self basicNew.
    ^ inst initialize

    "Created: / 28-01-2011 / 17:29:24 / cg"
|
3459
|
67 |
!
|
|
68 |
|
|
69 |
nonSeparators
    "answer a freshly created character set which holds every single-byte
     character except those answered by 'Character separators'"

    |result|

    result := self new.
    result addAllSingleByteCharacters.
    result removeAll:(Character separators).
    ^ result
|
|
76 |
!
|
|
77 |
|
|
78 |
separators
    "answer a freshly created character set which holds exactly the
     characters answered by 'Character separators'"

    |result|

    result := self new.
    result addAll:(Character separators).
    ^ result
|
2526
|
84 |
! !
|
|
85 |
|
3458
|
86 |
!CharacterSet methodsFor:'accessing'!
|
|
87 |
|
|
88 |
byteArrayMap
    "answer the bitmap which backs the receiver.
     This is the internal object itself (not a copy);
     it is nil as long as nothing has been added to a new set."

    ^ bits
|
|
90 |
! !
|
|
91 |
|
2526
|
92 |
!CharacterSet methodsFor:'adding & removing'!
|
|
93 |
|
|
94 |
add:aCharacter
    "add aCharacter to the set by switching on its bit in the bitmap.
     Only single-byte characters (code points 0 to 255) are supported;
     this is checked by an assertion.
     The bitmap is allocated on the first add and grown on demand, so that
     it is just long enough to hold the highest code point added so far.
     Returns aCharacter (sigh), not the receiver."

    |cp byteIndex bitIndex grownBits|

    cp := aCharacter codePoint.
    self assert:(cp <= 255).
    byteIndex := (cp // 8) + 1.
    bitIndex := (cp \\ 8) + 1.
    bits isNil ifTrue:[
        "/ first element: allocate directly instead of copying from nil
        bits := ByteArray new:byteIndex.
    ] ifFalse:[
        byteIndex > bits size ifTrue:[
            "/ grow; do not depend on the return value of replaceFrom:with:
            grownBits := ByteArray new:byteIndex.
            grownBits replaceFrom:1 with:bits.
            bits := grownBits.
        ].
    ].
    bits at:byteIndex put:((bits at:byteIndex) setBit:bitIndex).
    ^ aCharacter "/ sigh

    "Created: / 28-01-2011 / 17:44:21 / cg"
|
|
111 |
!
|
|
112 |
|
|
113 |
remove:aCharacter ifAbsent:exceptionValue
    "remove aCharacter from the set by switching off its bit in the bitmap
     and return aCharacter.
     If aCharacter is not in the set, return the value of exceptionValue.
     Only single-byte characters (code points 0 to 255) are supported;
     this is checked by an assertion."

    |codePoint byteIdx bitIdx oldByte|

    codePoint := aCharacter codePoint.
    self assert:(codePoint <= 255).
    byteIdx := (codePoint // 8) + 1.
    bitIdx := (codePoint \\ 8) + 1.

    "/ beyond the end of the bitmap: cannot be an element
    byteIdx > bits size ifTrue:[^ exceptionValue value].

    oldByte := bits at:byteIdx.
    (oldByte isBitSet:bitIdx) ifFalse:[^ exceptionValue value].

    bits at:byteIdx put:(oldByte clearBit:bitIdx).
    ^ aCharacter

    "Created: / 28-01-2011 / 17:51:22 / cg"
|
|
129 |
! !
|
|
130 |
|
3458
|
131 |
!CharacterSet methodsFor:'comparing'!
|
|
132 |
|
|
133 |
= something
    "return true, if something is of the same species and contains
     exactly the same characters as the receiver.
     The comparison looks at the elements, not at the raw bitmaps:
     a missing (nil) bitmap and zero bytes at the end of a bitmap both
     stand for 'no characters there', so two bitmaps of different length
     can still describe equal sets (for example, after an add followed
     by a remove of the same character)."

    |otherBits mySize otherSize commonSize|

    self == something ifTrue:[^ true].
    self species = something species ifFalse:[^ false].

    otherBits := something byteArrayMap.
    mySize := bits isNil ifTrue:[0] ifFalse:[bits size].
    otherSize := otherBits isNil ifTrue:[0] ifFalse:[otherBits size].
    commonSize := mySize min:otherSize.

    1 to:commonSize do:[:i |
        (bits at:i) = (otherBits at:i) ifFalse:[^ false].
    ].
    "/ whatever extends beyond the shorter bitmap must be all zero
    commonSize + 1 to:mySize do:[:i |
        (bits at:i) = 0 ifFalse:[^ false].
    ].
    commonSize + 1 to:otherSize do:[:i |
        (otherBits at:i) = 0 ifFalse:[^ false].
    ].
    ^ true
!

hash
    "return a hash value which is consistent with #=.
     Only non-zero bytes (together with their position) contribute,
     so sets with the same characters get the same hash, no matter
     whether their bitmaps are nil or carry zero bytes at the end."

    |h|

    h := 0.
    bits notNil ifTrue:[
        1 to:bits size do:[:i |
            |eachByte|

            eachByte := bits at:i.
            eachByte ~~ 0 ifTrue:[
                "/ keep the value within a 30 bit positive range
                h := ((h * 31) + (i * 256) + eachByte) bitAnd:16r3FFFFFFF.
            ].
        ].
    ].
    ^ h
|
|
140 |
! !
|
|
141 |
|
|
142 |
!CharacterSet methodsFor:'copying'!
|
|
143 |
|
|
144 |
postCopy
    "give the receiver (a fresh copy) a bitmap of its own, so that later
     changes to the copy or to the original do not affect each other"

    |privateBits|

    privateBits := bits copy.
    bits := privateBits
|
|
148 |
! !
|
|
149 |
|
2526
|
150 |
!CharacterSet methodsFor:'initialization'!
|
|
151 |
|
3459
|
152 |
addAllSingleByteCharacters
    "make the receiver contain every single-byte character, by replacing
     the bitmap with 32 bytes (256 bits) which have all of their bits set"

    bits := ByteArray new:(256 // 8) withAll:255.

    "
     self assert:(
        CharacterSet new addAllSingleByteCharacters
            includesAll:((Character value:0) to:(Character value:255)))

     self assert:(
        CharacterSet allSingleByteCharacters
            includesAll:((Character value:0) to:(Character value:255)))
    "
|
|
164 |
!
|
|
165 |
|
2526
|
166 |
initialize
    "set up a new instance as an empty set.
     The empty state is represented by having no bitmap at all (nil);
     a ByteArray is only stored once something gets added."

    bits := nil.

    "Created: / 28-01-2011 / 17:29:48 / cg"
|
3459
|
170 |
!
|
|
171 |
|
|
172 |
setByteArrayMap:aByteArray
    "install aByteArray as the bitmap which backs the receiver.
     The argument is stored as is (not copied), so it is shared
     with the sender afterwards."

    bits := aByteArray.
|
2526
|
174 |
! !
|
|
175 |
|
|
176 |
!CharacterSet methodsFor:'queries'!
|
|
177 |
|
|
178 |
do:aBlock
    "evaluate aBlock for each character in the set,
     in ascending code point order.
     Does nothing, if there is no bitmap yet (empty set)."

    |firstCodePointInByte|

    bits isNil ifTrue:[^ self].

    firstCodePointInByte := 0.
    bits do:[:eachByte |
        "/ bytes without any bit set are skipped as a whole
        eachByte ~~ 0 ifTrue:[
            1 to:8 do:[:bitNr |
                (eachByte isBitSet:bitNr) ifTrue:[
                    aBlock value:(Character codePoint:(firstCodePointInByte + bitNr - 1)).
                ].
            ].
        ].
        firstCodePointInByte := firstCodePointInByte + 8.
    ].

    "Created: / 28-01-2011 / 17:39:16 / cg"
|
|
198 |
!
|
|
199 |
|
3459
|
200 |
includes:aCharacter
    "return true, if aCharacter is an element of the set.
     Characters with a code point above 255, and characters whose bit
     would lie beyond the current end of the bitmap, are never included."

    |codePoint byteIdx|

    codePoint := aCharacter codePoint.
    codePoint > 255 ifTrue:[^ false].

    byteIdx := (codePoint // 8) + 1.
    byteIdx > bits size ifTrue:[^ false].

    ^ (bits at:byteIdx) isBitSet:((codePoint \\ 8) + 1)
|
|
212 |
!
|
|
213 |
|
2526
|
214 |
size
    "return the number of characters in the set,
     which is the number of bits set in the bitmap
     (0, if there is no bitmap yet)"

    bits isNil ifTrue:[^ 0].

    ^ bits inject:0 into:[:sumSoFar :eachByte | sumSoFar + eachByte bitCount]

    "Created: / 28-01-2011 / 17:35:21 / cg"
|
|
226 |
! !
|
|
227 |
|
3459
|
228 |
!CharacterSet methodsFor:'set operations'!
|
|
229 |
|
|
230 |
complement
    "return a new character set, which contains exactly those characters
     from the single-byte range (code points 0 to 255),
     which are NOT elements of the receiver.
     The receiver itself is not changed."

    |inverse|

    inverse := self class allSingleByteCharacters.
    inverse removeAll:self.
    ^ inverse
|
|
237 |
! !
|
|
238 |
|
2526
|
239 |
!CharacterSet class methodsFor:'documentation'!
|
|
240 |
|
3459
|
241 |
version
    "return the revision header string of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic2/CharacterSet.st,v 1.5 2014-12-28 14:08:18 cg Exp $'
|
3459
|
243 |
!
|
|
244 |
|
2526
|
245 |
version_CVS
    "return the CVS revision header string of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic2/CharacterSet.st,v 1.5 2014-12-28 14:08:18 cg Exp $'
|
2526
|
247 |
! !
|
2933
|
248 |
|