author | Claus Gittinger <cg@exept.de> |
Thu, 23 Nov 1995 02:21:27 +0100 | |
changeset 607 | a9a526c51233 |
parent 530 | 07d0bce293c9 |
child 609 | 12be97f6d5a7 |
permissions | -rw-r--r-- |
1 | 1 |
" |
5 | 2 |
COPYRIGHT (c) 1991 by Claus Gittinger |
159 | 3 |
All Rights Reserved |
1 | 4 |
|
5 |
This software is furnished under a license and may be used |
|
6 |
only in accordance with the terms of that license and with the |
|
7 |
inclusion of the above copyright notice. This software may not |
|
8 |
be provided or otherwise made available to, or used by, any |
|
9 |
other person. No title to or ownership of the software is |
|
10 |
hereby transferred. |
|
11 |
" |
|
12 |
||
13 |
Collection subclass:#Set |
|
13 | 14 |
instanceVariableNames:'tally keyArray' |
41 | 15 |
classVariableNames:'DeletedEntry' |
1 | 16 |
poolDictionaries:'' |
17 |
category:'Collections-Unordered' |
|
18 |
! |
|
19 |
||
88 | 20 |
!Set class methodsFor:'documentation'! |
21 |
||
22 |
copyright |
|
23 |
" |
|
24 |
COPYRIGHT (c) 1991 by Claus Gittinger |
|
159 | 25 |
All Rights Reserved |
1 | 26 |
|
88 | 27 |
This software is furnished under a license and may be used |
28 |
only in accordance with the terms of that license and with the |
|
29 |
inclusion of the above copyright notice. This software may not |
|
30 |
be provided or otherwise made available to, or used by, any |
|
31 |
other person. No title to or ownership of the software is |
|
32 |
hereby transferred. |
|
33 |
" |
|
34 |
! |
|
1 | 35 |
|
88 | 36 |
version |
530
07d0bce293c9
uff - version methods changed to return stings
Claus Gittinger <cg@exept.de>
parents:
421
diff
changeset
|
37 |
^ '$Header: /cvs/stx/stx/libbasic/Set.st,v 1.24 1995-11-11 15:26:45 cg Exp $' |
88 | 38 |
! |
39 |
||
40 |
documentation |
|
41 |
" |
|
42 |
a Set is a collection where each element occurs at most once. |
|
95 | 43 |
The inclusion test is done using = for comparison; |
44 |
see IdentitySet for sets using identity compare. |
|
45 |
Sets use hashing for fast access, this access is considerably faster, |
|
46 |
if a good hash-number is returned by the elements. |
|
362 | 47 |
|
48 |
Notice that the default hash (Object>>hash) is not perfect; due to |
|
49 |
the implementation of hash-keys in ST/X, increased hash collisions |
|
50 |
are to be expected for large sets (say: > 20000 elements). |
|
51 |
If your objects are heavily used in sets or dictionaries, and you need |
|
52 |
big collections, your instances should provide better hash values. |
|
95 | 53 |
|
345 | 54 |
Performance hints: |
362 | 55 |
If only symbols or smallIntegers are entered into a set, |
56 |
use an instance of IdentitySet for slightly better performance, |
|
57 |
since both hashing and comparison are faster. |
|
345 | 58 |
|
59 |
If you have a rough idea how big the set is going to grow, |
|
60 |
create it using #new: instead of #new. Even if the size given is a |
|
61 |
poor guess (say half of the real size), there is some 20-30% performance |
|
62 |
win to expect, since many resizing operations of the set are avoided. |
|
63 |
||
95 | 64 |
Examples: |
65 |
||
159 | 66 |
|s| |
67 |
s := Set new. |
|
68 |
s add:'hello'. |
|
69 |
s add:'world'. |
|
70 |
s add:#foo. |
|
71 |
s add:1.2345678. |
|
72 |
s add:'hello'. |
|
95 | 73 |
|
159 | 74 |
s printNL. |
75 |
's size -> ' print. s size printNL. |
|
362 | 76 |
'(s includes:''hello'') -> ' print. (s includes:'hello') printNL. |
77 |
'(s includes:#foo) -> ' print. (s includes:#foo) printNL. |
|
78 |
'(s includes:''foo'') -> ' print. (s includes:'foo') printNL. |
|
79 |
'(s includes:#bar) -> ' print. (s includes:#bar) printNL. |
|
88 | 80 |
" |
81 |
! ! |
|
1 | 82 |
|
41 | 83 |
!Set class methodsFor:'initialization'! |
84 |
||
85 |
initialize
    "set up the class-wide state: create (once only) the unique marker
     object which is stored into slots whose element has been removed"

    DeletedEntry notNil ifTrue:[^ self].
    DeletedEntry := Object new

    "Set initialize"
! !
|
94 |
||
1 | 95 |
!Set class methodsFor:'instance creation'! |
96 |
||
97 |
new
    "answer a new, empty Set; it is created via #new: with a default
     request of 7 elements"

    ^ self new:7
!
|
102 |
||
103 |
new:anInteger
    "answer a new, empty Set with room for anInteger elements"

    |slots|

    "
     ask for a third more slots than requested, so that anInteger
     elements fill the container to at most about 75%
     (hashing works better if the container is not completely filled)
    "
    slots := anInteger * 4 // 3.
    ^ self basicNew setTally:slots
! !
|
112 |
||
252 | 113 |
!Set class methodsFor:'queries'! |
114 |
||
115 |
goodSizeFrom:arg
    "return a good container size for the given argument.
     Arguments up to 11 yield 11. Otherwise the argument is enlarged
     by 50% and - as long as the enlarged value is below 524288 (2^19) -
     the size is taken from a table of primes lying just above the
     powers of two, since prime sizes are good for hashing.
     For bigger values there is no table entry; the enlarged value
     is then only made odd (i.e. it is not necessarily a prime)."

    |n|

    arg <= 11 ifTrue:[^ 11].

    n := arg * 3 // 2.

    "
     mhmh - this returns good numbers for collections with up-to about
     500k elements; if you have bigger ones, add some more primes here ...

     The table has 19 entries and is indexed by the highBit of n
     (assumed to count from 1 for the least significant bit).
     524288 itself has highBit 20 and must therefore not use the
     table - hence the strict comparison.
    "
    n < 524288 ifTrue:[
           "2  4  8  16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288"
        ^ #(11 11 11 17 37 67 131 257 521 1031 2053 4099 8209 16411 32771 65537 131101 262147 524309) at:(n highBit)
    ].

    "
     make it odd - at least
    "
    ^ n bitOr:1
! !
138 |
||
159 | 139 |
!Set methodsFor:'copying'! |
140 |
||
141 |
postCopy
    "sent to a fresh copy of a set: give the copy its own key container,
     so that it no longer shares the one of the original"

    |ownKeys|

    ownKeys := keyArray shallowCopy.
    keyArray := ownKeys
! !
146 |
||
147 |
!Set methodsFor:'private'! |
|
148 |
||
13 | 149 |
keyContainerOfSize:n
    "answer a new container with n slots for the keys.
     Kept as a separate method, so that weak subclasses can
     provide another kind of container."

    ^ Array basicNew:n
!
155 |
||
2 | 156 |
fullCheck
    "to be called after an add: grow the receiver if it is full.
     'full' currently means: more than 75% (i.e. 3/4th) of the
     slots are in use"

    |limit "{Class: SmallInteger}" |

    limit := keyArray basicSize * 3 // 4.
    tally <= limit ifTrue:[^ self].
    self grow
!
|
170 |
||
41 | 171 |
emptyCheck
    "to be called after a remove: shrink the receiver if it has
     become too empty.
     'too empty' means: less than 12.5% (i.e. 1/8th) of the slots are
     in use. Containers with 30 slots or less are never made smaller."

    |slots "{Class: SmallInteger}" |

    slots := keyArray basicSize.
    slots <= 30 ifTrue:[^ self].
    tally >= (slots // 8) ifTrue:[^ self].

    "
     too empty - rehash into a smaller container
    "
    self grow:(slots // 7)
!
|
190 |
||
324 | 191 |
initialIndexFor:hashKey boundedBy:length
    "answer the (1-based) slot index at which a search for an element
     with the given hashKey starts in a container with length slots.
     Provided for ST-80 compatibility only; the search methods of this
     implementation compute the index inline (which saves a message
     send), so redefining this method in ST/X has no effect on them."

    |offset|

    offset := hashKey \\ length.
    ^ offset + 1
!
199 |
||
1 | 200 |
setTally:count
    "install a new, empty key container and reset the element count
     to zero. The number of slots is not count itself, but whatever
     the class computes as a good size for it (see #goodSizeFrom:)."

    |slots|

    slots := self class goodSizeFrom:count.
    keyArray := self keyContainerOfSize:slots.
    tally := 0
!
|
208 |
||
209 |
find:key ifAbsent:aBlock
    "search the receiver for key. Answer the index of the slot
     which holds it; if there is no such slot, answer the value
     of aBlock instead."

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"
     startIndex probe|

    length := keyArray basicSize.

    "/ (an attempt to use a plain linear search for containers with
    "/  less than 10 slots has been disabled here)

    index := key hash.
    index := (index \\ length) + 1.
    startIndex := index.

    [true] whileTrue:[
        probe := keyArray basicAt:index.

        "an empty slot ends the probe sequence - key is not present"
        probe isNil ifTrue:[^ aBlock value].
        key = probe ifTrue:[^ index].

        "advance to the next slot, wrapping around at the end"
        index := index == length ifTrue:[1] ifFalse:[index + 1].

        "back at the start - all slots have been looked at"
        index == startIndex ifTrue:[^ aBlock value].
    ]
!
|
243 |
||
13 | 244 |
findKeyOrNil:key
    "Look for the key in the receiver. If it is found, return
     the index of the slot containing the key; otherwise return
     the index of an unused (nil) slot into which the key can be stored.

     Slots which hold the DeletedEntry marker do NOT end the search:
     the key may still be stored further along the probe sequence
     (it was put there while the removed element still occupied the
     earlier slot). Therefore the first such slot is only remembered.
     Once the key is known to be absent, the remembered slot is
     cleared to nil and returned for reuse.

     Grow the receiver, if the key was not found and neither an unused
     nor a reusable slot was present."

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"
     startIndex probe
     delIndex "{ Class:SmallInteger }"|

    "index of the first DeletedEntry slot seen; 0 means: none so far"
    delIndex := 0.

    length := keyArray basicSize.
    index := key hash.
    index := index \\ length + 1.
    startIndex := index.

    [true] whileTrue:[
        probe := keyArray basicAt:index.
        probe isNil ifTrue:[
            "end of the probe sequence - the key is not present"
            delIndex == 0 ifTrue:[^ index].
            keyArray basicAt:delIndex put:nil.
            ^ delIndex
        ].
        key = probe ifTrue:[^ index].
        (delIndex == 0 and:[probe == DeletedEntry]) ifTrue:[
            delIndex := index
        ].

        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        index == startIndex ifTrue:[
            "all slots have been looked at - the key is not present"
            delIndex ~~ 0 ifTrue:[
                keyArray basicAt:delIndex put:nil.
                ^ delIndex
            ].
            ^ self grow findKeyOrNil:key
        ].
    ]
!
|
275 |
||
276 |
findNil:key
    "answer the index of the first empty (nil) slot on the probe
     sequence of key. The key is assumed not to be in the receiver;
     this is used only while growing/rehashing."

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"|

    length := keyArray basicSize.
    index := key hash.
    index := (index \\ length) + 1.

    "notice: there is no check for running around without finding a
     nil - one must be found, since this is only called after growing"
    [(keyArray basicAt:index) isNil] whileFalse:[
        index := index == length ifTrue:[1] ifFalse:[index + 1]
    ].
    ^ index
!
|
298 |
||
299 |
grow
    "enlarge the receiver: rehash into a container for which
     twice the current number of slots is requested"

    |doubled|

    doubled := keyArray basicSize * 2.
    self grow:doubled
!
305 |
||
306 |
grow:newSize
    "change the number of slots of the receiver. The new container
     size is the good size the class computes for newSize; nothing
     is done if that equals the current size. Otherwise a new, empty
     container is installed and all elements of the old one are
     entered again (rehashed); slots marked as deleted are dropped."

    |entry previousKeys freshKeys tombstone
     wantedSize previousSize "{ Class:SmallInteger }"|

    previousKeys := keyArray.
    previousSize := previousKeys size.
    wantedSize := self class goodSizeFrom:newSize.
    wantedSize == previousSize ifTrue:[^ self].

    "findNil: searches keyArray - so the new container must be
     installed before the elements are re-entered"
    freshKeys := self keyContainerOfSize:wantedSize.
    keyArray := freshKeys.

    tombstone := DeletedEntry.
    1 to:previousSize do:[:srcIndex |
        entry := previousKeys basicAt:srcIndex.
        (entry isNil or:[entry == tombstone]) ifFalse:[
            "cannot be already there"
            freshKeys basicAt:(self findNil:entry) put:entry
        ].
    ].
!
330 |
||
331 |
rehash
    "enter all elements again into a new, empty container of the
     same size; slots marked as deleted are dropped.
     Rehash is needed after a binaryRead, for example."

    |entry previousKeys freshKeys
     n "{ Class:SmallInteger }"|

    previousKeys := keyArray.
    n := previousKeys size.

    "findNil: searches keyArray - so the new container must be
     installed before the elements are re-entered"
    freshKeys := self keyContainerOfSize:n.
    keyArray := freshKeys.

    1 to:n do:[:index |
        entry := previousKeys at:index.
        (entry isNil or:[entry == DeletedEntry]) ifFalse:[
            "cannot be already there"
            freshKeys basicAt:(self findNil:entry) put:entry
        ].
    ]
!
|
350 |
||
351 |
rehashFrom:startIndex
    "re-enter the elements found in consecutive slots beginning at
     startIndex, up to the next empty slot - formerly used after a remove.
     Notice: due to the new implementation of remove,
             this is no longer needed.

     NOTE(review): findNil: is asked for a slot while the element still
     occupies its own slot; since findNil: only answers empty slots, the
     early return below looks unreachable and every visited element is
     moved. DeletedEntry markers are treated like elements here.
     Confirm before putting this method to use again."

    |entry target "{ Class:SmallInteger }"
     length
     index "{ Class:SmallInteger }" |

    length := keyArray basicSize.
    index := startIndex.
    [(entry := keyArray basicAt:index) notNil] whileTrue:[
        target := self findNil:entry.
        target == index ifTrue:[
            ^ self
        ].
        keyArray basicAt:target put:entry.
        keyArray basicAt:index put:nil.

        "advance to the next slot, wrapping around at the end"
        index := index == length ifTrue:[1] ifFalse:[index + 1].
    ]
! !
|
379 |
||
380 |
!Set methodsFor:'accessing'! |
|
381 |
||
382 |
at:index
    "sets are not keyed, so indexed access is not allowed:
     report an error"

    ^ self errorNotKeyed
!
|
387 |
||
388 |
at:index put:anObject
    "sets are not keyed, so indexed store is not allowed:
     report an error"

    ^ self errorNotKeyed
! !
|
393 |
||
394 |
!Set methodsFor:'testing'! |
|
395 |
||
396 |
size
    "answer the number of elements in the receiver
     (this is the element count, not the number of slots)"

    ^ tally
!
|
401 |
||
362 | 402 |
capacity
    "answer the number of slots of the key container, i.e. the number
     of elements the receiver is prepared to take.
     Not used by the system; added for ST-80 compatibility."

    ^ keyArray size
!
|
409 |
||
1 | 410 |
includes:anObject
    "answer true, if anObject is an element of the receiver"

    |slot|

    "0 takes the role of the absent-block here; it comes back
     as the result, if anObject is not found"
    slot := self find:anObject ifAbsent:0.
    ^ slot ~~ 0
!
415 |
||
416 |
isEmpty
    "answer true, if there are no elements in the receiver"

    ^ 0 == tally
!
|
421 |
||
422 |
occurrencesOf:anObject
    "answer how often anObject occurs in the receiver;
     for a set this is either 0 or 1"

    |slot|

    "0 takes the role of the absent-block here; it comes back
     as the result, if anObject is not found"
    slot := self find:anObject ifAbsent:0.
    ^ slot == 0 ifTrue:[0] ifFalse:[1]
!
|
428 |
||
429 |
isFixedSize
    "answer false - sets can grow.
     (this query will vanish once Arrays and Strings
      learn how to grow ...)"

    ^ false
! !
|
435 |
||
436 |
!Set methodsFor:'adding & removing'! |
|
437 |
||
438 |
add:anObject
    "make anObject an element of the receiver, unless it already is
     one; answer anObject. An attempt to add nil is silently ignored."

    |slot "{ Class: SmallInteger }"|

    anObject isNil ifTrue:[^ anObject].

    slot := self findKeyOrNil:anObject.
    (keyArray basicAt:slot) notNil ifTrue:[
        "already in the receiver"
        ^ anObject
    ].

    keyArray basicAt:slot put:anObject.
    tally := tally + 1.
    self fullCheck.
    ^ anObject
!
|
454 |
||
455 |
remove:oldObject ifAbsent:exceptionBlock
    "take oldObject out of the receiver and answer it.
     If it is not an element of the receiver, answer the value
     of exceptionBlock instead."

    |slot follower|

    "/ 0 is passed as the absent-handler instead of a block which does
    "/ the return; this is cheaper, since no block has to be created.

    slot := self find:oldObject ifAbsent:0.
    slot == 0 ifTrue:[^ exceptionBlock value].

    keyArray basicAt:slot put:nil.
    tally := tally - 1.
    tally == 0 ifTrue:[
        "nothing left - start over with a new, empty container"
        keyArray := self keyContainerOfSize:(self class goodSizeFrom:0).
        ^ oldObject
    ].

    "if the following slot is in use, the freed slot must be marked as
     deleted (instead of being left empty), since searches stop at
     empty slots"
    follower := slot == keyArray basicSize ifTrue:[1] ifFalse:[slot + 1].
    (keyArray basicAt:follower) isNil ifFalse:[
        keyArray basicAt:slot put:DeletedEntry.
    ].
    self emptyCheck.
    ^ oldObject
!
488 |
||
489 |
removeAll
    "make the receiver empty: a new, empty key container is
     installed (requested for 7 slots) and the element count is reset"

    self setTally:7
! !
494 |
||
495 |
!Set methodsFor:'enumerating'! |
|
496 |
||
497 |
do:aBlock
    "evaluate aBlock for each element of the receiver.
     Empty slots and slots marked as deleted are skipped."

    |slots "{ Class: SmallInteger }"
     entry|

    slots := keyArray size.
    1 to:slots do:[:index |
        entry := keyArray at:index.
        (entry isNil or:[entry == DeletedEntry]) ifFalse:[
            aBlock value:entry
        ]
    ]
! !
|
2 | 511 |
|
512 |
!Set methodsFor: 'binary storage'! |
|
513 |
||
514 |
readBinaryContentsFrom: stream manager: manager
    "have the contents read by the inherited method,
     then rehash - this is required after a reload"

    super readBinaryContentsFrom: stream manager: manager.
    self rehash
! !