22 COPYRIGHT (c) 1991 by Claus Gittinger |
22 COPYRIGHT (c) 1991 by Claus Gittinger |
23 All Rights Reserved |
23 All Rights Reserved |
24 |
24 |
25 a Set is a collection where each element occurs at most once. |
25 a Set is a collection where each element occurs at most once. |
26 |
26 |
27 $Header: /cvs/stx/stx/libbasic/Set.st,v 1.4 1993-10-13 02:13:35 claus Exp $ |
27 $Header: /cvs/stx/stx/libbasic/Set.st,v 1.5 1993-12-11 00:56:26 claus Exp $ |
28 written jun 91 by claus |
28 written jun 91 by claus |
29 jan 93 claus: changed to use hashing |
29 jan 93 claus: changed to use hashing |
30 '! |
30 '! |
31 |
31 |
32 !Set class methodsFor:'instance creation'! |
32 !Set class methodsFor:'instance creation'! |
43 ^ self basicNew setTally:anInteger |
43 ^ self basicNew setTally:anInteger |
44 ! ! |
44 ! ! |
45 |
45 |
46 !Set methodsFor:'private'! |
46 !Set methodsFor:'private'! |
47 |
47 |
|
48 keyContainerOfSize:n |
|
49 "return a container for keys of size n. |
|
50 Extracted to make life of weak subclasses easier ..." |
|
51 |
|
52 ^ Array new:n |
|
53 ! |
|
54 |
48 fullCheck |
55 fullCheck |
49 "check if dictionary is full, grow if so. |
56 "check if dictionary is full, grow if so. |
50 Definition of full is currently:'filled more than 70%'" |
57 Definition of full is currently:'filled more than 70%'" |
51 |
58 |
52 "grow if filled more than 70% " |
59 "grow if filled more than 70% " |
53 tally > (contentsArray basicSize * 7 // 10) ifTrue:[ |
60 tally > (keyArray basicSize * 7 // 10) ifTrue:[ |
54 self grow |
61 self grow |
55 ] |
62 ] |
56 ! |
63 ! |
57 |
64 |
58 goodSizeFor:arg |
65 goodSizeFor:arg |
70 setTally:count |
77 setTally:count |
71 "initialize the contents array (for at least count slots) |
78 "initialize the contents array (for at least count slots) |
72 and set tally to zero. |
79 and set tally to zero. |
73 The size is increased to the next prime for better hashing behavior." |
80 The size is increased to the next prime for better hashing behavior." |
74 |
81 |
75 contentsArray := Array new:(self goodSizeFor:count). |
82 keyArray := Array new:(self goodSizeFor:count). |
76 tally := 0 |
83 tally := 0 |
77 ! |
84 ! |
78 |
85 |
79 find:key ifAbsent:aBlock |
86 find:key ifAbsent:aBlock |
80 "Look for the key in the receiver. If it is found, return |
87 "Look for the key in the receiver. If it is found, return |
82 return the value of evaluating aBlock." |
89 return the value of evaluating aBlock." |
83 |
90 |
84 |index "{ Class:SmallInteger }" |
91 |index "{ Class:SmallInteger }" |
85 length startIndex probe| |
92 length startIndex probe| |
86 |
93 |
87 length := contentsArray basicSize. |
94 length := keyArray basicSize. |
|
95 length < 10 ifTrue:[ |
|
96 "assuming, that for small collections the overhead of hashing |
|
97 is large ... maybe that proves wrong (if overhead of comparing |
|
98 is high)" |
|
99 ^ keyArray indexOf:key ifAbsent:aBlock |
|
100 ]. |
|
101 |
88 startIndex := key hash \\ length + 1. |
102 startIndex := key hash \\ length + 1. |
89 index := startIndex. |
103 index := startIndex. |
90 |
104 |
91 [true] whileTrue:[ |
105 [true] whileTrue:[ |
92 probe := (contentsArray basicAt:index). |
106 probe := (keyArray basicAt:index). |
93 key = probe ifTrue:[^ index]. |
107 key = probe ifTrue:[^ index]. |
94 probe isNil ifTrue:[^ aBlock value]. |
|
95 |
108 |
96 index == length ifTrue:[ |
109 index == length ifTrue:[ |
97 index := 1 |
110 index := 1 |
98 ] ifFalse:[ |
111 ] ifFalse:[ |
99 index := index + 1 |
112 index := index + 1 |
100 ]. |
113 ]. |
101 index == startIndex ifTrue:[^ aBlock value]. |
114 ((probe isNil) or:[index == startIndex]) ifTrue:[^ aBlock value]. |
102 ] |
115 ] |
103 ! |
116 ! |
104 |
117 |
105 findElementOrNil:key |
118 findKeyOrNil:key |
106 "Look for the key in the receiver. If it is found, return |
119 "Look for the key in the receiver. If it is found, return |
107 the index of the slot containing the key, otherwise |
120 the index of the slot containing the key, otherwise |
108 return the index of the first unused slot. Grow the receiver, |
121 return the index of the first unused slot. Grow the receiver, |
109 if key was not found, and no unused slots where present" |
122 if key was not found, and no unused slots where present" |
110 |
123 |
111 |index "{ Class:SmallInteger }" |
124 |index "{ Class:SmallInteger }" |
112 length startIndex probe| |
125 length startIndex probe| |
113 |
126 |
114 length := contentsArray basicSize. |
127 length := keyArray basicSize. |
115 startIndex := key hash \\ length + 1. |
128 startIndex := key hash \\ length + 1. |
116 index := startIndex. |
129 index := startIndex. |
117 |
130 |
118 [true] whileTrue:[ |
131 [true] whileTrue:[ |
119 probe := contentsArray basicAt:index. |
132 probe := keyArray basicAt:index. |
120 (probe isNil or: [key = probe]) ifTrue:[^ index]. |
133 (probe isNil or: [key = probe]) ifTrue:[^ index]. |
121 |
134 |
122 index == length ifTrue:[ |
135 index == length ifTrue:[ |
123 index := 1 |
136 index := 1 |
124 ] ifFalse:[ |
137 ] ifFalse:[ |
125 index := index + 1 |
138 index := index + 1 |
126 ]. |
139 ]. |
127 index == startIndex ifTrue:[^ self grow findElementOrNil:key]. |
140 index == startIndex ifTrue:[^ self grow findKeyOrNil:key]. |
128 ] |
141 ] |
129 ! |
142 ! |
130 |
143 |
131 findNil:key |
144 findNil:key |
132 "Look for the next slot usable for key. This method assumes that |
145 "Look for the next slot usable for key. This method assumes that |
133 key is not already in the receiver - used only while growing/rehashing" |
146 key is not already in the receiver - used only while growing/rehashing" |
134 |
147 |
135 |index "{ Class:SmallInteger }" |
148 |index "{ Class:SmallInteger }" |
136 length| |
149 length| |
137 |
150 |
138 length := contentsArray basicSize. |
151 length := keyArray basicSize. |
139 index := key hash \\ length + 1. |
152 index := key hash \\ length + 1. |
140 |
153 |
141 [(contentsArray basicAt:index) notNil] whileTrue:[ |
154 [(keyArray basicAt:index) notNil] whileTrue:[ |
142 index == length ifTrue:[ |
155 index == length ifTrue:[ |
143 index := 1 |
156 index := 1 |
144 ] ifFalse:[ |
157 ] ifFalse:[ |
145 index := index + 1 |
158 index := index + 1 |
146 ]. |
159 ]. |
152 |
165 |
153 grow |
166 grow |
154 "change the number of element slots of the collection to a useful |
167 "change the number of element slots of the collection to a useful |
155 new size" |
168 new size" |
156 |
169 |
157 self grow:(contentsArray basicSize * 2) |
170 self grow:(keyArray basicSize * 2) |
158 ! |
171 ! |
159 |
172 |
160 grow:newSize |
173 grow:newSize |
161 "change the number of element slots of the collection - to do this, |
174 "change the number of element slots of the collection - to do this, |
162 we have to rehash (which is done by re-adding all elements to a new |
175 we have to rehash (which is done by re-adding all elements to a new |
163 empty set)." |
176 empty set)." |
164 |
177 |
165 |oldArray oldSize elem |
178 |oldKeyArray elem |
166 n "{ Class:SmallInteger }" | |
179 oldSize "{ Class:SmallInteger }" | |
167 |
180 |
168 oldArray := contentsArray. |
181 oldKeyArray := keyArray. |
169 oldSize := tally. |
182 |
170 |
183 keyArray := Array new:(self goodSizeFor:newSize). |
171 contentsArray := Array new:(self goodSizeFor:newSize). |
184 |
172 |
185 oldSize := oldKeyArray size. |
173 n := oldArray size. |
186 1 to:oldSize do:[:srcIndex | |
174 1 to:n do:[:srcIndex | |
187 elem := oldKeyArray basicAt:srcIndex. |
175 elem := oldArray basicAt:srcIndex. |
|
176 elem notNil ifTrue:[ |
188 elem notNil ifTrue:[ |
177 "cannot be already there" |
189 "cannot be already there" |
178 contentsArray basicAt:(self findNil:elem) put:elem |
190 keyArray basicAt:(self findNil:elem) put:elem |
179 ]. |
191 ]. |
180 ]. |
192 ] |
181 tally := oldSize |
|
182 ! |
193 ! |
183 |
194 |
184 rehash |
195 rehash |
185 "rehash is done by re-adding all elements to a new empty set." |
196 "rehash is done by re-adding all elements to a new empty set." |
186 |
197 |
187 | oldArray element |
198 | oldArray element |
188 n "{ Class:SmallInteger }" | |
199 n "{ Class:SmallInteger }" | |
189 |
200 |
190 oldArray := contentsArray. |
201 oldArray := keyArray. |
191 n := oldArray size. |
202 n := oldArray size. |
192 contentsArray := Array new:n. |
203 keyArray := Array new:n. |
193 1 to:n do:[:index | |
204 1 to:n do:[:index | |
194 element := oldArray at:index. |
205 element := oldArray at:index. |
195 element notNil ifTrue:[ |
206 element notNil ifTrue:[ |
196 "cannot be already there" |
207 "cannot be already there" |
197 contentsArray basicAt:(self findNil:element) put:element |
208 keyArray basicAt:(self findNil:element) put:element |
198 ]. |
209 ]. |
199 ] |
210 ] |
200 ! |
211 ! |
201 |
212 |
202 rehashFrom:startIndex |
213 rehashFrom:startIndex |
204 |
215 |
205 |element i "{ Class:SmallInteger }" |
216 |element i "{ Class:SmallInteger }" |
206 length |
217 length |
207 index "{ Class:SmallInteger }" | |
218 index "{ Class:SmallInteger }" | |
208 |
219 |
209 length := contentsArray basicSize. |
220 length := keyArray basicSize. |
210 index := startIndex. |
221 index := startIndex. |
211 element := contentsArray basicAt:index. |
222 element := keyArray basicAt:index. |
212 [element notNil] whileTrue:[ |
223 [element notNil] whileTrue:[ |
213 i := self findNil:element. |
224 i := self findNil:element. |
214 i == index ifTrue:[ |
225 i == index ifTrue:[ |
215 ^ self |
226 ^ self |
216 ]. |
227 ]. |
217 contentsArray basicAt:i put:element. |
228 keyArray basicAt:i put:element. |
218 contentsArray basicAt:index put:nil. |
229 keyArray basicAt:index put:nil. |
219 |
230 |
220 index == length ifTrue:[ |
231 index == length ifTrue:[ |
221 index := 1 |
232 index := 1 |
222 ] ifFalse:[ |
233 ] ifFalse:[ |
223 index := index + 1. |
234 index := index + 1. |
224 ]. |
235 ]. |
225 element := contentsArray basicAt:index. |
236 element := keyArray basicAt:index. |
226 ] |
237 ] |
227 ! ! |
238 ! ! |
228 |
239 |
229 !Set methodsFor:'accessing'! |
240 !Set methodsFor:'accessing'! |
230 |
241 |
298 If it was not in the collection return the value of exceptionBlock." |
309 If it was not in the collection return the value of exceptionBlock." |
299 |
310 |
300 |index next| |
311 |index next| |
301 |
312 |
302 index := self find:oldObject ifAbsent:[^ exceptionBlock value]. |
313 index := self find:oldObject ifAbsent:[^ exceptionBlock value]. |
303 contentsArray basicAt:index put:nil. |
314 keyArray basicAt:index put:nil. |
304 tally := tally - 1. |
315 tally := tally - 1. |
305 tally == 0 ifTrue:[ |
316 tally == 0 ifTrue:[ |
306 contentsArray := Array new:(self goodSizeFor:0). |
317 keyArray := Array new:(self goodSizeFor:0). |
307 ] ifFalse:[ |
318 ] ifFalse:[ |
308 index == contentsArray basicSize ifTrue:[ |
319 index == keyArray basicSize ifTrue:[ |
309 next := 1 |
320 next := 1 |
310 ] ifFalse:[ |
321 ] ifFalse:[ |
311 next := index + 1. |
322 next := index + 1. |
312 ]. |
323 ]. |
313 "this check is redundant - is also done in rehashFrom:, |
324 "this check is redundant - is also done in rehashFrom:, |
314 however, since there is some probability that the next |
325 however, since there is some probability that the next |
315 element is nil, this saves a send sometimes |
326 element is nil, this saves a send sometimes |
316 " |
327 " |
317 (contentsArray basicAt:next) notNil ifTrue:[ |
328 (keyArray basicAt:next) notNil ifTrue:[ |
318 self rehashFrom:next. |
329 self rehashFrom:next. |
319 ] |
330 ] |
320 ]. |
331 ]. |
321 ^ oldObject |
332 ^ oldObject |
322 ! ! |
333 ! ! |