Set.st
changeset 13 62303f84ff5f
parent 5 67342904af11
child 41 a14247b04d03
equal deleted inserted replaced
12:8e03bd717355 13:62303f84ff5f
     9  other person.  No title to or ownership of the software is
     9  other person.  No title to or ownership of the software is
    10  hereby transferred.
    10  hereby transferred.
    11 "
    11 "
    12 
    12 
    13 Collection subclass:#Set
    13 Collection subclass:#Set
    14        instanceVariableNames:'tally contentsArray'
    14        instanceVariableNames:'tally keyArray'
    15        classVariableNames:''
    15        classVariableNames:''
    16        poolDictionaries:''
    16        poolDictionaries:''
    17        category:'Collections-Unordered'
    17        category:'Collections-Unordered'
    18 !
    18 !
    19 
    19 
    22 COPYRIGHT (c) 1991 by Claus Gittinger
    22 COPYRIGHT (c) 1991 by Claus Gittinger
    23               All Rights Reserved
    23               All Rights Reserved
    24 
    24 
    25 a Set is a collection where each element occurs at most once.
    25 a Set is a collection where each element occurs at most once.
    26 
    26 
    27 $Header: /cvs/stx/stx/libbasic/Set.st,v 1.4 1993-10-13 02:13:35 claus Exp $
    27 $Header: /cvs/stx/stx/libbasic/Set.st,v 1.5 1993-12-11 00:56:26 claus Exp $
    28 written jun 91 by claus
    28 written jun 91 by claus
    29 jan 93 claus: changed to use hashing
    29 jan 93 claus: changed to use hashing
    30 '!
    30 '!
    31 
    31 
    32 !Set class methodsFor:'instance creation'!
    32 !Set class methodsFor:'instance creation'!
    43     ^ self basicNew setTally:anInteger
    43     ^ self basicNew setTally:anInteger
    44 ! !
    44 ! !
    45 
    45 
    46 !Set methodsFor:'private'!
    46 !Set methodsFor:'private'!
    47 
    47 
       
    48 keyContainerOfSize:n
       
    49     "return a container for keys of size n.
       
    50      Extracted to make life of weak subclasses easier ..."
       
    51 
       
    52     ^ Array new:n
       
    53 !
       
    54 
    48 fullCheck
    55 fullCheck
    49     "check if dictionary is full, grow if so.
    56     "check if dictionary is full, grow if so.
    50      Definition of full is currently:'filled more than 70%'"
    57      Definition of full is currently:'filled more than 70%'"
    51 
    58 
    52     "grow if filled more than 70% "
    59     "grow if filled more than 70% "
    53     tally > (contentsArray basicSize * 7 // 10) ifTrue:[
    60     tally > (keyArray basicSize * 7 // 10) ifTrue:[
    54        self grow
    61        self grow
    55     ]
    62     ]
    56 !
    63 !
    57 
    64 
    58 goodSizeFor:arg
    65 goodSizeFor:arg
    70 setTally:count
    77 setTally:count
    71     "initialize the contents array (for at least count slots) 
    78     "initialize the contents array (for at least count slots) 
    72      and set tally to zero.
    79      and set tally to zero.
    73      The size is increased to the next prime for better hashing behavior."
    80      The size is increased to the next prime for better hashing behavior."
    74 
    81 
    75     contentsArray := Array new:(self goodSizeFor:count). 
    82     keyArray := Array new:(self goodSizeFor:count). 
    76     tally := 0
    83     tally := 0
    77 !
    84 !
    78 
    85 
    79 find:key ifAbsent:aBlock
    86 find:key ifAbsent:aBlock
    80     "Look for the key in the receiver.  If it is found, return
    87     "Look for the key in the receiver.  If it is found, return
    82      return the value of evaluating aBlock."
    89      return the value of evaluating aBlock."
    83 
    90 
    84     |index      "{ Class:SmallInteger }"
    91     |index      "{ Class:SmallInteger }"
    85      length startIndex probe|
    92      length startIndex probe|
    86 
    93 
    87     length := contentsArray basicSize.
    94     length := keyArray basicSize.
       
    95     length < 10 ifTrue:[
       
    96         "assuming, that for small collections the overhead of hashing
       
    97          is large ... maybe that proves wrong (if overhead of comparing
       
    98          is high)"
       
    99         ^ keyArray indexOf:key ifAbsent:aBlock
       
   100     ].
       
   101 
    88     startIndex := key hash \\ length + 1.
   102     startIndex := key hash \\ length + 1.
    89     index := startIndex.
   103     index := startIndex.
    90 
   104 
    91     [true] whileTrue:[
   105     [true] whileTrue:[
    92         probe := (contentsArray basicAt:index).
   106         probe := (keyArray basicAt:index).
    93         key = probe ifTrue:[^ index].
   107         key = probe ifTrue:[^ index].
    94         probe isNil ifTrue:[^ aBlock value].
       
    95 
   108 
    96         index == length ifTrue:[
   109         index == length ifTrue:[
    97             index := 1
   110             index := 1
    98         ] ifFalse:[
   111         ] ifFalse:[
    99             index := index + 1
   112             index := index + 1
   100         ].
   113         ].
   101         index == startIndex ifTrue:[^ aBlock value].
   114         ((probe isNil) or:[index == startIndex]) ifTrue:[^ aBlock value].
   102     ]
   115     ]
   103 !
   116 !
   104 
   117 
   105 findElementOrNil:key
   118 findKeyOrNil:key
   106     "Look for the key in the receiver.  If it is found, return
   119     "Look for the key in the receiver.  If it is found, return
   107      the index of the slot containing the key, otherwise
   120      the index of the slot containing the key, otherwise
   108      return the index of the first unused slot. Grow the receiver,
   121      return the index of the first unused slot. Grow the receiver,
   109      if key was not found, and no unused slots where present"
   122      if key was not found, and no unused slots where present"
   110 
   123 
   111     |index      "{ Class:SmallInteger }"
   124     |index      "{ Class:SmallInteger }"
   112      length startIndex probe|
   125      length startIndex probe|
   113 
   126 
   114     length := contentsArray basicSize.
   127     length := keyArray basicSize.
   115     startIndex := key hash \\ length + 1.
   128     startIndex := key hash \\ length + 1.
   116     index := startIndex.
   129     index := startIndex.
   117 
   130 
   118     [true] whileTrue:[
   131     [true] whileTrue:[
   119         probe := contentsArray basicAt:index.
   132         probe := keyArray basicAt:index.
   120         (probe isNil or: [key = probe]) ifTrue:[^ index].
   133         (probe isNil or: [key = probe]) ifTrue:[^ index].
   121 
   134 
   122         index == length ifTrue:[
   135         index == length ifTrue:[
   123             index := 1
   136             index := 1
   124         ] ifFalse:[
   137         ] ifFalse:[
   125             index := index + 1
   138             index := index + 1
   126         ].
   139         ].
   127         index == startIndex ifTrue:[^ self grow findElementOrNil:key].
   140         index == startIndex ifTrue:[^ self grow findKeyOrNil:key].
   128     ]
   141     ]
   129 !
   142 !
   130 
   143 
   131 findNil:key
   144 findNil:key
   132     "Look for the next slot usable for key.  This method assumes that
   145     "Look for the next slot usable for key.  This method assumes that
   133      key is not already in the receiver - used only while growing/rehashing"
   146      key is not already in the receiver - used only while growing/rehashing"
   134 
   147 
   135     |index  "{ Class:SmallInteger }"
   148     |index  "{ Class:SmallInteger }"
   136      length|
   149      length|
   137 
   150 
   138     length := contentsArray basicSize.
   151     length := keyArray basicSize.
   139     index := key hash \\ length + 1.
   152     index := key hash \\ length + 1.
   140 
   153 
   141     [(contentsArray basicAt:index) notNil] whileTrue:[
   154     [(keyArray basicAt:index) notNil] whileTrue:[
   142         index == length ifTrue:[
   155         index == length ifTrue:[
   143             index := 1
   156             index := 1
   144         ] ifFalse:[
   157         ] ifFalse:[
   145             index := index + 1
   158             index := index + 1
   146         ].
   159         ].
   152 
   165 
   153 grow
   166 grow
   154     "change the number of element slots of the collection to a useful
   167     "change the number of element slots of the collection to a useful
   155      new size"
   168      new size"
   156 
   169 
   157     self grow:(contentsArray basicSize * 2)
   170     self grow:(keyArray basicSize * 2)
   158 !
   171 !
   159 
   172 
   160 grow:newSize
   173 grow:newSize
   161     "change the number of element slots of the collection - to do this,
   174     "change the number of element slots of the collection - to do this,
   162      we have to rehash (which is done by re-adding all elements to a new
   175      we have to rehash (which is done by re-adding all elements to a new
   163      empty set)."
   176      empty set)."
   164 
   177 
   165     |oldArray oldSize elem
   178     |oldKeyArray elem
   166      n "{ Class:SmallInteger }" |
   179      oldSize "{ Class:SmallInteger }" |
   167 
   180 
   168     oldArray := contentsArray.
   181     oldKeyArray := keyArray.
   169     oldSize := tally.
   182 
   170 
   183     keyArray := Array new:(self goodSizeFor:newSize). 
   171     contentsArray := Array new:(self goodSizeFor:newSize). 
   184 
   172 
   185     oldSize := oldKeyArray size.
   173     n := oldArray size.
   186     1 to:oldSize do:[:srcIndex |
   174     1 to:n do:[:srcIndex |
   187         elem := oldKeyArray basicAt:srcIndex.
   175         elem := oldArray basicAt:srcIndex.
       
   176         elem notNil ifTrue:[
   188         elem notNil ifTrue:[
   177             "cannot be already there"
   189             "cannot be already there"
   178             contentsArray basicAt:(self findNil:elem) put:elem
   190             keyArray basicAt:(self findNil:elem) put:elem
   179         ].
   191         ].
   180     ].
   192     ]
   181     tally := oldSize
       
   182 !
   193 !
   183 
   194 
   184 rehash
   195 rehash
   185     "rehash is done by re-adding all elements to a new empty set."
   196     "rehash is done by re-adding all elements to a new empty set."
   186 
   197 
   187     | oldArray element 
   198     | oldArray element 
   188       n "{ Class:SmallInteger }" |
   199       n "{ Class:SmallInteger }" |
   189 
   200 
   190     oldArray := contentsArray.
   201     oldArray := keyArray.
   191     n := oldArray size.
   202     n := oldArray size.
   192     contentsArray := Array new:n.
   203     keyArray := Array new:n.
   193     1 to:n do:[:index |
   204     1 to:n do:[:index |
   194         element := oldArray at:index.
   205         element := oldArray at:index.
   195         element notNil ifTrue:[
   206         element notNil ifTrue:[
   196             "cannot be already there"
   207             "cannot be already there"
   197             contentsArray basicAt:(self findNil:element) put:element
   208             keyArray basicAt:(self findNil:element) put:element
   198         ].
   209         ].
   199     ]
   210     ]
   200 !
   211 !
   201 
   212 
   202 rehashFrom:startIndex
   213 rehashFrom:startIndex
   204 
   215 
   205     |element i "{ Class:SmallInteger }"
   216     |element i "{ Class:SmallInteger }"
   206      length
   217      length
   207      index "{ Class:SmallInteger }" |
   218      index "{ Class:SmallInteger }" |
   208 
   219 
   209     length := contentsArray basicSize.
   220     length := keyArray basicSize.
   210     index := startIndex.
   221     index := startIndex.
   211     element := contentsArray basicAt:index.
   222     element := keyArray basicAt:index.
   212     [element notNil] whileTrue:[
   223     [element notNil] whileTrue:[
   213         i := self findNil:element.
   224         i := self findNil:element.
   214         i == index ifTrue:[
   225         i == index ifTrue:[
   215             ^ self
   226             ^ self
   216         ].
   227         ].
   217         contentsArray basicAt:i put:element.
   228         keyArray basicAt:i put:element.
   218         contentsArray basicAt:index put:nil.
   229         keyArray basicAt:index put:nil.
   219 
   230 
   220         index == length ifTrue:[
   231         index == length ifTrue:[
   221             index := 1
   232             index := 1
   222         ] ifFalse:[
   233         ] ifFalse:[
   223             index := index + 1.
   234             index := index + 1.
   224         ].
   235         ].
   225         element := contentsArray basicAt:index.
   236         element := keyArray basicAt:index.
   226     ]
   237     ]
   227 ! !
   238 ! !
   228 
   239 
   229 !Set methodsFor:'accessing'!
   240 !Set methodsFor:'accessing'!
   230 
   241 
   280     "add the argument, anObject to the receiver"
   291     "add the argument, anObject to the receiver"
   281 
   292 
   282     |index|
   293     |index|
   283 
   294 
   284     anObject notNil ifTrue:[
   295     anObject notNil ifTrue:[
   285         index := self findElementOrNil:anObject.
   296         index := self findKeyOrNil:anObject.
   286         (contentsArray basicAt:index) isNil ifTrue:[
   297         (keyArray basicAt:index) isNil ifTrue:[
   287             contentsArray basicAt:index put:anObject.
   298             keyArray basicAt:index put:anObject.
   288             tally := tally + 1.
   299             tally := tally + 1.
   289 
   300 
   290             self fullCheck.
   301             self fullCheck.
   291         ]
   302         ]
   292     ].
   303     ].
   298      If it was not in the collection return the value of exceptionBlock."
   309      If it was not in the collection return the value of exceptionBlock."
   299 
   310 
   300     |index next|
   311     |index next|
   301 
   312 
   302     index := self find:oldObject ifAbsent:[^ exceptionBlock value].
   313     index := self find:oldObject ifAbsent:[^ exceptionBlock value].
   303     contentsArray basicAt:index put:nil.
   314     keyArray basicAt:index put:nil.
   304     tally := tally - 1.
   315     tally := tally - 1.
   305     tally == 0 ifTrue:[
   316     tally == 0 ifTrue:[
   306         contentsArray := Array new:(self goodSizeFor:0). 
   317         keyArray := Array new:(self goodSizeFor:0). 
   307     ] ifFalse:[
   318     ] ifFalse:[
   308         index == contentsArray basicSize ifTrue:[
   319         index == keyArray basicSize ifTrue:[
   309             next := 1
   320             next := 1
   310         ] ifFalse:[
   321         ] ifFalse:[
   311             next := index + 1.
   322             next := index + 1.
   312         ].
   323         ].
   313         "this check is redundant - is also done in rehashFrom:,
   324         "this check is redundant - is also done in rehashFrom:,
   314          however, since there is some probability that the next
   325          however, since there is some probability that the next
   315          element is nil, this saves a send sometimes
   326          element is nil, this saves a send sometimes
   316         "
   327         "
   317         (contentsArray basicAt:next) notNil ifTrue:[
   328         (keyArray basicAt:next) notNil ifTrue:[
   318             self rehashFrom:next.
   329             self rehashFrom:next.
   319         ]
   330         ]
   320     ].
   331     ].
   321     ^ oldObject
   332     ^ oldObject
   322 ! !
   333 ! !
   324 !Set methodsFor:'enumerating'!
   335 !Set methodsFor:'enumerating'!
   325 
   336 
   326 do:aBlock
   337 do:aBlock
   327     "perform the block for all members in the collection."
   338     "perform the block for all members in the collection."
   328 
   339 
   329     contentsArray do:[:each |
   340     keyArray do:[:each |
   330         each notNil ifTrue:[
   341         each notNil ifTrue:[
   331             aBlock value:each
   342             aBlock value:each
   332         ]
   343         ]
   333     ]
   344     ]
   334 ! !
   345 ! !