Set.st
author Claus Gittinger <cg@exept.de>
Wed, 12 Feb 1997 14:17:41 +0100
changeset 2383 8b2957f5ec90
parent 2328 0fd1d715e5a9
child 2427 d09f8539fa47
permissions -rw-r--r--
checkin from browser

"
 COPYRIGHT (c) 1991 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

Collection subclass:#Set
	instanceVariableNames:'tally keyArray'
	classVariableNames:'DeletedEntry'
	poolDictionaries:''
	category:'Collections-Unordered'
!

Object subclass:#EmptySlot
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	privateIn:Set
!

!Set class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1991 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    a Set is a collection where each element occurs at most once.
    The inclusion test is done using = for comparison; 
    see IdentitySet for sets using identity compare.
    Sets use hashing for fast access, this access is considerably faster,
    if a good hash-number is returned by the elements.

    Notice that the default hash (Object>>hash) is not perfect; due to
    the implementation of hash-keys in ST/X, increased hash collisions
    are to be expected for large sets (say: > 20000 element). 
    If your objects are heavyly used in sets or dictionaries, and you need
    big collections, your instances may provide a better hash values.

    Performance hints: 
      If only symbols or smallIntegers are used as keys, 
      use an instance of IdentitySet for slightly better performance, 
      since both hashing and comparison is faster.

      If you have a rough idea how big the set is going to grow,
      create it using #new: instead of #new. Even if the size given is a
      poor guess (say half of the real size), there is some 20-30% performance
      win to expect, since many resizing operations of the set are avoided.

    Examples:

        |s|
        s := Set new.
        s add:'hello'.
        s add:'world'.
        s add:#foo.
        s add:1.2345678.
        s add:'hello'.

        s printNL.
        's size -> ' print. s size printNL.
        '(s includes:''hello'') -> ' print. (s includes:'hello') printNL.
        '(s includes:#foo)    -> ' print. (s includes:#foo) printNL.
        '(s includes:''foo'')   -> ' print. (s includes:'foo') printNL.
        '(s includes:#bar)    -> ' print. (s includes:#bar) printNL.

    [author:]
        Claus Gittinger
"
! !

!Set class methodsFor:'initialization'!

initialize
    "initialize the Set class"

    DeletedEntry isNil ifTrue:[
        DeletedEntry := EmptySlot new
    ].

    "Set initialize"

    "Modified: 24.1.1997 / 21:09:00 / cg"
! !

!Set class methodsFor:'instance creation'!

new
    "return a new empty Set"

    ^ self new:7
!

new:anInteger
    "return a new empty Set with space for anInteger elements"

    "
     make it somewhat bigger; hashing works better if fill grade is
     below 10% (make it 75% here ..)
    "
    ^ self basicNew setTally:(anInteger * 4 // 3)
! !

!Set class methodsFor:'queries'!

goodSizeFrom:arg 
    "return a good array size for the given argument.
     Returns the next prime after arg, since prime sizes are good for hashing."

    |n|

    arg <= 11 ifTrue:[^ 11].

    n := arg * 3 // 2.

    "
     mhmh - this returns good numbers for collections with up-to about
     500k elements; if you have bigger ones, add some more primes here ...
    "
    n <= 524288 ifTrue:[
	   "2  4  8  16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288"
	^ #(11 11 11 17 37 67 131 257 521 1031 2053 4099 8209 16411 32771 65537 131101 262147 524309) at:(n highBit)
    ].
    "
     make it odd - at least
    "
    ^ n bitOr:1
! !

!Set methodsFor:'accessing'!

at:index
    "report an error: at: is not allowed for Sets"

    ^ self errorNotKeyed
!

at:index put:anObject
    "report an error: at:put: is not allowed for Sets"

    ^ self errorNotKeyed
! !

!Set methodsFor:'adding & removing'!

add:anObject
    "add the argument, anObject to the receiver.

     WARNING: do not add elements while iterating over the receiver.
              Iterate over a copy to do this."

    |index "{ Class: SmallInteger }"|

    anObject notNil ifTrue:[
        index := self findKeyOrNil:anObject.
        (keyArray basicAt:index) isNil ifTrue:[
            "/ not already there
            keyArray basicAt:index put:anObject.
            tally := tally + 1.

            self fullCheck.
        ]
    ].
    ^ anObject

    "Modified: 30.1.1997 / 14:58:08 / cg"
!

remove:oldObject ifAbsent:exceptionBlock
    "remove oldObject from the collection and return it.
     If it was not in the collection return the value of exceptionBlock.

     WARNING: do not remove elements while iterating over the receiver.
              See #saveRemove: to do this."

    |index next|

    index := self find:oldObject ifAbsent:0.
    index == 0 ifTrue:[^ exceptionBlock value].

    keyArray basicAt:index put:nil.
    tally := tally - 1.
    tally == 0 ifTrue:[
        keyArray := self keyContainerOfSize:(self class goodSizeFrom:0). 
    ] ifFalse:[
        index == keyArray basicSize ifTrue:[
            next := 1
        ] ifFalse:[
            next := index + 1.
        ].
        (keyArray basicAt:next) notNil ifTrue:[
            keyArray basicAt:index put:DeletedEntry.
        ].
        self emptyCheck
    ].
    ^ oldObject

    "Modified: 29.1.1997 / 21:34:33 / cg"
!

removeAll
    "remove all elements from the receiver. Returns the receiver."

    self setTally:7.

    "Modified: 12.4.1996 / 13:35:06 / cg"
!

saveRemove:oldObject 
    "remove the element, oldObject from the collection.
     Return the element.
     If it was not in the collection return nil.

     In contrast to #remove:, this does not resize the underlying collection
     and therefore does NOT rehash & change the elements order.
     Therefor this can be used while enumerating the receiver,
     which is not possible if #remove: is used.

     WARNING: since no resizing is done, the physical amount of memory used
              by the container remains the same, although the logical size shrinks.
              You may want to manually resize the receiver using #emptyCheck.
              (after the loop)"

    |index "{ Class:SmallInteger }"
     next  "{ Class:SmallInteger }"|

    oldObject isNil ifTrue:[^ nil].

    index := self find:oldObject ifAbsent:0.
    index == 0 ifTrue:[^ nil].

    keyArray basicAt:index put:nil.

    tally := tally - 1.
    tally ~~ 0 ifTrue:[
        index == keyArray basicSize ifTrue:[
            next := 1
        ] ifFalse:[
            next := index + 1.
        ].
        (keyArray basicAt:next) notNil ifTrue:[
            keyArray basicAt:index put:DeletedEntry
        ].
    ].
    ^ oldObject

    "does NOT work:

        |s|

        s := Set new.
        s add:1.
        s add:2.
        s add:3.
        s add:4.
        s add:5.
        s add:6.
        s add:7.
        s add:8.
        s add:9.
        s do:[:v |
            v odd ifTrue:[
                s remove:v 
            ]
        ].
        s inspect
    "

    "DOES work:

        |s|

        s := Set new.
        s add:1.
        s add:2.
        s add:3.
        s add:4.
        s add:5.
        s add:6.
        s add:7.
        s add:8.
        s add:9.
        s do:[:v |
            v odd ifTrue:[
                s saveRemove:v 
            ]
        ].
        s inspect
    "

    "Created: 1.3.1996 / 21:14:26 / cg"
    "Modified: 29.1.1997 / 21:35:55 / cg"
! !

!Set methodsFor:'binary storage'!

readBinaryContentsFrom: stream manager: manager
    "must rehash after reload"

    super readBinaryContentsFrom: stream manager: manager.
    self rehash
! !

!Set methodsFor:'copying'!

postCopy
    "have to copy the keyArray too"

    keyArray := keyArray shallowCopy
! !

!Set methodsFor:'enumerating'!

do:aBlock
    "perform the block for all members in the collection.

     WARNING: do not add/remove elements while iterating over the receiver.
	      Iterate over a copy to do this."

    |sz "{ Class: SmallInteger }"
     element|

    sz := keyArray size.
    1 to:sz do:[:index |
	element := keyArray at:index.
	(element notNil and:[element ~~ DeletedEntry]) ifTrue:[
	    aBlock value:element
	]
    ]

    "Modified: 1.3.1996 / 21:41:13 / cg"
! !

!Set methodsFor:'inspecting'!

inspectorClass
    "redefined to use SetInspector
     (instead of the default Inspector)."

    ^ SetInspectorView
! !

!Set methodsFor:'private'!

emptyCheck
    "check if the receiver has become too empty (after a remove)
     and shrink if it makes sense.
     Definition of 'too empty' is 'filled less than 12.5% (i.e. 1/8th)'"

    |sz      "{Class: SmallInteger}"
     newSize "{Class: SmallInteger}" |

    sz := keyArray basicSize.
    sz > 56 ifTrue:[
        "
         shrink if too empty
        "
        tally < (sz // 8) ifTrue:[
            newSize := sz // 7.
            self grow:newSize
        ]
    ]

    "Modified: 4.9.1996 / 14:39:53 / cg"
!

find:key ifAbsent:aBlock
    "Look for the key in the receiver.  If it is found, return
     the index of the slot containing the key, otherwise
     return the value of evaluating aBlock."

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"
     startIndex probe|

    length := keyArray basicSize.

"/
"/  length < 10 ifTrue:[
"/      "assuming, that for small collections the overhead of hashing
"/       is larger ... maybe that proves wrong 
"/       (if overhead of comparing is higher)"
"/      ^ keyArray indexOf:key ifAbsent:aBlock
"/  ].

    index := key hash.
    index < 16r1FFFFFFF ifTrue:[
        index := index * 2
    ].
    index := index \\ length + 1.
    startIndex := index.

    [true] whileTrue:[
	probe := (keyArray basicAt:index).
	probe isNil ifTrue:[^ aBlock value].
	key = probe ifTrue:[^ index].

	index == length ifTrue:[
	    index := 1
	] ifFalse:[
	    index := index + 1
	].
	index == startIndex ifTrue:[^ aBlock value].
    ]
!

findKeyOrNil:key
    "Look for the key in the receiver.  
     If it is found, return return the index of the first unused slot. 
     Grow the receiver, if key was not found, and no unused slots were present"

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"
     startIndex probe 
     delIndex "{ Class:SmallInteger }" |

    delIndex := 0.

    length := keyArray basicSize.
    index := key hash.
    index < 16r1FFFFFFF ifTrue:[
        index := index * 2
    ].
    index := index \\ length + 1.
    startIndex := index.

    [true] whileTrue:[
        probe := keyArray basicAt:index.
        probe isNil ifTrue:[
            delIndex == 0 ifTrue:[^ index].
            keyArray basicAt:delIndex put:nil.
            ^ delIndex
        ].
        key = probe ifTrue:[^ index].

        probe == DeletedEntry ifTrue:[
            delIndex == 0 ifTrue:[
                delIndex := index
            ]
        ].

        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        index == startIndex ifTrue:[
            delIndex ~~ 0 ifTrue:[
                keyArray basicAt:delIndex put:nil.
                ^ delIndex
            ].
            ^ self grow findKeyOrNil:key
        ].
    ]

    "Modified: 4.6.1996 / 11:15:16 / cg"
!

findNil:key
    "Look for the next slot usable for key.  This method assumes that
     key is not already in the receiver and that keyArray does not have
     previously removed entries.
     To be used only while growing/rehashing to enter elements into a fresh
     collection."

    |startIndex
     index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"|

    length := keyArray basicSize.
    index := key hash.
    index < 16r1FFFFFFF ifTrue:[
        index := index * 2
    ].
    index := index \\ length + 1.
    startIndex := index.

    [(keyArray basicAt:index) notNil] whileTrue:[
        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        index == startIndex ifTrue:[
            "notice: should not be reached
             - we must find one since this is only to be 
               called after growing"
            ^ 0
        ].
    ].
    ^ index

    "Modified: 30.4.1996 / 14:15:20 / cg"
!

fullCheck
    "check if collection is full (after an add); grow if so.
     Definition of 'full' is currently: 'filled more than 75% (i.e. 3/4th)'"

    |sz "{Class: SmallInteger}" |

    "
     grow if filled more than 75% 
    "
    sz := keyArray basicSize.
    tally > (sz * 3 // 4) ifTrue:[
       self grow
    ]
!

grow
    "change the number of element slots of the collection to a useful
     new size"

    self grow:(keyArray basicSize * 2)
!

grow:newSize
    "change the number of element slots of the collection - to do this,
     we have to rehash (which is done by re-adding all elements to a new
     empty set)."

    |elem oldKeyArray newKeyArray deletedEntry
     containerSize oldSize "{ Class:SmallInteger }"|

    oldKeyArray := keyArray.
    oldSize := oldKeyArray size.
    containerSize := (self class goodSizeFrom:newSize).
    containerSize == oldSize ifTrue:[^ self].

    keyArray := newKeyArray := self keyContainerOfSize:containerSize. 

    deletedEntry := DeletedEntry.
    1 to:oldSize do:[:srcIndex |
	elem := oldKeyArray basicAt:srcIndex.
	(elem notNil and:[elem ~~ deletedEntry]) ifTrue:[
	    "cannot be already there"
	    newKeyArray basicAt:(self findNil:elem) put:elem
	].
    ].
!

initialIndexFor:hashKey boundedBy:length
    "for ST-80 compatibility only; it is (currently) not used in this
     implementation of sets. Therefore, in ST/X it does not make sense
     to redefine it. (which may be a bad design decision, but slightly
     improves performance, by avoiding an extra message send ...)"

    ^ (hashKey * 2 \\ length) + 1.
!

keyContainerOfSize:n
    "return a container for keys of size n.
     Extracted to make life of weak subclasses easier ..."

    ^ Array basicNew:n
!

rehash
    "rehash is done by re-adding all elements to a new empty set.
     Rehash is needed after a binaryRead, for example."

    |element oldKeyArray newKeyArray
     n "{ Class:SmallInteger }"|

    oldKeyArray := keyArray.
    n := oldKeyArray size.
    keyArray := newKeyArray := self keyContainerOfSize:n.

    1 to:n do:[:index |
	element := oldKeyArray at:index.
	(element notNil and:[element ~~ DeletedEntry]) ifTrue:[
	    "cannot be already there"
	    newKeyArray basicAt:(self findNil:element) put:element
	].
    ]
!

rehashFrom:startIndex
    "rehash elements starting at index - after a remove.
     Notice: due to the new implementation of remove, 
	     this is no longer needed"

    |element i "{ Class:SmallInteger }"
     length
     index "{ Class:SmallInteger }" |

    length := keyArray basicSize.
    index := startIndex.
    element := keyArray basicAt:index.
    [element notNil] whileTrue:[
	i := self findNil:element.
	i == index ifTrue:[
	    ^ self
	].
	keyArray basicAt:i put:element.
	keyArray basicAt:index put:nil.

	index == length ifTrue:[
	    index := 1
	] ifFalse:[
	    index := index + 1.
	].
	element := keyArray basicAt:index.
    ]
!

setTally:count
    "initialize the contents array (for at least count slots) 
     and set tally to zero.
     The size is increased to the next prime for better hashing behavior."

    keyArray := self keyContainerOfSize:(self class goodSizeFrom:count). 
    tally := 0
! !

!Set methodsFor:'testing'!

capacity 
    "return the number of elements, that the receiver is
     prepared to take.
     Not used by the system; added for ST-80 compatibility."

    ^ keyArray size
!

includes:anObject
    "return true if the argument anObject is in the receiver"

    ^ (self find:anObject ifAbsent:0) ~~ 0
!

isEmpty
    "return true if the receiver is empty"

    ^ tally == 0
!

isFixedSize
    "return true if the receiver cannot grow - this will vanish once
     Arrays and Strings learn how to grow ..."

    ^ false
!

notEmpty
    "return true if the receiver is not empty"

    ^ tally ~~ 0

    "Created: 12.2.1997 / 12:39:02 / cg"
!

occurrencesOf:anObject
    "return the number of occurrences of anObject in the receiver"

    (self find:anObject ifAbsent:0) == 0 ifTrue:[^ 0].
    ^ 1
!

size
    "return the number of set elements"

    ^ tally
! !

!Set class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/Set.st,v 1.47 1997-02-12 13:16:59 cg Exp $'
! !
Set initialize!