Set.st
author claus
Mon, 04 Oct 1993 11:32:33 +0100
changeset 2 6526dde5f3ac
parent 1 a27a279701f8
child 3 24d81bf47225
permissions -rw-r--r--
2.7.2

"
 COPYRIGHT (c) 1991-93 by Claus Gittinger
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

Collection subclass:#Set
       instanceVariableNames:'tally contentsArray'
       classVariableNames:''
       poolDictionaries:''
       category:'Collections-Unordered'
!

Set comment:'

COPYRIGHT (c) 1991-93 by Claus Gittinger
              All Rights Reserved

a Set is a collection where each element occurs at most once.

%W% %E%
written jun 91 by claus
jan 93 claus: changed to use hashing
'!

!Set class methodsFor:'instance creation'!

new
    "return a new empty Set"

    ^ self new:7
!

new:anInteger
    "return a new empty Set with space for anInteger elements"

    ^ self basicNew setTally:anInteger
! !

!Set methodsFor:'private'!

fullCheck
    "check if dictionary is full, grow if so.
     Definition of full is currently:'filled more than 70%'"

    "grow if filled more than 70% "
    tally > (contentsArray basicSize * 7 // 10) ifTrue:[
       self grow
    ]
!

goodSizeFor:arg
    "return a good array size for the given argument.
     Returns the next prime after arg"

    arg <= 7 ifTrue:[^ 7].
    arg <= 131072 ifTrue:[
           "2 4 8  16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072"
        ^ #(7 7 11 17 37 67 131 257 521 1031 2053 4099 8209 16411 32771 65537 131101) at:(arg highBit)
    ].
    ^ arg bitOr:1
!

setTally:count
    "initialize the contents array (for at least count slots) 
     and set tally to zero.
     The size is increased to the next prime for better hashing behavior."

    contentsArray := Array new:(self goodSizeFor:count). 
    tally := 0
!

find:key ifAbsent:aBlock
    "Look for the key in the receiver.  If it is found, return
     the index of the slot containing the key, otherwise
     return the value of evaluating aBlock."

    |index      "{ Class:SmallInteger }"
     length     "{ Class:SmallInteger }"
     startIndex "{ Class:SmallInteger }" 
     probe|

    length := contentsArray basicSize.
    startIndex := key hash \\ length + 1.
    index := startIndex.

    [true] whileTrue:[
        probe := (contentsArray basicAt:index).
        key = probe ifTrue:[^ index].
        probe isNil ifTrue:[^ aBlock value].

        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        index == startIndex ifTrue:[^ aBlock value].
    ]
!

findElementOrNil:key
    "Look for the key in the receiver.  If it is found, return
     the index of the slot containing the key, otherwise
     return the index of the first unused slot. Grow the receiver,
     if key was not found, and no unused slots where present"

    |index      "{ Class:SmallInteger }"
     length     "{ Class:SmallInteger }"
     startIndex "{ Class:SmallInteger }"
     probe|

    length := contentsArray basicSize.
    startIndex := key hash \\ length + 1.
    index := startIndex.

    [true] whileTrue:[
        probe := contentsArray basicAt:index.
        (probe isNil or: [key = probe]) ifTrue:[^ index].

        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        index == startIndex ifTrue:[^ self grow findElementOrNil:key].
    ]
!

findNil:key
    "Look for the next slot usable for key.  This method assumes that
     key is not already in the receiver - used only while growing/rehashing"

    |index  "{ Class:SmallInteger }"
     length "{ Class:SmallInteger }"|

    length := contentsArray basicSize.
    index := key hash \\ length + 1.

    [(contentsArray basicAt:index) notNil] whileTrue:[
        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1
        ].
        "notice: no check for no nil found - we must find one since
         this is only called after growing"
    ].
    ^ index
!

grow
    "change the number of element slots of the collection to a useful
     new size"

    self grow:(contentsArray basicSize * 2)
!

grow:newSize
    "change the number of element slots of the collection - to do this,
     we have to rehash (which is done by re-adding all elements to a new
     empty set)."

    |oldElements oldSize
     srcIndex "{ Class:SmallInteger }"|

    oldElements := contentsArray.
    oldSize := tally.

    contentsArray := Array new:(self goodSizeFor:newSize). 

    srcIndex := 1.
    oldElements do:[:elem |
        elem notNil ifTrue:[
            "cannot be already there"
            contentsArray basicAt:(self findNil:elem) put:elem
        ].
        srcIndex := srcIndex + 1
    ].
    tally := oldSize
!

rehash
    "rehash is done by re-adding all elements to a new empty set."

    | oldArray element 
      n "{ Class:SmallInteger }" |

    oldArray := contentsArray.
    n := oldArray size.
    contentsArray := Array new:n.
    1 to:n do:[:index |
        element := oldArray at:index.
        element notNil ifTrue:[
            "cannot be already there"
            contentsArray basicAt:(self findNil:element) put:element
        ].
    ]
!

rehashFrom:startIndex
    "rehash elements starting at index - after a remove"

    |element i length
     index "{ Class:SmallInteger }" |

    length := contentsArray basicSize.
    index := startIndex.
    element := contentsArray basicAt:index.
    [element notNil] whileTrue:[
        i := self findNil:element.
        i == index ifTrue:[
            ^ self
        ].
        contentsArray basicAt:i put:element.
        contentsArray basicAt:index put:nil.

        index == length ifTrue:[
            index := 1
        ] ifFalse:[
            index := index + 1.
        ].
        element := contentsArray basicAt:index.
    ]
! !

!Set methodsFor:'accessing'!

at:index
    "report an error: at: is not allowed for Sets"

    ^ self errorNotKeyed
!

at:index put:anObject
    "report an error: at:put: is not allowed for Sets"

    ^ self errorNotKeyed
! !

!Set methodsFor:'testing'!

size
    "return the number of set elements"

    ^ tally
!

includes:anObject
    "return true if the argument anObject is in the receiver"

    ^ (self find:anObject ifAbsent:[0]) ~~ 0
!

isEmpty
    "return true if the receiver is empty"

    ^ tally == 0
!

occurrencesOf:anObject
    "return the number of occurrences of anObject in the receiver"

    (self find:anObject ifAbsent:[0]) == 0 ifTrue:[^ 0].
    ^ 1
!

isFixedSize
    "return true if the receiver cannot grow - this will vanish once
     Arrays and Strings learn how to grow ..."

    ^ false
! !

!Set methodsFor:'adding & removing'!

add:anObject
    "add the argument, anObject to the receiver"

    |index|

    anObject notNil ifTrue:[
        index := self findElementOrNil:anObject.
        (contentsArray basicAt:index) isNil ifTrue:[
            contentsArray basicAt:index put:anObject.
            tally := tally + 1.

            self fullCheck.
        ]
    ].
    ^ anObject
!

remove:oldObject ifAbsent:exceptionBlock
    "remove oldObject from the collection and return it.
     If it was not in the collection return the value of exceptionBlock."

    |index next|

    index := self find:oldObject ifAbsent:[^ exceptionBlock value].
    contentsArray basicAt:index put:nil.
    tally := tally - 1.
    tally == 0 ifTrue:[
        contentsArray := Array new:(self goodSizeFor:0). 
    ] ifFalse:[
        index == contentsArray basicSize ifTrue:[
            next := 1
        ] ifFalse:[
            next := index + 1.
        ].
        "this check is redundant - is also done in rehashFrom:,
         however, since there is some probability that the next
         element is nil, this saves a send sometimes
        "
        (contentsArray basicAt:next) notNil ifTrue:[
            self rehashFrom:next.
        ]
    ].
    ^ oldObject
! !

!Set methodsFor:'enumerating'!

do:aBlock
    "perform the block for all members in the collection."

    contentsArray do:[:each |
        each notNil ifTrue:[
            aBlock value:each
        ]
    ]
! !

!Set methodsFor: 'binary storage'!

readBinaryContentsFrom: stream manager: manager
    "must rehash after reload"

    super readBinaryContentsFrom: stream manager: manager.
    self rehash
! !