SortedCollection.st
author Claus Gittinger <cg@exept.de>
Sat, 13 Apr 1996 13:27:24 +0200
changeset 1167 a24366edb65e
parent 1162 3995101dfa44
child 1173 1d831f2c0d44
permissions -rw-r--r--
commentary

"
 COPYRIGHT (c) 1993 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

OrderedCollection subclass:#SortedCollection
	instanceVariableNames:'sortBlock'
	classVariableNames:'DefaultSortBlock'
	poolDictionaries:''
	category:'Collections-Sequenceable'
!

!SortedCollection class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1993 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    I keep my elements sorted. The sort order is defined by a sortblock,
    a two-argument block which, when given two elements of the collection, 
    should return true if the element given as first arg has to come before the 
    element given as second arg.

    Equal elements may occur multiple times.

    SortedCollection uses quickSort to resort and a binary search when adding/removing
    elements. 
    Because insertion/removal may require that remaining elements have to
    be shifted within the container, adding many individual elements may be done faster
    by creating a completely new collection from the unsorted elements.
    (see examples)

    Thus a sortBlock of [:a :b | a < b] defines ascending sort-order,
    while [:a :b | a > b] defines descening order.
    The default sortBlock for SortedCollections is the first one.
"
!

examples
"
    when many individual elements are to be added, it may be
    better to add them all unsorted & resort the collection completely.
    The reason is that for each individual #add:, the contents has to be
    moved around, to create an empty slot for the new element.

    timing example:

        |o rnd|

        o := SortedCollection new.
        rnd := Random new.
        1000 timesRepeat:[
            o add:rnd next.
        ]

    takes 488ms on a DS3100 (admitted: this is a very slow machine ;-)
    In contrast:

        |o rnd|

        o := OrderedCollection new.
        rnd := Random new.
        1000 timesRepeat:[
            o add:rnd next.
        ].
        o := o asSortedCollection

    takes 336ms on the same machine.
    Things become more drastic with more elements; with 10000 elements,
    the times are: 15418 ms  vs.  4332 ms
"
! !

!SortedCollection class methodsFor:'initialization'!

initialize
    "setup the default sortBlock."

    "/ only do this once at early startup
    DefaultSortBlock isNil ifTrue:[
        DefaultSortBlock := [:a :b | a < b ]
    ]

    "
     SortedCollection initialize
    "

    "Modified: 12.4.1996 / 12:29:27 / cg"
! !

!SortedCollection class methodsFor:'instance creation'!

new
    "return a new sortedCollection, the sorting is done using
     a compare for a < b, in ascending order"

    ^ super new setSortBlock:DefaultSortBlock

    "Modified: 12.4.1996 / 12:28:18 / cg"
!

new:size
    "return a new sortedCollection with preallocated size.
     The sorting is done using a compare for a < b, in ascending order"

    ^ (super new:size) setSortBlock:DefaultSortBlock

    "Modified: 12.4.1996 / 12:28:22 / cg"
!

sortBlock:aBlock
    "return a new sortedCollection, whe the sort order is defined
     by aBlock. 
     This must be a two-argument block which returns true if its arg1 has to come before
     its arg2 in the collection."

    ^ super new setSortBlock:aBlock

    "default:
     |s|

     s := SortedCollection new.
     s add:15; add:99; add:3; add:-29; add:17; add:-6.
     Transcript showCr:s
    "

    "sorting by absolute values:
     |s|

     s := SortedCollection sortBlock:[:a :b | a abs < b abs].
     s add:15; add:99; add:3; add:29; add:17; add:-6.
     Transcript showCr:s
    "

    "default again:
     |s|

     s := SortedCollection new.
     s add:'foo'; add:'Bar'; add:'baz'; add:'hello'; add:'world'; add:'Wow'.
     Transcript showCr:s
    "

    "sorting strings caseless:
     |s|

     s := SortedCollection sortBlock:[:a :b | a asLowercase < b asLowercase].
     s add:'foo'; add:'Bar'; add:'baz'; add:'hello'; add:'world'; add:'Wow'.
     Transcript showCr:s
    "

    "Modified: 12.4.1996 / 12:26:28 / cg"
!

withAll:aCollection sortBlock:aBlock
    "initialize from aCollection and set the sort-block"

    ^ (self sortBlock:aBlock) addAll:aCollection

    "
     SortedCollection withAll:#(1 2 3 4 5 6 7 8 9 0)
                      sortBlock:[:a :b | a > b] 
    "

    "default:
     |s|

     s := SortedCollection withAll:#(15 99 3 29 17 -6).
     Transcript showCr:s
    "

    "sorting by absolute values:
     |s|

     s := SortedCollection withAll:#(15 99 3 29 17 -6) sortBlock:[:a :b | a abs < b abs].
     Transcript showCr:s
    "

    "default again:
     |s|

     s := SortedCollection withAll:#('foo' 'Bar' 'baz' 'hello' 'world' 'Wow').
     Transcript showCr:s
    "

    "sorting strings caseless:
     |s|

     s := SortedCollection withAll:#('foo' 'Bar' 'baz' 'hello' 'world' 'Wow') sortBlock:[:a :b | a asLowercase < b asLowercase].
     Transcript showCr:s
    "

    "Modified: 12.4.1996 / 12:28:09 / cg"
! !

!SortedCollection methodsFor:'adding & removing'!

add:anObject
    "add the argument, anObject at the proper place in the
     receiver. Returns the argument, anObject."

    |index|

    lastIndex < firstIndex "i.e. self size == 0" ifTrue:[
        super add:anObject
    ] ifFalse:[
        index := self indexForInserting:anObject. 
        index := self makeRoomAtIndex:index.
        contentsArray at:index put:anObject
    ].
    ^ anObject

    "
     |c| #(7 3 9 10 99) asSortedCollection add:5; yourself    
     #(7 3 9 10 99) asSortedCollection add:1; yourself        
     #(7 3 9 10 99) asSortedCollection add:1000; yourself     
    "

    "Modified: 12.4.1996 / 16:47:41 / cg"
!

add:newObject after:oldObject
    "catch this - its not allowed for sortedCollections"

    self shouldNotImplement
!

add:newObject before:oldObject
    "catch this - its not allowed for sortedCollections"

    self shouldNotImplement
!

addAll:aCollection
    "add all elements of the argument, aCollection to the
     receiver"

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }"
     dstIndex  "{ Class: SmallInteger }"
     newSize newContents|

    "if aCollection is bigger than a threshhold, its faster
     to add all en-bloque and resort the whole collection.
     Question: what is a good treshhold ?"

    mySize := self size.
    otherSize := aCollection size.
    ((mySize == 0) or:[otherSize > 5]) ifTrue:[
	newSize := mySize + otherSize.
	newContents := Array new:newSize.
	newContents replaceFrom:1 to:mySize with:contentsArray startingAt:1.
	aCollection isSequenceable ifTrue:[
	    "maybe we can do it in one big move"
	    newContents replaceFrom:(mySize + 1) to:newSize with:aCollection startingAt:1.
	] ifFalse:[
	    dstIndex := mySize + 1.
	    aCollection do:[:element |
		newContents at:dstIndex put:element.
		dstIndex := dstIndex + 1
	    ]
	].
	firstIndex := 1.
	lastIndex := newSize.
	contentsArray := newContents.
	contentsArray sort:sortBlock.
	^ self
    ].
    super addAll:aCollection

    "
     #(7 3 9 10 99) asSortedCollection addAll:#(77 0 1 16 5) 
    "
!

addFirst:anObject
    "catch this - its not allowed for sortedCollections"

    self shouldNotImplement
!

addLast:anObject
    "catch this - its not allowed for sortedCollections"

    self shouldNotImplement
!

at:index put:anObject
    "catch this - its not allowed for sortedCollections"

    self shouldNotImplement
! !

!SortedCollection methodsFor:'converting'!

asSortedCollection
    "return the receiver as a sorted collection"

    "could be an instance of a subclass..."
    self class == SortedCollection ifTrue:[
	^ self
    ].
    ^ super asSortedCollection
! !

!SortedCollection methodsFor:'copying'!

copyEmpty:size
    "return a copy of the receiver with no elements, but
     the same size. This method has been be redefined to
     preserve the sortBlock."

    ^ (self species new:size) sortBlock:sortBlock
!

postCopyFrom:anOriginal
    "sent after a copy or when a new collection species has been created.
     The new collection should have the same sortBlock as the original."

    sortBlock := anOriginal sortBlock

    "
     #(4 7 1 99 -1 17) asSortedCollection inspect
     #(4 7 1 99 -1 17) asSortedCollection copy inspect
     (#(4 7 1 99 -1 17) asSortedCollection sortBlock:[:a :b | a > b]) inspect
     (#(4 7 1 99 -1 17) asSortedCollection sortBlock:[:a :b | a > b]) copy inspect
     (#(4 7 1 99 -1 17) asSortedCollection select:[:e| e even]) inspect
    "
! !

!SortedCollection methodsFor:'enumerating'!

collect:aBlock
    "evaluate the argument, aBlock for every element in the collection
     and return a collection of the results. Redefined to return an OrderedCollection;
     see X3J20 spec. (SortedCollection>>collect: should return an OrderedCollection)"

    |newCollection
     start  "{ Class:SmallInteger }"
     stop   "{ Class:SmallInteger }" |

    newCollection := OrderedCollection new:(self size).
    stop := lastIndex.
    start := firstIndex.
    start to:stop do:[:index |
	newCollection add:(aBlock value:(contentsArray at:index)).
    ].
    ^ newCollection
! !

!SortedCollection methodsFor:'instance protocol'!

sortBlock
    "return the block used for sorting"

    ^ sortBlock
!

sortBlock:aSortBlock
    "change the sort criteria for a sorted collection, resort the elements of 
    the collection, and return the receiver. The argument, aSortBlock must
    be a two-argument block which returns true if its arg1 has to come before
    its arg2 in the collection."

    sortBlock := aSortBlock.
    lastIndex > firstIndex ifTrue:[
	contentsArray quickSortFrom:firstIndex to:lastIndex sortBlock:aSortBlock
    ]

    "
     #(9 8 7 6 5 4 3) asSortedCollection
     #(9 8 7 6 5 4 3) asSortedCollection sortBlock:[:a : b | a < b]
     #(9 8 7 6 5 4 3) asSortedCollection sortBlock:[:a : b | a > b]
     #($f $G $z $Y $o $H) asSortedCollection
     #($f $G $z $Y $o $H) asSortedCollection sortBlock:[:a : b | a asUppercase < b asUppercase]
    "
! !

!SortedCollection methodsFor:'private'!

indexForInserting:anObject
    "search the index at which to insert anObject. 
     Can also be used to search for an existing element
     by checking if the element at the returned index is the one we look for.
     Uses a binarySearch since we can depend on the elements being on sorted order.
     The returned index is a physical one, for accessing contentsArray."

    |low    "{ Class: SmallInteger}"
     high   "{ Class: SmallInteger}"
     middle "{ Class: SmallInteger}"
     element|

    "
     we can of course use a binary search - since the elements are sorted
    "
    low := firstIndex.
    high := lastIndex.
    [low <= high] whileTrue:[
        middle := (low + high) // 2.
        element := contentsArray at:middle.
        (sortBlock value:element value:anObject) ifTrue:[
            "middleelement is smaller than object"
            low := middle + 1
        ] ifFalse:[
            high := middle - 1
        ]
    ].
    ^ low

    "
     #(1 2 3 4 7 99 1313 981989 898989898) asSortedCollection indexForInserting:50      
     #(1.0 2.0 3 4 7 49.0 51.0 99 1313 981989 898989898) asSortedCollection indexForInserting:50 
    "

    "Modified: 12.4.1996 / 13:22:03 / cg"
!

setSortBlock: aSortBlock
    "set the sortblock without resorting - private only"

    sortBlock := aSortBlock
! !

!SortedCollection methodsFor:'searching'!

after:anObject
    "return the element after the argument, anObject; or nil if there is none.
     If anObject is contained multiple times in the collection, return the
     the first element which is non-equal to anObject.
     If the receiver does not contain anObject, report an error"

    ^ self after:anObject ifAbsent:[self errorValueNotFound:anObject]

    "
     #(7 3 9 10 99) asSortedCollection after:50
     #(7 3 9 10 99) asSortedCollection after:1 
     #(7 3 9 10 99) asSortedCollection after:10
     #(7 3 9 10 99) asSortedCollection after:7 
     #(7 3 9 10 99) asSortedCollection after:99 
     #(7 10 3 10 9 10 10 99) asSortedCollection after:9  
     #(7 10 3 10 9 10 10 99) asSortedCollection after:10 
    "
!

after:anObject ifAbsent:exceptionBlock
    "return the element after the argument, anObject; or nil if there is none.
     If anObject is contained multiple times in the collection, return the
     the first element which is non-equal to anObject.
     If the receiver does not contain anObject, return the result from evaluating
     exceptionBlock."

    |index      "{ Class: SmallInteger }"
     last       "{ Class: SmallInteger }"|

    index := self indexForInserting:anObject.
    ((index < firstIndex) 
     or:[(contentsArray at:index) ~= anObject]) ifTrue:[^ exceptionBlock value].

    "skip multiple occurences of the same ..."

    last := lastIndex.
    [(index <= last) and:[(contentsArray at:index) = anObject]] whileTrue:[
        index := index + 1
    ].
    (index > last) ifTrue:[^ nil].
    ^ contentsArray at:index

    "Modified: 12.4.1996 / 13:20:33 / cg"
!

before:anObject
    "return the element before the argument, anObject; or nil if there is none.
     If the receiver does not contain anObject, report an error"

    ^ self before:anObject ifAbsent:[self errorValueNotFound:anObject]

    "
     #(7 3 9 10 99) asSortedCollection before:50
     #(7 3 9 10 99) asSortedCollection before:1 
     #(7 3 9 10 99) asSortedCollection before:1000 
     #(7 3 9 10 99) asSortedCollection before:10
     #(7 3 9 10 99) asSortedCollection before:7 
     #(7 3 9 10 99) asSortedCollection before:99 
     #(7 10 3 10 9 10 10 99) asSortedCollection before:9
     #(7 10 3 10 9 10 10 99) asSortedCollection before:10
    "
!

before:anObject ifAbsent:exceptionBlock
    "return the element before the argument, anObject; or nil if there is none.
     If the receiver does not contain anObject, return the result from evaluating
     exceptionBlock."

    |index      "{ Class: SmallInteger }"|

    index := self indexForInserting:anObject.
    ((index <= firstIndex) 
     or:[(contentsArray at:index) ~= anObject]) ifTrue:[^ exceptionBlock value].

    ^ contentsArray at:index - 1

    "
     #(7 3 9 10 99) asSortedCollection before:50
     #(7 3 9 10 99) asSortedCollection before:1 
     #(7 3 9 10 99) asSortedCollection before:10  
     #(7 3 9 10 99) asSortedCollection before:7   
     #(7 3 9 10 99) asSortedCollection before:99   
     #(7 10 3 10 9 10 10 99) asSortedCollection before:9  
     #(7 10 3 10 9 10 10 99) asSortedCollection before:10   
    "
! !

!SortedCollection methodsFor:'testing'!

includes:anObject
    "return true, if the argument, anObject is in the collection.
     Redefined, since due to being sorted, the inclusion check can
     be done with log-n compares i.e. much faster."

    |index "{ Class: SmallInteger }"|

    index := self indexForInserting:anObject.
    ((index < firstIndex) or:[index > lastIndex]) ifTrue:[^ false].
    ^ (contentsArray at:index) = anObject

    "
     #(7 3 9 10 99) asSortedCollection includes:50
     #(7 3 9 10 99) asSortedCollection includes:10
    "
!

occurrencesOf:anObject
    "return how many times the argument, anObject is in the collection.
     Redefined, since due to being sorted, the range of checked objects
     can be limited i.e. it can be done much faster."

    |index      "{ Class: SmallInteger }"
     tally      "{ Class: SmallInteger }"
     last       "{ Class: SmallInteger }" |

    index := self indexForInserting:anObject.
    last := lastIndex.    
    ((index < firstIndex) or:[index > last]) ifTrue:[^ 0].

    "/ there may be multiple of them; count 'em

    tally := 0.
    [(index <= last) and:[(contentsArray at:index) = anObject]] whileTrue:[
        tally := tally + 1.
        index := index + 1
    ].
    ^ tally

    "
     #(7 3 9 10 99) asSortedCollection occurrencesOf:50
     #(7 3 9 10 99) asSortedCollection occurrencesOf:10
     #(7 10 3 10 9 10 10 99) asSortedCollection occurrencesOf:10
    "

    "Modified: 12.4.1996 / 18:48:40 / cg"
! !

!SortedCollection class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/SortedCollection.st,v 1.28 1996-04-13 11:27:24 cg Exp $'
! !
SortedCollection initialize!