--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RunArray.st Sat May 11 12:47:55 1996 +0200
@@ -0,0 +1,803 @@
+"
+ This class is not covered by or part of the ST/X licence.
+
+
+ COPYRIGHT.
+ The above file is a Manchester Goodie protected by copyright.
+ These conditions are imposed on the whole Goodie, and on any significant
+ part of it which is separately transmitted or stored:
+ * You must ensure that every copy includes this notice, and that
+ source and author(s) of the material are acknowledged.
+ * These conditions must be imposed on anyone who receives a copy.
+ * The material shall not be used for commercial gain without the prior
+ written consent of the author(s).
+ Further information on the copyright conditions may be obtained by
+ sending electronic mail:
+ To: goodies-lib@cs.man.ac.uk
+ Subject: copyright
+ or by writing to The Smalltalk Goodies Library Manager, Dept of
+ Computer Science, The University, Manchester M13 9PL, UK
+
+ (C) Copyright 1993 University of Manchester
+ For more information about the Manchester Goodies Library (from which
+ this file was distributed) send e-mail:
+ To: goodies-lib@cs.man.ac.uk
+ Subject: help
+"
+
+
+
+SequenceableCollection subclass:#RunArray
+ instanceVariableNames:'runs'
+ classVariableNames:''
+ poolDictionaries:''
+ category:'Collections-Sequenceable'
+!
+
+RunArray comment:'This implements an ordered collection which uses runs to minimise the amount
+ of data that it holds. Basically it should be used if you know that an ordered
+ collections is giong to contain a lot of runs of eactly the same data. Implemented
+ to allow simultation playback, since the ordered collctions which that generates
+ are so big that the complier falls over, though most of it is extremely repetetive.
+ This should be totally abstracted. The user should not be a ble to see the difference
+ between an ordered collection and a ComrpessedOrderedCollection. This has a
+ lot in common with RunArray, and the two should probably share implementation.
+ but I could not do some of the things I wanted with the RunArray code, so this
+ is all done on its own.
+ Some of this could be made faster by adding a cache of the start and finish
+ indices of each run, but since I envisage that most additions etc. will be to
+ and from the end those are not included. In addition I have implemented the
+ bare essentials of this for what I need it for - i.e. add to the end and take
+ off the beginning.'!
+
+!RunArray class methodsFor:'documentation'!
+
+copyright
+"
+ This class is not covered by or part of the ST/X licence.
+
+
+ COPYRIGHT.
+ The above file is a Manchester Goodie protected by copyright.
+ These conditions are imposed on the whole Goodie, and on any significant
+ part of it which is separately transmitted or stored:
+ * You must ensure that every copy includes this notice, and that
+ source and author(s) of the material are acknowledged.
+ * These conditions must be imposed on anyone who receives a copy.
+ * The material shall not be used for commercial gain without the prior
+ written consent of the author(s).
+ Further information on the copyright conditions may be obtained by
+ sending electronic mail:
+ To: goodies-lib@cs.man.ac.uk
+ Subject: copyright
+ or by writing to The Smalltalk Goodies Library Manager, Dept of
+ Computer Science, The University, Manchester M13 9PL, UK
+
+ (C) Copyright 1993 University of Manchester
+ For more information about the Manchester Goodies Library (from which
+ this file was distributed) send e-mail:
+ To: goodies-lib@cs.man.ac.uk
+ Subject: help
+"
+
+
+!
+
+documentation
+"
+ This implements an array which uses runs to minimise the amount
+ of data that it holds. Basically it should be used if you know that an array
+ is going to contain a lot of runs of exactly the same data.
+
+ The user should not be able to see the difference between an Array and a RunArray. This has a
+
+ Notice:
+ there is only a space saving if there are really runs (i.e. multiple
+ elements which compare same).
+ Otherwise, an Array is more compact.
+
+
+ [author:]
+ Claus Gittinger
+
+ [see also:]
+ Array OrderedCollection CompressedorderedCollection
+"
+!
+
+examples
+"
+ this eats up a lot of memory ...
+ ... but its relatively fast:
+ [exBegin]
+ |coll|
+
+ coll := OrderedCollection new.
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ 100000 timesRepeat:[coll add:'hello'].
+ 100000 timesRepeat:[coll add:'world'].
+ ]
+ ).
+ coll inspect.
+ [exEnd]
+
+
+ this is very space efficient ...
+ ... but much slower:
+ [exBegin]
+ |coll|
+
+ coll := RunArray new.
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ 100000 timesRepeat:[coll add:'hello'].
+ 100000 timesRepeat:[coll add:'world'].
+ ]
+ ).
+ coll inspect.
+ [exEnd]
+
+
+ this is very space efficient ...
+ ... AND much faster:
+ [exBegin]
+ |coll|
+
+ coll := RunArray new.
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ coll add:'hello' withCount:100000.
+ coll add:'world' withCount:100000.
+ ]
+ ).
+ coll inspect.
+ [exEnd]
+
+
+ this is very space INEFFICIENT (a real memory pig ;-) ...
+ ... and much slower:
+ [exBegin]
+ |coll|
+
+ coll := RunArray new.
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ 1 to:1000 do:[:i | coll add:i].
+ ]
+ ).
+ coll inspect.
+ [exEnd]
+
+
+ things like this are VERY slow:
+ [exBegin]
+ |coll|
+
+ coll := RunArray new.
+ 1 to:1000 do:[:i | coll add:i].
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ 1000 to:1 by:-2 do:[:i | coll removeIndex:i].
+ ]
+ )
+ [exEnd]
+
+ much faster with OCs (although these are not optimal for this, too):
+ [exBegin]
+ |coll|
+
+ coll := OrderedCollection new.
+ 1 to:1000 do:[:i | coll add:i].
+ Transcript showCr:(
+ Time millisecondsToRun:[
+ 1000 to:1 by:-2 do:[:i | coll removeIndex:i].
+ ]
+ )
+ [exEnd]
+
+ in general, such things are better done by constructing a new collection
+ from scratch (use #select: or #collect:)
+"
+! !
+
+!RunArray class methodsFor:'instance creation'!
+
+new:size
+ "ignore the size argument - we dont know how many runs are
+ needed - anyway"
+
+ ^ self new
+! !
+
+!RunArray methodsFor:'accessing'!
+
+at:anInteger
+ "Answer the element at index anInteger.
+ at: is used by a knowledgeable client to access an existing element
+ This is a pretty dumb thing to do a compressed collection and it is
+ not at all efficient (think of that as a discouragement)."
+
+ |position "{ Class: SmallInteger }"
+ nRuns "{ Class: SmallInteger }"|
+
+ (anInteger > 0) ifTrue:[
+ position := 1.
+ nRuns := runs size.
+ 1 to:nRuns by:2 do:[:runIndex |
+ |runLen|
+
+ runLen := runs at:runIndex.
+ anInteger >= position ifTrue:[
+ anInteger < (position + runLen) ifTrue:[
+ ^ runs at:(runIndex + 1)
+ ].
+ ].
+ position := position + runLen
+ ]
+ ].
+ ^ self errorInvalidKey:anInteger
+
+ "
+ |c|
+
+ c := RunArray new.
+ c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5.
+ c at:1.
+ c at:2.
+ c at:3.
+ c at:4.
+ c at:5.
+ c at:6.
+ c at:7.
+ c at:8.
+ c at:9.
+ c at:10.
+ "
+
+ "Modified: 10.5.1996 / 16:59:03 / cg"
+!
+
+at:index put:anObject
+ "Put anObject at element index anInteger.
+ at:put: can not be used to append, front or back, to an ordered
+ collection; it is used by a knowledgeable client to replace an
+ element. It doesn't make a lot of sense for a compressed collection,
+ and as you can see, the implementation is awful - still if you will
+ insist on using this what can you expect. Basically this just copies
+ itself up to the start point then adds the required element then
+ copies the rest of itself"
+
+ |runSz runIndex runOffset len l1 l2 prevIdx nextIdx
+ val newRuns newValues prevLen prevVal nextLen nextVal idx|
+
+ runSz := runs size.
+
+ runIndex := nil.
+ (index > 0) ifTrue:[
+ runOffset := 1.
+ idx := 1.
+ [runIndex isNil and:[idx < runSz]] whileTrue:[
+ len := runs at:idx.
+ nextIdx := runOffset + len.
+ index >= runOffset ifTrue:[
+ index < nextIdx ifTrue:[
+ runIndex := idx.
+ nextIdx := runOffset. "/ dont advance
+ ].
+ ].
+ runOffset := nextIdx.
+ idx := idx + 2.
+ ]
+ ].
+ runIndex isNil ifTrue:[
+ ^ self errorInvalidKey:index
+ ].
+
+ val := runs at:(runIndex + 1).
+
+ "/ easiest: value there is the same ...
+
+ val = anObject ifTrue:[
+ ^ anObject
+ ].
+
+ "/ if the length is 1, this is an island ...
+ "/ ... which is either easy, or requires a merge.
+
+ len := runs at:runIndex.
+ len = 1 ifTrue:[
+ "/ check if it can be merged into the next or previous run
+
+ runIndex > 1 ifTrue:[
+ prevIdx := runIndex - 2.
+ prevVal := runs at:(prevIdx + 1).
+ prevVal = anObject ifTrue:[
+ "/ can merge it into previous
+
+ prevLen := runs at:prevIdx.
+
+ "/ check if merge into next is also possible (filling an island)
+
+ runIndex < (runSz - 1) ifTrue:[
+ nextIdx := runIndex + 2.
+ nextVal := runs at:(nextIdx + 1).
+ nextVal = anObject ifTrue:[
+ "/ can merge with both.
+
+ nextLen := runs at:nextIdx.
+
+ runs at:prevIdx put:prevLen+nextLen+1.
+ runSz := (runSz - 4).
+ newRuns := Array new:runSz.
+ newRuns replaceFrom:1 to:(prevIdx + 1) with:runs startingAt:1.
+ newRuns replaceFrom:runIndex to:runSz with:runs startingAt:nextIdx+2.
+ runs := newRuns.
+ ^ anObject
+ ]
+ ].
+
+ runs at:prevIdx put:prevLen+1.
+
+ runSz := (runSz - 2).
+ newRuns := Array new:runSz.
+ newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+ newRuns replaceFrom:runIndex to:runSz with:runs startingAt:runIndex+2.
+ runs := newRuns.
+
+ ^ anObject
+ ].
+ ].
+
+ "/ check if merge into next is possible
+
+ runIndex < runSz ifTrue:[
+ nextIdx := runIndex + 2.
+ nextVal := runs at:nextIdx+1.
+ nextVal = anObject ifTrue:[
+ nextLen := runs at:nextIdx.
+ runs at:nextIdx put:nextLen + 1.
+
+ runSz := (runSz - 2).
+ newRuns := Array new:runSz.
+ newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+ newRuns replaceFrom:runIndex to:runSz with:runs startingAt:nextIdx.
+ runs := newRuns.
+ ^ anObject
+ ].
+ ].
+
+ "/ no merge; island remains
+
+ runs at:(runIndex+1) put:anObject.
+ ^ anObject
+ ].
+
+ runOffset == index ifTrue:[
+ "/ at the beginning of that run ...
+
+ "/ check if its better added to the previous run ...
+
+ runIndex > 1 ifTrue:[
+ prevIdx := runIndex - 2.
+ prevVal := runs at:prevIdx+1.
+ prevVal = anObject ifTrue:[
+ prevLen := runs at:prevIdx.
+ runs at:prevIdx put:prevLen + 1.
+ runs at:runIndex put:len - 1.
+ ^ anObject.
+ ].
+ ].
+
+ "/ must cut off 1 & insert a new run before ..
+
+ runs at:runIndex put:len - 1.
+
+ runSz := (runSz + 2).
+ newRuns := Array new:runSz.
+ newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+ newRuns replaceFrom:runIndex+2 to:runSz with:runs startingAt:runIndex.
+ runs := newRuns.
+
+ runs at:runIndex put:1.
+ runs at:runIndex+1 put:anObject.
+ ^ anObject
+ ].
+
+ (runOffset + len - 1) == index ifTrue:[
+ "/ at the end ...
+
+ "/ check if its better added to the next run ...
+
+ runIndex < runSz ifTrue:[
+ nextIdx := runIndex + 2.
+ nextVal := runs at:nextIdx+1.
+ nextVal = anObject ifTrue:[
+ nextLen := runs at:nextIdx.
+ runs at:nextIdx put:nextLen + 1.
+ runs at:runIndex put:len - 1.
+ ^ anObject.
+ ].
+ ].
+
+ "/ must cut off 1 & insert a new run after ..
+
+ runs at:runIndex put:len - 1.
+
+ runSz := (runSz + 2).
+ newRuns := Array new:runSz.
+ newRuns replaceFrom:1 to:(runIndex + 1) with:runs startingAt:1.
+ newRuns replaceFrom:runIndex+4 to:runSz with:runs startingAt:runIndex+2.
+ runs := newRuns.
+
+ runs at:runIndex+2 put:1.
+ runs at:runIndex+2+1 put:anObject.
+ ^ anObject
+ ].
+
+ "/ hardest - split run into two, insert new run in-between
+
+ runSz := (runSz + 4).
+ newRuns := Array new:runSz.
+
+ runIndex > 1 ifTrue:[
+ newRuns replaceFrom:1 to:runIndex-1 with:runs.
+ ].
+ newRuns replaceFrom:runIndex+6 to:(runSz+4) with:runs startingAt:runIndex+2.
+
+ l2 := len - (index - runOffset).
+ l1 := len - l2.
+ l2 := l2 - 1.
+
+ newRuns at:runIndex put:l1.
+ newRuns at:runIndex+1 put:val.
+
+ newRuns at:runIndex+4 put:l2.
+ newRuns at:runIndex+5 put:val.
+
+ "/ insert
+ newRuns at:runIndex+2 put:1.
+ newRuns at:runIndex+3 put:anObject.
+
+ runs := newRuns.
+ ^ anObject
+
+ "
+ |c|
+
+ Transcript cr.
+
+ c := RunArray new.
+ c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5; yourself.
+ Transcript showCr:c.
+
+ c at:1 put:$a.
+ Transcript showCr:c.
+ c.
+
+ c at:3 put:$a.
+ Transcript showCr:c.
+ c.
+
+ c at:4 put:$a.
+ Transcript showCr:c.
+ c.
+
+ c at:5 put:$a.
+ Transcript showCr:c.
+ c.
+
+ c at:2 put:$0.
+ Transcript showCr:c.
+ c.
+
+ c at:2 put:$a.
+ Transcript showCr:c.
+ c.
+
+ Transcript showCr:c.
+ "
+
+ "Modified: 11.5.1996 / 08:41:49 / cg"
+!
+
+size
+ "Answer how many elements the receiver contains."
+
+ |n "{ Class: SmallInteger }"
+ runSz "{ Class: SmallInteger }"|
+
+ n := 0.
+ runSz := runs size.
+ 1 to:runSz by:2 do:[:i |
+ n := n + (runs at:i)
+ ].
+ ^ n
+! !
+
+!RunArray methodsFor:'adding'!
+
+add:newObject
+ "add newObject at the end; returns the object (sigh)"
+
+ ^ self add:newObject withOccurrences:1.
+
+ "
+ |c|
+
+ c := RunArray new.
+ c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5; yourself.
+ "
+
+ "Modified: 10.5.1996 / 17:01:20 / cg"
+!
+
+add:newObject withOccurrences:n
+ "add newObject n times at the end; returns the object (sigh)"
+
+ |lastIdx runSz newRuns|
+
+ runs notNil ifTrue:[
+ "/ check for merge
+
+ runSz := runs size.
+
+ (runs at:runSz) = newObject ifTrue:[
+ lastIdx := runSz - 1.
+ runs at:lastIdx put:(runs at:lastIdx) + n.
+ ^ newObject
+ ].
+
+ newRuns := Array new:(runSz + 2).
+ newRuns replaceFrom:1 to:runSz with:runs.
+ newRuns at:runSz+1 put:n.
+ newRuns at:runSz+2 put:newObject.
+ runs := newRuns.
+ ] ifFalse:[
+ runs := Array with:n with:newObject.
+ ].
+ ^ newObject
+
+ "
+ |c|
+
+ c := RunArray new.
+ c add:1 withOccurrences:1000; yourself.
+ c add:2 withOccurrences:1000; yourself.
+ "
+
+ "Modified: 10.5.1996 / 18:29:56 / cg"
+!
+
+grow:howBig
+ "grow or shrink the receiver to contain howBig elements.
+ If growing, new slots will be filled with nil."
+
+ |sz info runIndex runOffset runSz newRuns|
+
+ sz := self size.
+
+ howBig == sz ifTrue:[^ self].
+
+ howBig == 0 ifTrue:[
+ runs := nil.
+ ^ self.
+ ].
+
+ runs isNil ifTrue:[
+ runs := Array with:howBig with:nil.
+ ^ self
+ ].
+
+ runSz := runs size.
+
+ howBig > sz ifTrue:[
+ newRuns := Array new:(runSz + 2).
+ newRuns replaceFrom:1 to:runSz with:runs startingAt:1.
+ newRuns at:(runSz+1) put:(howBig - sz).
+ runs := newRuns.
+ ^ self
+ ].
+
+ "/ shrinking; possibly cut of a run
+
+ info := self runIndexAndStartIndexForIndex:howBig.
+ runIndex := info at:1.
+ runOffset := info at:2.
+
+ howBig == (runOffset - 1) ifTrue:[
+ "/ we are lucky - new size is up-to previous run
+
+ runs := runs copyFrom:1 to:runIndex-1.
+ ] ifFalse:[
+ runs := runs copyFrom:1 to:(runIndex+1).
+ runs at:runIndex put:(howBig - runOffset + 1)
+ ].
+
+ "
+ |c|
+
+ c := RunArray new.
+ c addAll:#(1 2 3 4 4 4 4 5 5 5 1 2 3); yourself.
+
+ c grow:50; yourself.
+
+ c grow:7; yourself.
+
+ c grow:6; yourself.
+
+ c grow:0; yourself.
+ "
+
+ "Modified: 10.5.1996 / 18:11:54 / cg"
+! !
+
+!RunArray methodsFor:'converting'!
+
+asOrderedCollection
+ "Uncompress this collection."
+
+ |newCollection|
+
+ newCollection := OrderedCollection new.
+ newCollection addAll:self.
+ ^ newCollection
+
+ "Modified: 10.5.1996 / 13:31:49 / cg"
+! !
+
+!RunArray methodsFor:'enumerating'!
+
+do:aBlock
+ "Evaluate aBlock with each of the receiver's elements as the
+ argument. "
+
+ runs notNil ifTrue:[
+ runs pairWiseDo:[:len :val |
+ len timesRepeat: [aBlock value:val]
+ ]
+ ]
+
+ "Modified: 10.5.1996 / 16:56:01 / cg"
+! !
+
+!RunArray methodsFor:'printing'!
+
+storeOn:aStream
+ "Append to aStream an expression which, if evaluated, will generate
+ an object similar to the receiver."
+
+ aStream nextPutAll: '(RunArray new'.
+ runs notNil ifTrue:[
+ runs pairWiseDo:[:len :val |
+ aStream nextPutAll: ' add:'; nextPutAll:val storeString.
+ len == 1 ifFalse:[
+ aStream nextPutAll:' withCount:'; nextPutAll:len printString.
+ ].
+ aStream nextPutAll:';'
+ ].
+ aStream nextPutAll:' yourself'
+ ].
+ aStream nextPutAll:')'
+
+ "
+ (RunArray new
+ add:1;
+ add:1;
+ add:2;
+ add:3;
+ add:4 withCount:100;
+ add:5;
+ yourself) storeString
+
+ RunArray new storeString
+ "
+! !
+
+!RunArray methodsFor:'private'!
+
+find:oldObject
+ "If I contain oldObject return its index, otherwise create an error
+ notifier. It will answer with the position of the very first element of
+ that value."
+
+ |position|
+
+ position := self find:oldObject ifAbsent:0.
+ position ~~ 0 ifTrue:[
+ ^ position
+ ].
+
+ self errorValueNotFound:oldObject
+
+ "
+ |c|
+
+ c := RunArray new.
+ c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5.
+ c find:2.
+
+ c find:99
+ "
+
+ "Modified: 10.5.1996 / 13:39:58 / cg"
+!
+
+find:oldObject ifAbsent:exceptionBlock
+ "If I contain oldObject return its index, otherwise execute the
+ exception block. It will answer with the position of the very first
+ element of that value."
+
+ |position|
+
+ position := 1.
+ runs notNil ifTrue:[
+ runs pairWiseDo:[:len :val |
+ val = oldObject ifTrue:[
+ ^ position
+ ].
+ position := position + len.
+ ].
+ ].
+ ^ exceptionBlock value
+
+ "Modified: 10.5.1996 / 13:54:25 / cg"
+!
+
+isEmpty
+ "Am I empty or not. Returns a boolean"
+
+ ^ runs notNil
+!
+
+runIndexAndStartIndexForIndex:anIndex
+ "given a logical index, find the index of that run and the logical index
+ of the first item in that run."
+
+ |position nRuns "{ Class: SmallInteger }"|
+
+ position := 1.
+ nRuns := runs size.
+ 1 to:nRuns by:2 do:[:runIndex |
+ |runLen runLast|
+
+ runLen := runs at:runIndex.
+ anIndex >= position ifTrue:[
+ runLast := position + runLen - 1.
+ anIndex <= runLast ifTrue:[
+ ^ Array with:runIndex with:position
+ ].
+ ].
+ position := position + runLen
+ ].
+
+ ^ #(0 0)
+
+ "Created: 10.5.1996 / 17:12:28 / cg"
+ "Modified: 10.5.1996 / 17:13:42 / cg"
+!
+
+runIndexForIndex:anIndex
+ "given a logical index, find the index of that run"
+
+ ^ (self runIndexAndStartIndexForIndex:anIndex) at:1
+
+ "Modified: 10.5.1996 / 17:13:45 / cg"
+! !
+
+!RunArray methodsFor:'user interface'!
+
+inspect
+ "Reimplement so that they don't get an ordered collection inspector
+ which would get very confused."
+
+ self basicInspect
+
+ "Modified: 10.5.1996 / 18:24:43 / cg"
+! !
+
+!RunArray class methodsFor:'documentation'!
+
+version
+ ^ '$Header: /cvs/stx/stx/libbasic2/RunArray.st,v 1.1 1996-05-11 10:47:55 cg Exp $'
+! !