RunArray.st
changeset 297 9526ea90d4f9
child 298 8a87aeffa1c0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RunArray.st	Sat May 11 12:47:55 1996 +0200
@@ -0,0 +1,803 @@
+"
+ This class is not covered by or part of the ST/X licence.
+
+
+ COPYRIGHT.
+ The above file is a Manchester Goodie protected by copyright.
+ These conditions are imposed on the whole Goodie, and on any significant
+ part of it which is separately transmitted or stored:
+	* You must ensure that every copy includes this notice, and that
+	  source and author(s) of the material are acknowledged.
+	* These conditions must be imposed on anyone who receives a copy.
+	* The material shall not be used for commercial gain without the prior
+	  written consent of the author(s).
+ Further information on the copyright conditions may be obtained by
+ sending electronic mail:
+	To: goodies-lib@cs.man.ac.uk
+	Subject: copyright
+ or by writing to The Smalltalk Goodies Library Manager, Dept of
+ Computer Science, The University, Manchester M13 9PL, UK
+
+ (C) Copyright 1993 University of Manchester
+ For more information about the Manchester Goodies Library (from which 
+ this file was distributed) send e-mail:
+	To: goodies-lib@cs.man.ac.uk
+	Subject: help 
+"
+
+
+
+SequenceableCollection subclass:#RunArray
+	instanceVariableNames:'runs'
+	classVariableNames:''
+	poolDictionaries:''
+	category:'Collections-Sequenceable'
+!
+
+RunArray comment:'This implements an ordered collection which uses runs to minimise the amount
+ of data that it holds. Basically it should be used if you know that an ordered
+ collections is giong to contain a lot of runs of eactly the same data. Implemented
+ to allow simultation playback, since the ordered collctions which that generates
+ are so big that the complier falls over, though most of it is extremely repetetive.
+ This should be totally abstracted. The user should not be a ble to see the difference
+ between an ordered collection and a ComrpessedOrderedCollection.  This has a
+ lot in common with RunArray, and the two should probably share implementation.
+ but I could not do some of the things I wanted with the RunArray code, so this
+ is all done on its own.
+	Some of this could be made faster by adding a cache of the start and finish
+ indices of each run, but since I envisage that most additions etc. will be to
+ and from the end those are not included. In addition I have implemented the
+ bare essentials of this for what I need it for - i.e. add to the end and take
+ off the beginning.'!
+
+!RunArray class methodsFor:'documentation'!
+
+copyright
+"
+ This class is not covered by or part of the ST/X licence.
+
+
+ COPYRIGHT.
+ The above file is a Manchester Goodie protected by copyright.
+ These conditions are imposed on the whole Goodie, and on any significant
+ part of it which is separately transmitted or stored:
+	* You must ensure that every copy includes this notice, and that
+	  source and author(s) of the material are acknowledged.
+	* These conditions must be imposed on anyone who receives a copy.
+	* The material shall not be used for commercial gain without the prior
+	  written consent of the author(s).
+ Further information on the copyright conditions may be obtained by
+ sending electronic mail:
+	To: goodies-lib@cs.man.ac.uk
+	Subject: copyright
+ or by writing to The Smalltalk Goodies Library Manager, Dept of
+ Computer Science, The University, Manchester M13 9PL, UK
+
+ (C) Copyright 1993 University of Manchester
+ For more information about the Manchester Goodies Library (from which 
+ this file was distributed) send e-mail:
+	To: goodies-lib@cs.man.ac.uk
+	Subject: help 
+"
+
+
+!
+
+documentation
+"
+    This implements an array which uses runs to minimise the amount
+    of data that it holds. Basically it should be used if you know that an array
+    is going to contain a lot of runs of exactly the same data. 
+
+    The user should not be able to see the difference between an Array and a RunArray.  This has a
+
+    Notice:
+        there is only a space saving if there are really runs (i.e. multiple
+        elements which compare same).
+        Otherwise, an Array is more compact.
+
+
+    [author:]
+        Claus Gittinger
+
+    [see also:]
+        Array OrderedCollection CompressedorderedCollection
+"
+!
+
+examples
+"
+  this eats up a lot of memory ...
+  ... but its relatively fast:
+									[exBegin]
+    |coll|
+
+    coll := OrderedCollection new.
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    100000 timesRepeat:[coll add:'hello'].
+	    100000 timesRepeat:[coll add:'world'].
+	]
+    ).
+    coll inspect.
+									[exEnd]
+
+
+  this is very space efficient ...
+  ... but much slower:
+									[exBegin]
+    |coll|
+
+    coll := RunArray new.
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    100000 timesRepeat:[coll add:'hello'].
+	    100000 timesRepeat:[coll add:'world'].
+	]
+    ).
+    coll inspect.
+									[exEnd]
+
+
+  this is very space efficient ...
+  ... AND much faster:
+									[exBegin]
+    |coll|
+
+    coll := RunArray new.
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    coll add:'hello' withCount:100000.
+	    coll add:'world' withCount:100000.
+	]
+    ).
+    coll inspect.
+									[exEnd]
+
+
+  this is very space INEFFICIENT (a real memory pig ;-) ...
+  ... and much slower:
+									[exBegin]
+    |coll|
+
+    coll := RunArray new.
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    1 to:1000 do:[:i | coll add:i].
+	]
+    ).
+    coll inspect.
+									[exEnd]
+
+
+  things like this are VERY slow:
+									[exBegin]
+    |coll|
+
+    coll := RunArray new.
+    1 to:1000 do:[:i | coll add:i].
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    1000 to:1 by:-2 do:[:i | coll removeIndex:i].
+	]
+    )
+									[exEnd]
+
+  much faster with OCs (although these are not optimal for this, too):
+									[exBegin]
+    |coll|
+
+    coll := OrderedCollection new.
+    1 to:1000 do:[:i | coll add:i].
+    Transcript showCr:(    
+	Time millisecondsToRun:[
+	    1000 to:1 by:-2 do:[:i | coll removeIndex:i].
+	]
+    )
+									[exEnd]
+
+  in general, such things are better done by constructing a new collection
+  from scratch (use #select: or #collect:)
+"
+! !
+
+!RunArray class methodsFor:'instance creation'!
+
+new:size
+    "ignore the size argument - we dont know how many runs are
+     needed - anyway"
+
+    ^ self new
+! !
+
+!RunArray methodsFor:'accessing'!
+
+at:anInteger 
+    "Answer the element at index anInteger. 
+     at: is used by a knowledgeable client to access an existing element 
+     This is a pretty dumb thing to do a compressed collection and it is 
+     not at all efficient (think of that as a discouragement)."
+
+    |position "{ Class: SmallInteger }"
+     nRuns    "{ Class: SmallInteger }"|
+
+    (anInteger > 0) ifTrue:[
+        position := 1.
+        nRuns := runs size.
+        1 to:nRuns by:2 do:[:runIndex |
+            |runLen|
+
+            runLen := runs at:runIndex.
+            anInteger >= position ifTrue:[
+                anInteger < (position + runLen) ifTrue:[
+                    ^ runs at:(runIndex + 1)
+                ].
+            ].
+            position := position + runLen
+        ]
+    ].
+    ^ self errorInvalidKey:anInteger
+
+    "
+     |c|
+
+     c := RunArray new.
+     c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5.
+     c at:1. 
+     c at:2. 
+     c at:3. 
+     c at:4.  
+     c at:5. 
+     c at:6. 
+     c at:7. 
+     c at:8.  
+     c at:9.  
+     c at:10.  
+    "
+
+    "Modified: 10.5.1996 / 16:59:03 / cg"
+!
+
+at:index put:anObject 
+    "Put anObject at element index anInteger.      
+     at:put: can not be used to append, front or back, to an ordered      
+     collection;  it is used by a knowledgeable client to replace an     
+     element. It doesn't make a lot of sense for a compressed collection,    
+     and as you can see, the implementation is awful - still if you will    
+     insist on using this what can you expect. Basically this just copies 
+     itself up to the start point then adds the required element then 
+     copies the rest of itself"
+
+    |runSz runIndex runOffset len l1 l2 prevIdx nextIdx
+     val newRuns newValues prevLen prevVal nextLen nextVal idx|
+
+    runSz := runs size.
+
+    runIndex := nil.
+    (index > 0) ifTrue:[
+        runOffset := 1.
+        idx := 1.
+        [runIndex isNil and:[idx < runSz]] whileTrue:[
+            len := runs at:idx.
+            nextIdx := runOffset + len.
+            index >= runOffset ifTrue:[
+                index < nextIdx ifTrue:[
+                    runIndex := idx.
+                    nextIdx := runOffset. "/ dont advance
+                ].
+            ].
+            runOffset := nextIdx.
+            idx := idx + 2.
+        ]
+    ].
+    runIndex isNil ifTrue:[
+        ^ self errorInvalidKey:index
+    ].
+
+    val := runs at:(runIndex + 1).
+
+    "/ easiest: value there is the same ...
+
+    val = anObject ifTrue:[
+        ^ anObject
+    ].
+
+    "/ if the length is 1, this is an island ...
+    "/ ... which is either easy, or requires a merge.
+
+    len := runs at:runIndex.
+    len = 1 ifTrue:[
+        "/ check if it can be merged into the next or previous run
+
+        runIndex > 1 ifTrue:[
+            prevIdx := runIndex - 2.
+            prevVal := runs at:(prevIdx + 1).
+            prevVal = anObject ifTrue:[
+                "/ can merge it into previous
+
+                prevLen := runs at:prevIdx.
+
+                "/ check if merge into next is also possible (filling an island)
+                
+                runIndex < (runSz - 1) ifTrue:[
+                    nextIdx := runIndex + 2.
+                    nextVal := runs at:(nextIdx + 1).
+                    nextVal = anObject ifTrue:[
+                        "/ can merge with both.
+                        
+                        nextLen := runs at:nextIdx.
+
+                        runs at:prevIdx put:prevLen+nextLen+1.
+                        runSz := (runSz - 4).
+                        newRuns := Array new:runSz.
+                        newRuns replaceFrom:1 to:(prevIdx + 1) with:runs startingAt:1.
+                        newRuns replaceFrom:runIndex to:runSz with:runs startingAt:nextIdx+2.
+                        runs := newRuns.
+                        ^ anObject
+                    ]
+                ].
+
+                runs at:prevIdx put:prevLen+1.
+
+                runSz := (runSz - 2).
+                newRuns := Array new:runSz.
+                newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+                newRuns replaceFrom:runIndex to:runSz with:runs startingAt:runIndex+2.
+                runs := newRuns.
+
+                ^ anObject
+            ].
+        ].
+
+        "/ check if merge into next is possible
+
+        runIndex < runSz ifTrue:[
+            nextIdx := runIndex + 2.
+            nextVal := runs at:nextIdx+1.
+            nextVal = anObject ifTrue:[
+                nextLen := runs at:nextIdx.
+                runs at:nextIdx put:nextLen + 1.
+
+                runSz := (runSz - 2).
+                newRuns := Array new:runSz.
+                newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+                newRuns replaceFrom:runIndex to:runSz with:runs startingAt:nextIdx.
+                runs := newRuns.
+                ^ anObject
+            ].
+        ].
+
+        "/ no merge; island remains
+
+        runs at:(runIndex+1) put:anObject.
+        ^ anObject
+    ].
+
+    runOffset == index ifTrue:[
+        "/ at the beginning of that run ...
+
+        "/ check if its better added to the previous run ...
+
+        runIndex > 1 ifTrue:[
+            prevIdx := runIndex - 2.
+            prevVal := runs at:prevIdx+1.
+            prevVal = anObject ifTrue:[
+                prevLen := runs at:prevIdx.
+                runs at:prevIdx put:prevLen + 1.
+                runs at:runIndex put:len - 1.
+                ^ anObject.
+            ].
+        ].
+
+        "/ must cut off 1 & insert a new run before ..
+
+        runs at:runIndex put:len - 1.
+
+        runSz := (runSz + 2).
+        newRuns := Array new:runSz.
+        newRuns replaceFrom:1 to:(runIndex - 1) with:runs startingAt:1.
+        newRuns replaceFrom:runIndex+2 to:runSz with:runs startingAt:runIndex.
+        runs := newRuns.
+
+        runs at:runIndex   put:1.
+        runs at:runIndex+1 put:anObject.
+        ^ anObject
+    ].
+
+    (runOffset + len - 1) == index ifTrue:[
+        "/ at the end ...
+
+        "/ check if its better added to the next run ...
+
+        runIndex < runSz ifTrue:[
+            nextIdx := runIndex + 2.
+            nextVal := runs at:nextIdx+1.
+            nextVal = anObject ifTrue:[
+                nextLen := runs at:nextIdx.
+                runs at:nextIdx put:nextLen + 1.
+                runs at:runIndex put:len - 1.
+                ^ anObject.
+            ].
+        ].
+
+        "/ must cut off 1 & insert a new run after ..
+
+        runs at:runIndex put:len - 1.
+
+        runSz := (runSz + 2).
+        newRuns := Array new:runSz.
+        newRuns replaceFrom:1 to:(runIndex + 1) with:runs startingAt:1.
+        newRuns replaceFrom:runIndex+4 to:runSz with:runs startingAt:runIndex+2.
+        runs := newRuns.
+
+        runs at:runIndex+2 put:1.
+        runs at:runIndex+2+1 put:anObject.
+        ^ anObject
+    ].
+
+    "/ hardest - split run into two, insert new run in-between
+
+    runSz := (runSz + 4).
+    newRuns := Array new:runSz.
+
+    runIndex > 1 ifTrue:[
+        newRuns replaceFrom:1 to:runIndex-1 with:runs.
+    ].
+    newRuns replaceFrom:runIndex+6 to:(runSz+4) with:runs startingAt:runIndex+2.
+
+    l2 := len - (index - runOffset).
+    l1 := len - l2.
+    l2 := l2 - 1.
+
+    newRuns at:runIndex   put:l1.
+    newRuns at:runIndex+1 put:val.
+
+    newRuns at:runIndex+4 put:l2.
+    newRuns at:runIndex+5 put:val.
+
+    "/ insert
+    newRuns at:runIndex+2 put:1.
+    newRuns at:runIndex+3 put:anObject.
+
+    runs := newRuns.
+    ^ anObject
+
+    "
+     |c|
+
+     Transcript cr.
+
+     c := RunArray new.
+     c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5; yourself.
+     Transcript showCr:c.   
+
+     c at:1 put:$a.
+     Transcript showCr:c.   
+     c.
+
+     c at:3 put:$a.
+     Transcript showCr:c.   
+     c.
+
+     c at:4 put:$a.   
+     Transcript showCr:c.   
+     c.
+
+     c at:5 put:$a.   
+     Transcript showCr:c.   
+     c.
+
+     c at:2 put:$0.   
+     Transcript showCr:c.   
+     c.
+
+     c at:2 put:$a.   
+     Transcript showCr:c.   
+     c.
+
+     Transcript showCr:c.   
+    "
+
+    "Modified: 11.5.1996 / 08:41:49 / cg"
+!
+
+size
+    "Answer how many elements the receiver contains."
+
+    |n     "{ Class: SmallInteger }"
+     runSz "{ Class: SmallInteger }"|
+
+    n := 0.
+    runSz := runs size.
+    1 to:runSz by:2 do:[:i |
+        n := n + (runs at:i)
+    ].
+    ^ n
+! !
+
+!RunArray methodsFor:'adding'!
+
+add:newObject
+    "add newObject at the end; returns the object (sigh)"
+
+    ^ self add:newObject withOccurrences:1.
+
+    "
+     |c|
+
+     c := RunArray new.
+     c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5; yourself.
+    "
+
+    "Modified: 10.5.1996 / 17:01:20 / cg"
+!
+
+add:newObject withOccurrences:n
+    "add newObject n times at the end; returns the object (sigh)"
+
+    |lastIdx runSz newRuns|
+
+    runs notNil ifTrue:[
+        "/ check for merge
+
+        runSz := runs size.
+
+        (runs at:runSz) = newObject ifTrue:[
+            lastIdx := runSz - 1.
+            runs at:lastIdx put:(runs at:lastIdx) + n.
+            ^ newObject
+        ].
+
+        newRuns := Array new:(runSz + 2).
+        newRuns replaceFrom:1 to:runSz with:runs.
+        newRuns at:runSz+1 put:n.
+        newRuns at:runSz+2 put:newObject.
+        runs := newRuns.
+    ] ifFalse:[
+        runs := Array with:n with:newObject.
+    ].
+    ^ newObject
+
+    "
+     |c|
+
+     c := RunArray new.
+     c add:1 withOccurrences:1000; yourself.
+     c add:2 withOccurrences:1000; yourself.
+    "
+
+    "Modified: 10.5.1996 / 18:29:56 / cg"
+!
+
+grow:howBig
+    "grow or shrink the receiver to contain howBig elements.
+     If growing, new slots will be filled with nil."
+
+    |sz info runIndex runOffset runSz newRuns|
+
+    sz := self size.
+
+    howBig == sz ifTrue:[^ self].
+
+    howBig == 0 ifTrue:[
+        runs := nil.
+        ^ self.
+    ].
+
+    runs isNil ifTrue:[
+        runs := Array with:howBig with:nil.
+        ^ self
+    ].
+
+    runSz := runs size.
+
+    howBig > sz ifTrue:[
+        newRuns := Array new:(runSz + 2).
+        newRuns replaceFrom:1 to:runSz with:runs startingAt:1.
+        newRuns at:(runSz+1) put:(howBig - sz).
+        runs := newRuns.
+        ^ self
+    ].
+
+    "/ shrinking; possibly cut of a run
+
+    info := self runIndexAndStartIndexForIndex:howBig.
+    runIndex := info at:1.
+    runOffset := info at:2.
+
+    howBig == (runOffset - 1) ifTrue:[
+        "/ we are lucky - new size is up-to previous run
+
+        runs := runs copyFrom:1 to:runIndex-1.
+    ] ifFalse:[
+        runs := runs copyFrom:1 to:(runIndex+1).
+        runs at:runIndex put:(howBig - runOffset + 1)
+    ].
+
+    "
+     |c|
+
+     c := RunArray new.
+     c addAll:#(1 2 3 4 4 4 4 5 5 5 1 2 3); yourself.
+
+     c grow:50; yourself.
+
+     c grow:7; yourself.
+
+     c grow:6; yourself.
+
+     c grow:0; yourself.
+    "
+
+    "Modified: 10.5.1996 / 18:11:54 / cg"
+! !
+
+!RunArray methodsFor:'converting'!
+
+asOrderedCollection
+    "Uncompress this collection."
+
+    |newCollection|
+
+    newCollection := OrderedCollection new.
+    newCollection addAll:self.
+    ^ newCollection
+
+    "Modified: 10.5.1996 / 13:31:49 / cg"
+! !
+
+!RunArray methodsFor:'enumerating'!
+
+do:aBlock 
+    "Evaluate aBlock with each of the receiver's elements as the 
+    argument. "
+
+    runs notNil ifTrue:[
+        runs pairWiseDo:[:len :val | 
+            len timesRepeat: [aBlock value:val]
+        ]
+    ]
+
+    "Modified: 10.5.1996 / 16:56:01 / cg"
+! !
+
+!RunArray methodsFor:'printing'!
+
+storeOn:aStream 
+    "Append to aStream an expression which, if evaluated, will generate   
+    an object similar to the receiver."
+
+    aStream nextPutAll: '(RunArray new'.
+    runs notNil ifTrue:[
+        runs pairWiseDo:[:len :val | 
+            aStream nextPutAll: ' add:'; nextPutAll:val storeString. 
+            len == 1 ifFalse:[
+                aStream nextPutAll:' withCount:'; nextPutAll:len printString.
+            ].
+            aStream nextPutAll:';'
+        ].
+        aStream nextPutAll:' yourself'
+    ].
+    aStream nextPutAll:')'
+
+    "
+     (RunArray new 
+        add:1; 
+        add:1; 
+        add:2; 
+        add:3; 
+        add:4 withCount:100; 
+        add:5; 
+        yourself) storeString
+
+     RunArray new storeString 
+    "
+! !
+
+!RunArray methodsFor:'private'!
+
+find:oldObject 
+    "If I contain oldObject return its index, otherwise create an error   
+    notifier. It will answer with the position of the very first element of  
+    that value."
+
+    |position|
+
+    position := self find:oldObject ifAbsent:0.
+    position ~~ 0 ifTrue:[
+	^ position
+    ].
+
+    self errorValueNotFound:oldObject
+
+    "
+     |c|
+
+     c := RunArray new.
+     c add:1; add:1; add:1; add:2; add:2; add:3; add:3; add:4; add:5.
+     c find:2.
+
+     c find:99
+    "
+
+    "Modified: 10.5.1996 / 13:39:58 / cg"
+!
+
+find:oldObject ifAbsent:exceptionBlock 
+    "If I contain oldObject return its index, otherwise execute the 
+     exception block. It will answer with the position of the very first 
+     element of that value."
+
+    |position|
+
+    position := 1.
+    runs notNil ifTrue:[
+        runs pairWiseDo:[:len :val | 
+            val = oldObject ifTrue:[
+                ^ position
+            ].
+            position := position + len.
+        ].
+    ].
+    ^ exceptionBlock value
+
+    "Modified: 10.5.1996 / 13:54:25 / cg"
+!
+
+isEmpty
+    "Am I empty or not. Returns a boolean"
+
+    ^ runs notNil
+!
+
+runIndexAndStartIndexForIndex:anIndex
+    "given a logical index, find the index of that run and the logical index
+     of the first item in that run."
+
+    |position nRuns "{ Class: SmallInteger }"|
+
+    position := 1.
+    nRuns := runs size.
+    1 to:nRuns by:2 do:[:runIndex | 
+        |runLen runLast|
+
+        runLen := runs at:runIndex.
+        anIndex >= position ifTrue:[
+            runLast := position + runLen - 1.
+            anIndex <= runLast ifTrue:[
+                ^ Array with:runIndex with:position 
+            ].
+        ].
+        position := position + runLen
+    ].
+
+    ^ #(0 0)
+
+    "Created: 10.5.1996 / 17:12:28 / cg"
+    "Modified: 10.5.1996 / 17:13:42 / cg"
+!
+
+runIndexForIndex:anIndex
+    "given a logical index, find the index of that run"
+
+    ^ (self runIndexAndStartIndexForIndex:anIndex) at:1
+
+    "Modified: 10.5.1996 / 17:13:45 / cg"
+! !
+
+!RunArray methodsFor:'user interface'!
+
+inspect
+    "Reimplement so that they don't get an ordered collection inspector 
+     which would get very confused."
+
+    self basicInspect
+
+    "Modified: 10.5.1996 / 18:24:43 / cg"
+! !
+
+!RunArray class methodsFor:'documentation'!
+
+version
+    ^ '$Header: /cvs/stx/stx/libbasic2/RunArray.st,v 1.1 1996-05-11 10:47:55 cg Exp $'
+! !