SequenceableCollection.st
changeset 14100 f9daa751b59a
parent 14099 e7938e9908f3
child 14102 93c008b8e304
--- a/SequenceableCollection.st	Wed Apr 11 22:01:30 2012 +0200
+++ b/SequenceableCollection.st	Thu Apr 12 21:14:06 2012 +0200
@@ -5478,50 +5478,111 @@
 
 !SequenceableCollection methodsFor:'private-sorting helpers'!
 
-mergeFirst: first middle: middle last: last into: dst by: aBlock
+mergeFirst:first middle:middle last:last into:dst by:aBlock 
     "Private!!
      Merge the sorted ranges [first..middle] and [middle+1..last] of the receiver into the range [first..last] of dst."
-
-    | i1 i2 val1 val2 out |
+    
+    |i1 i2 val1 val2 out|
 
     i1 := first.
+    out := first.
     i2 := middle + 1.
-    val1 := self at: i1.
-    val2 := self at: i2.
-    out := first - 1.  "will be pre-incremented"
-
-    "select 'lower' half of the elements based on comparator"
-    [(i1 <= middle) and: [i2 <= last]] whileTrue: [
-	    (aBlock value: val1 value: val2)
-		    ifTrue: [
-			    dst at: (out := out + 1) put: val1.
-			    val1 := self at: (i1 := i1 + 1)]
-		    ifFalse: [
-			    dst at: (out := out + 1) put: val2.
-			    i2 := i2 + 1.
-			    i2 <= last ifTrue: [val2 := self at: i2]]].
+    val1 := self at:i1.
+
+    i2 <= last ifTrue:[
+        val2 := self at:i2.
+        "select 'lower' half of the elements based on comparator"
+        [(i1 <= middle) and:[i2 <= last]] whileTrue:[
+            "this is stable if #< or #> ist used for comparison (and not #<= or #>=)"
+            (aBlock value:val2 value:val1) ifTrue:[
+                dst at:out put:val2.
+                i2 := i2 + 1.
+                i2 <= last ifTrue:[
+                    val2 := self at:i2
+                ]
+            ] ifFalse:[
+                dst at:out put:val1.
+                i1 := i1 + 1.
+                val1 := self at:i1.
+            ].
+            out := out + 1.
+        ].
+    ].
 
     "copy the remaining elements"
-    i1 <= middle
-	    ifTrue: [
-		    dst replaceFrom: out + 1 to: last with: self startingAt: i1]
-	    ifFalse: [
-		    dst replaceFrom: out + 1 to: last with: self startingAt: i2].
-
-!
-
-mergeSortFrom: first to: last src: src dst: dst by: aBlock
+    i1 <= middle ifTrue:[
+        dst 
+            replaceFrom:out
+            to:last
+            with:self
+            startingAt:i1
+    ] ifFalse:[
+        dst 
+            replaceFrom:out
+            to:last
+            with:self
+            startingAt:i2
+    ].
+!
+
+mergeSortFrom: first to: last by: aBlock
     "Private!! Split the range to be sorted in half, sort each half, and merge the two half-ranges into dst."
 
-    | middle |
-
-    first = last ifTrue: [^ self].
-    middle := (first + last) // 2.
-    self mergeSortFrom: first to: middle src: dst dst: src by: aBlock.
-    self mergeSortFrom: middle + 1 to: last src: dst dst: src by: aBlock.
-    src mergeFirst: first middle: middle last: last into: dst by: aBlock.
-
-
+    |size chunkSize idx nextIdx finished temp src dst|
+
+    size := last-first + 1.
+    size <= 1 ifTrue:[^ self].
+
+    "size of the base chunks"
+    chunkSize := 12.
+    idx := first.
+    finished := false.
+    [finished] whileFalse:[
+        nextIdx := idx + chunkSize.
+        nextIdx > last ifTrue:[
+            nextIdx := last+1.
+            finished := true.
+        ].
+        self insertionSort:aBlock from:idx to:nextIdx-1.
+        idx := nextIdx.
+    ].
+
+    chunkSize < size ifTrue:[
+        "now merge the chunks"
+        dst := Array new:self size.
+        src := self.
+
+        [chunkSize < size] whileTrue:[
+            "merge pairs of adjecant chunks"
+            idx := first.
+            finished := false.
+            [finished] whileFalse:[
+                nextIdx := idx + (2*chunkSize).
+                nextIdx > last ifTrue:[
+                    nextIdx := last+1.
+                    finished := true.
+                ].
+                src
+                    mergeFirst:idx 
+                    middle:idx+chunkSize-1 
+                    last:nextIdx-1 
+                    into:dst
+                    by:aBlock.
+                idx := nextIdx.
+            ].
+
+            chunkSize := chunkSize * 2.
+
+            "merged chunks are in dst. Swap source and destination for next step"
+            temp := dst.
+            dst := src.
+            src := temp.
+        ].
+        "note, the latest dst is now src (see above)"
+        src ~~ self ifTrue:[
+            self replaceFrom:first to:last with:src startingAt:first.
+        ].
+    ].
 !
 
 quickSortFrom:inBegin to:inEnd
@@ -5559,7 +5620,7 @@
         end := stack removeLast.
         begin := stack removeLast.
 
-        end - begin < 12 ifTrue:[
+        end - begin <= 12 ifTrue:[
             "for small number of elements do an insertion sort, which is faster"
             bRun := begin + 1.
             bRun to:end do:[:idx|
@@ -5575,7 +5636,7 @@
             ].
         ] ifFalse:[
            depthLimit <= 0 ifTrue:[
-               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)"
+               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)"
                ^ self mergeSort:[:a :b | a < b] from:inBegin to:inEnd.
            ].
            elB := self at:begin.
@@ -5673,7 +5734,7 @@
             ].
         ] ifFalse:[
            depthLimit <= 0 ifTrue:[
-               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)"
+               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)"
                ^ self mergeSort:sortBlock from:inBegin to:inEnd.
            ].
            elB := self at:begin.
@@ -5769,7 +5830,7 @@
         ] ifFalse:[
 "/ mergeSort does not handle policy yet
 "/           depthLimit <= 0 ifTrue:[
-"/               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)"
+"/               "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)"
 "/               ^ self mergeSort:sortBlock from:inBegin to:inEnd.
 "/           ].
            elB := self at:begin.
@@ -7547,9 +7608,78 @@
     "
 !
 
+insertionSort
+    "sort the collection using a insertionSort algorithm.
+     The elements are compared using'#<'
+     i.e. they should offer a magnitude-like protocol.
+
+     Insertion sort sort is a stable sorting algorithm, i.e. elements with the same sort key
+     keep their order (if you use e.g. #<) for comparison.
+
+     The implementation uses the insertionSort algorithm, 
+     which is slow for large collections O(n*n), but good for small or
+     almost sorted collections O(N).
+
+     See also #quickSort for other sort algorithms
+     with different worst- and average case behavior)"
+
+    self insertionSort:[:a :b | a < b]
+!
+
+insertionSort:sortBlock
+    |stop|
+
+    stop := self size.
+    (stop > 1) ifTrue:[
+        self insertionSort:sortBlock from:1 to:stop
+    ].
+!
+
+insertionSort:sortBlock from:inBegin to:inEnd
+    "binary insertion sort.
+     The implementation uses the insertionSort algorithm, 
+     which is slow for large collections O(n*n), but good for small or
+     almost sorted collections O(N)."
+
+    |begin      "{Class: SmallInteger}"
+     end        "{Class: SmallInteger}"
+     prevIdx    "{Class: SmallInteger}"
+     temp|
+
+    begin := inBegin+1.
+    end := inEnd.
+    begin to:end do:[:idx|
+        temp := self at:idx.
+        prevIdx := idx-1.
+        "this is stable if #< or #> ist used for comparison (and not #<= or #>=)"
+        [prevIdx >= inBegin and:[sortBlock value:temp value:(self at:prevIdx)]] whileTrue:[
+            self at:prevIdx+1 put:(self at:prevIdx).
+            prevIdx := prevIdx - 1.
+        ].
+        (prevIdx+1) ~~ idx ifTrue:[
+            self at:prevIdx+1 put:temp.
+        ].
+    ].
+
+    "
+     |data|
+     data := Random new next:1000.
+     Transcript show:'merge random  '; showCR:(Time millisecondsToRun:[data mergeSort]).
+     Transcript show:'merge sorted  '; showCR:(Time millisecondsToRun:[data mergeSort]).
+     data reverse.
+     Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]).
+
+     data := Random new next:1000.
+     Transcript show:'insert random  '; showCR:(Time millisecondsToRun:[data insertionSort]).
+     Transcript show:'insert sorted  '; showCR:(Time millisecondsToRun:[data insertionSort]).
+     data reverse.
+     Transcript show:'insert reverse '; showCR:(Time millisecondsToRun:[data insertionSort]).
+    "
+!
+
 mergeSort
     "sort the collection using a mergeSort algorithm.
-     The elements are compared using'<'
+     The elements are compared using '#<'
      i.e. they should offer a magnitude-like protocol.
 
      Merge sort is a stable sorting algorithm, i.e. elements with the same sort key
@@ -7577,6 +7707,12 @@
      Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]).
 
      data := random copy.
+     Transcript show:'quick block random  '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]).
+     Transcript show:'quick block sorted  '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]).
+     data := data reverse.
+     Transcript show:'quick block reverse '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]).
+
+     data := random copy.
      Transcript show:'quick random  '; showCR:(Time millisecondsToRun:[data quickSort]).
      Transcript show:'quick sorted  '; showCR:(Time millisecondsToRun:[data quickSort]).
      data := data reverse.
@@ -7586,11 +7722,11 @@
 
 mergeSort:sortBlock
     "sort the collection using a mergeSort algorithm.
-     The elements are compared using'<'
+     The elements are compared using sortBlock
      i.e. they should offer a magnitude-like protocol.
 
      Merge sort is a stable sorting algorithm, i.e. elements with the same sort key
-     keep their order.
+     keep their order (if you use e.g. #< for comparison).
 
      The implementation uses the mergesort algorithm, which may not be
      the best possible for all situations
@@ -7602,39 +7738,34 @@
     stop := self size.
     (stop > 1) ifTrue:[
         self mergeSort:sortBlock from:1 to:stop
-    ]
-
-    "
-     #(1 16 7 98 3 19 4 0) mergeSort
-
-     |data|
-     data := Random new next:200000.
-     Transcript show:'merge random  '; showCR:(Time millisecondsToRun:[data mergeSort]).
-     Transcript show:'merge sorted  '; showCR:(Time millisecondsToRun:[data mergeSort]).
-     data reverse.
-     Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]).
-
-     data := Random new next:200000.
-     Transcript show:'quick random  '; showCR:(Time millisecondsToRun:[data sort]).
-     Transcript show:'quick sorted  '; showCR:(Time millisecondsToRun:[data sort]).
-     data reverse.
-     Transcript show:'quick reverse '; showCR:(Time millisecondsToRun:[data sort]).
-
-     data := Random new next:200000.
-     Transcript show:'quickr random  '; showCR:(Time millisecondsToRun:[data randomizedSort]).
-     Transcript show:'quickr sorted  '; showCR:(Time millisecondsToRun:[data randomizedSort]).
-     data reverse.
-     Transcript show:'quickr reverse '; showCR:(Time millisecondsToRun:[data randomizedSort]).
-    "
-!
-
-mergeSort:aBlock from: startIndex to: stopIndex
+    ].
+
+    "
+     |random data|
+
+     random := Random new next:500000.
+
+     data := random copy.
+     Transcript show:'merge block random  '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]).
+     Transcript show:'merge block sorted  '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]).
+     data := data reverse.
+     Transcript show:'merge block reverse '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]).
+
+     data := random copy.
+     Transcript show:'quick block random  '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]).
+     Transcript show:'quick block sorted  '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]).
+     data := data reverse.
+     Transcript show:'quick block reverse '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]).
+   "
+!
+
+mergeSort:aBlock from:startIndex to:stopIndex 
     "Sort the given range of indices using the mergesort algorithm.
      Mergesort is a worst-case O(N log N) sorting algorithm that usually does only half
      as many comparisons as heapsort or quicksort.
 
      Merge sort is a stable sorting algorithm, i.e. elements with the same sort key
-     keep their order."
+     keep their order (if you use e.g. #< or #> for comparison)."
 
     "Details: recursively split the range to be sorted into two halves,
      mergesort each half, then merge the two halves together.
@@ -7642,17 +7773,26 @@
      and forth between the receiver and this copy.
      The recursion is set up so that the final merge is performed into the receiver,
      resulting in the receiver being completely sorted."
-
-    | temp |
-
-    self size <= 1 ifTrue: [^ self].  "nothing to do"
-    startIndex = stopIndex ifTrue: [^ self].
-    (startIndex >= 1 and: [startIndex < stopIndex])
-            ifFalse: [self error: 'bad start index'].
-    stopIndex <= self size
-            ifFalse: [self error: 'bad stop index'].
-    temp := self clone.
-    self mergeSortFrom: startIndex to: stopIndex src: temp dst: self by: aBlock.
+    
+    |mySize|
+
+    mySize := self size.
+    mySize <= 1 ifTrue:[
+        ^ self
+    ].
+    startIndex = stopIndex ifTrue:[
+        ^ self
+    ].
+    (startIndex >= 1 and:[ startIndex < stopIndex ]) ifFalse:[
+        self error:'bad start index'
+    ].
+    stopIndex > mySize ifTrue:[
+        self error:'bad stop index'
+    ].
+    self
+        mergeSortFrom:startIndex
+        to:stopIndex
+        by:aBlock.
 !
 
 quickSort
@@ -8094,6 +8234,50 @@
     "Modified: / 22-10-2008 / 21:25:07 / cg"
 !
 
+stableSort
+    "sort the collection inplace. The elements are compared using
+     '<=' i.e. they should offer a magnitude-like protocol.
+
+     Use a stable sort algorithm - i.e. elements having an equal key will keep
+     their previous order."
+
+    self mergeSort
+
+    "
+     #(1 16 7 98 3 19 4 0) stableSort
+
+     |data|
+     data := Random new next:100000.
+     Transcript show:'sort random  '; showCR:(Time millisecondsToRun:[data stableSort]).
+     Transcript show:'sort sorted  '; showCR:(Time millisecondsToRun:[data stableSort]).
+     data reverse.
+     Transcript show:'sort reverse '; showCR:(Time millisecondsToRun:[data stableSort]).
+    "
+!
+
+stableSort:sortBlock
+    "sort the collection inplace using the 2-arg block sortBlock
+     for comparison. This allows any sort criteria to be implemented.
+
+     Use a stable sort algorithm - i.e. elements having an equal key will keep
+     their previous order.
+
+     NOTE: the sort algorithm will be stable, if the sortblock uses #< or #> for comparison!!
+           Do not use #<= of #>= if you want stable behavior."
+
+    self mergeSort:sortBlock
+
+    "
+     The 4@bla points keep their order:
+         {(4@1). (8@2). (4@2). (3@3). (4@3). (-1@4). (17@17). (19@19).
+          (12 @ 12). (13@13). (14@14). (15@15). (10@10). (8@8).} stableSort:[:a :b | a x < b x]
+        
+     But not with quickSort:
+         {(4@1). (8@2). (4@2). (3@3). (4@3). (-1@4). (17@17). (19@19).
+          (12 @ 12). (13@13). (14@14). (15@15). (10@10). (8@8).} sort:[:a :b | a x < b x]
+    "
+!
+
 topologicalSort:sortBlock
     "sort the collection inplace using a sloooow sort algorithm.
      This algorithm has O-square runtime behavior and should be used only
@@ -8185,7 +8369,7 @@
 !SequenceableCollection class methodsFor:'documentation'!
 
 version_CVS
-    ^ '$Header: /cvs/stx/stx/libbasic/SequenceableCollection.st,v 1.315 2012-04-11 20:01:30 stefan Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/SequenceableCollection.st,v 1.316 2012-04-12 19:14:06 stefan Exp $'
 ! !
 
 SequenceableCollection initialize!