diff -r e7938e9908f3 -r f9daa751b59a SequenceableCollection.st --- a/SequenceableCollection.st Wed Apr 11 22:01:30 2012 +0200 +++ b/SequenceableCollection.st Thu Apr 12 21:14:06 2012 +0200 @@ -5478,50 +5478,111 @@ !SequenceableCollection methodsFor:'private-sorting helpers'! -mergeFirst: first middle: middle last: last into: dst by: aBlock +mergeFirst:first middle:middle last:last into:dst by:aBlock "Private!! Merge the sorted ranges [first..middle] and [middle+1..last] of the receiver into the range [first..last] of dst." - - | i1 i2 val1 val2 out | + + |i1 i2 val1 val2 out| i1 := first. + out := first. i2 := middle + 1. - val1 := self at: i1. - val2 := self at: i2. - out := first - 1. "will be pre-incremented" - - "select 'lower' half of the elements based on comparator" - [(i1 <= middle) and: [i2 <= last]] whileTrue: [ - (aBlock value: val1 value: val2) - ifTrue: [ - dst at: (out := out + 1) put: val1. - val1 := self at: (i1 := i1 + 1)] - ifFalse: [ - dst at: (out := out + 1) put: val2. - i2 := i2 + 1. - i2 <= last ifTrue: [val2 := self at: i2]]]. + val1 := self at:i1. + + i2 <= last ifTrue:[ + val2 := self at:i2. + "select 'lower' half of the elements based on comparator" + [(i1 <= middle) and:[i2 <= last]] whileTrue:[ + "this is stable if #< or #> ist used for comparison (and not #<= or #>=)" + (aBlock value:val2 value:val1) ifTrue:[ + dst at:out put:val2. + i2 := i2 + 1. + i2 <= last ifTrue:[ + val2 := self at:i2 + ] + ] ifFalse:[ + dst at:out put:val1. + i1 := i1 + 1. + val1 := self at:i1. + ]. + out := out + 1. + ]. + ]. "copy the remaining elements" - i1 <= middle - ifTrue: [ - dst replaceFrom: out + 1 to: last with: self startingAt: i1] - ifFalse: [ - dst replaceFrom: out + 1 to: last with: self startingAt: i2]. - -! - -mergeSortFrom: first to: last src: src dst: dst by: aBlock + i1 <= middle ifTrue:[ + dst + replaceFrom:out + to:last + with:self + startingAt:i1 + ] ifFalse:[ + dst + replaceFrom:out + to:last + with:self + startingAt:i2 + ]. +! + +mergeSortFrom: first to: last by: aBlock "Private!! Split the range to be sorted in half, sort each half, and merge the two half-ranges into dst." - | middle | - - first = last ifTrue: [^ self]. - middle := (first + last) // 2. - self mergeSortFrom: first to: middle src: dst dst: src by: aBlock. - self mergeSortFrom: middle + 1 to: last src: dst dst: src by: aBlock. - src mergeFirst: first middle: middle last: last into: dst by: aBlock. - - + |size chunkSize idx nextIdx finished temp src dst| + + size := last-first + 1. + size <= 1 ifTrue:[^ self]. + + "size of the base chunks" + chunkSize := 12. + idx := first. + finished := false. + [finished] whileFalse:[ + nextIdx := idx + chunkSize. + nextIdx > last ifTrue:[ + nextIdx := last+1. + finished := true. + ]. + self insertionSort:aBlock from:idx to:nextIdx-1. + idx := nextIdx. + ]. + + chunkSize < size ifTrue:[ + "now merge the chunks" + dst := Array new:self size. + src := self. + + [chunkSize < size] whileTrue:[ + "merge pairs of adjecant chunks" + idx := first. + finished := false. + [finished] whileFalse:[ + nextIdx := idx + (2*chunkSize). + nextIdx > last ifTrue:[ + nextIdx := last+1. + finished := true. + ]. + src + mergeFirst:idx + middle:idx+chunkSize-1 + last:nextIdx-1 + into:dst + by:aBlock. + idx := nextIdx. + ]. + + chunkSize := chunkSize * 2. + + "merged chunks are in dst. Swap source and destination for next step" + temp := dst. + dst := src. + src := temp. + ]. + "note, the latest dst is now src (see above)" + src ~~ self ifTrue:[ + self replaceFrom:first to:last with:src startingAt:first. + ]. + ]. ! quickSortFrom:inBegin to:inEnd @@ -5559,7 +5620,7 @@ end := stack removeLast. begin := stack removeLast. - end - begin < 12 ifTrue:[ + end - begin <= 12 ifTrue:[ "for small number of elements do an insertion sort, which is faster" bRun := begin + 1. bRun to:end do:[:idx| @@ -5575,7 +5636,7 @@ ]. ] ifFalse:[ depthLimit <= 0 ifTrue:[ - "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)" + "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)" ^ self mergeSort:[:a :b | a < b] from:inBegin to:inEnd. ]. elB := self at:begin. @@ -5673,7 +5734,7 @@ ]. ] ifFalse:[ depthLimit <= 0 ifTrue:[ - "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)" + "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)" ^ self mergeSort:sortBlock from:inBegin to:inEnd. ]. elB := self at:begin. @@ -5769,7 +5830,7 @@ ] ifFalse:[ "/ mergeSort does not handle policy yet "/ depthLimit <= 0 ifTrue:[ -"/ "this is apparently a degenerated quickSort - fall back to mergeSort with has O(log n)" +"/ "this is apparently a degenerated quickSort - fall back to mergeSort with has O(n * log n)" "/ ^ self mergeSort:sortBlock from:inBegin to:inEnd. "/ ]. elB := self at:begin. @@ -7547,9 +7608,78 @@ " ! +insertionSort + "sort the collection using a insertionSort algorithm. + The elements are compared using'#<' + i.e. they should offer a magnitude-like protocol. + + Insertion sort sort is a stable sorting algorithm, i.e. elements with the same sort key + keep their order (if you use e.g. #<) for comparison. + + The implementation uses the insertionSort algorithm, + which is slow for large collections O(n*n), but good for small or + almost sorted collections O(N). + + See also #quickSort for other sort algorithms + with different worst- and average case behavior)" + + self insertionSort:[:a :b | a < b] +! + +insertionSort:sortBlock + |stop| + + stop := self size. + (stop > 1) ifTrue:[ + self insertionSort:sortBlock from:1 to:stop + ]. +! + +insertionSort:sortBlock from:inBegin to:inEnd + "binary insertion sort. + The implementation uses the insertionSort algorithm, + which is slow for large collections O(n*n), but good for small or + almost sorted collections O(N)." + + |begin "{Class: SmallInteger}" + end "{Class: SmallInteger}" + prevIdx "{Class: SmallInteger}" + temp| + + begin := inBegin+1. + end := inEnd. + begin to:end do:[:idx| + temp := self at:idx. + prevIdx := idx-1. + "this is stable if #< or #> ist used for comparison (and not #<= or #>=)" + [prevIdx >= inBegin and:[sortBlock value:temp value:(self at:prevIdx)]] whileTrue:[ + self at:prevIdx+1 put:(self at:prevIdx). + prevIdx := prevIdx - 1. + ]. + (prevIdx+1) ~~ idx ifTrue:[ + self at:prevIdx+1 put:temp. + ]. + ]. + + " + |data| + data := Random new next:1000. + Transcript show:'merge random '; showCR:(Time millisecondsToRun:[data mergeSort]). + Transcript show:'merge sorted '; showCR:(Time millisecondsToRun:[data mergeSort]). + data reverse. + Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]). + + data := Random new next:1000. + Transcript show:'insert random '; showCR:(Time millisecondsToRun:[data insertionSort]). + Transcript show:'insert sorted '; showCR:(Time millisecondsToRun:[data insertionSort]). + data reverse. + Transcript show:'insert reverse '; showCR:(Time millisecondsToRun:[data insertionSort]). + " +! + mergeSort "sort the collection using a mergeSort algorithm. - The elements are compared using'<' + The elements are compared using '#<' i.e. they should offer a magnitude-like protocol. Merge sort is a stable sorting algorithm, i.e. elements with the same sort key @@ -7577,6 +7707,12 @@ Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]). data := random copy. + Transcript show:'quick block random '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]). + Transcript show:'quick block sorted '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]). + data := data reverse. + Transcript show:'quick block reverse '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a < b]]). + + data := random copy. Transcript show:'quick random '; showCR:(Time millisecondsToRun:[data quickSort]). Transcript show:'quick sorted '; showCR:(Time millisecondsToRun:[data quickSort]). data := data reverse. @@ -7586,11 +7722,11 @@ mergeSort:sortBlock "sort the collection using a mergeSort algorithm. - The elements are compared using'<' + The elements are compared using sortBlock i.e. they should offer a magnitude-like protocol. Merge sort is a stable sorting algorithm, i.e. elements with the same sort key - keep their order. + keep their order (if you use e.g. #< for comparison). The implementation uses the mergesort algorithm, which may not be the best possible for all situations @@ -7602,39 +7738,34 @@ stop := self size. (stop > 1) ifTrue:[ self mergeSort:sortBlock from:1 to:stop - ] - - " - #(1 16 7 98 3 19 4 0) mergeSort - - |data| - data := Random new next:200000. - Transcript show:'merge random '; showCR:(Time millisecondsToRun:[data mergeSort]). - Transcript show:'merge sorted '; showCR:(Time millisecondsToRun:[data mergeSort]). - data reverse. - Transcript show:'merge reverse '; showCR:(Time millisecondsToRun:[data mergeSort]). - - data := Random new next:200000. - Transcript show:'quick random '; showCR:(Time millisecondsToRun:[data sort]). - Transcript show:'quick sorted '; showCR:(Time millisecondsToRun:[data sort]). - data reverse. - Transcript show:'quick reverse '; showCR:(Time millisecondsToRun:[data sort]). - - data := Random new next:200000. - Transcript show:'quickr random '; showCR:(Time millisecondsToRun:[data randomizedSort]). - Transcript show:'quickr sorted '; showCR:(Time millisecondsToRun:[data randomizedSort]). - data reverse. - Transcript show:'quickr reverse '; showCR:(Time millisecondsToRun:[data randomizedSort]). - " -! - -mergeSort:aBlock from: startIndex to: stopIndex + ]. + + " + |random data| + + random := Random new next:500000. + + data := random copy. + Transcript show:'merge block random '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]). + Transcript show:'merge block sorted '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]). + data := data reverse. + Transcript show:'merge block reverse '; showCR:(Time millisecondsToRun:[data mergeSort:[:a :b| a <= b]]). + + data := random copy. + Transcript show:'quick block random '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]). + Transcript show:'quick block sorted '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]). + data := data reverse. + Transcript show:'quick block reverse '; showCR:(Time millisecondsToRun:[data quickSort:[:a :b| a <= b]]). + " +! + +mergeSort:aBlock from:startIndex to:stopIndex "Sort the given range of indices using the mergesort algorithm. Mergesort is a worst-case O(N log N) sorting algorithm that usually does only half as many comparisons as heapsort or quicksort. Merge sort is a stable sorting algorithm, i.e. elements with the same sort key - keep their order." + keep their order (if you use e.g. #< or #> for comparison)." "Details: recursively split the range to be sorted into two halves, mergesort each half, then merge the two halves together. @@ -7642,17 +7773,26 @@ and forth between the receiver and this copy. The recursion is set up so that the final merge is performed into the receiver, resulting in the receiver being completely sorted." - - | temp | - - self size <= 1 ifTrue: [^ self]. "nothing to do" - startIndex = stopIndex ifTrue: [^ self]. - (startIndex >= 1 and: [startIndex < stopIndex]) - ifFalse: [self error: 'bad start index']. - stopIndex <= self size - ifFalse: [self error: 'bad stop index']. - temp := self clone. - self mergeSortFrom: startIndex to: stopIndex src: temp dst: self by: aBlock. + + |mySize| + + mySize := self size. + mySize <= 1 ifTrue:[ + ^ self + ]. + startIndex = stopIndex ifTrue:[ + ^ self + ]. + (startIndex >= 1 and:[ startIndex < stopIndex ]) ifFalse:[ + self error:'bad start index' + ]. + stopIndex > mySize ifTrue:[ + self error:'bad stop index' + ]. + self + mergeSortFrom:startIndex + to:stopIndex + by:aBlock. ! quickSort @@ -8094,6 +8234,50 @@ "Modified: / 22-10-2008 / 21:25:07 / cg" ! +stableSort + "sort the collection inplace. The elements are compared using + '<=' i.e. they should offer a magnitude-like protocol. + + Use a stable sort algorithm - i.e. elements having an equal key will keep + their previous order." + + self mergeSort + + " + #(1 16 7 98 3 19 4 0) stableSort + + |data| + data := Random new next:100000. + Transcript show:'sort random '; showCR:(Time millisecondsToRun:[data stableSort]). + Transcript show:'sort sorted '; showCR:(Time millisecondsToRun:[data stableSort]). + data reverse. + Transcript show:'sort reverse '; showCR:(Time millisecondsToRun:[data stableSort]). + " +! + +stableSort:sortBlock + "sort the collection inplace using the 2-arg block sortBlock + for comparison. This allows any sort criteria to be implemented. + + Use a stable sort algorithm - i.e. elements having an equal key will keep + their previous order. + + NOTE: the sort algorithm will be stable, if the sortblock uses #< or #> for comparison!! + Do not use #<= of #>= if you want stable behavior." + + self mergeSort:sortBlock + + " + The 4@bla points keep their order: + {(4@1). (8@2). (4@2). (3@3). (4@3). (-1@4). (17@17). (19@19). + (12 @ 12). (13@13). (14@14). (15@15). (10@10). (8@8).} stableSort:[:a :b | a x < b x] + + But not with quickSort: + {(4@1). (8@2). (4@2). (3@3). (4@3). (-1@4). (17@17). (19@19). + (12 @ 12). (13@13). (14@14). (15@15). (10@10). (8@8).} sort:[:a :b | a x < b x] + " +! + topologicalSort:sortBlock "sort the collection inplace using a sloooow sort algorithm. This algorithm has O-square runtime behavior and should be used only @@ -8185,7 +8369,7 @@ !SequenceableCollection class methodsFor:'documentation'! version_CVS - ^ '$Header: /cvs/stx/stx/libbasic/SequenceableCollection.st,v 1.315 2012-04-11 20:01:30 stefan Exp $' + ^ '$Header: /cvs/stx/stx/libbasic/SequenceableCollection.st,v 1.316 2012-04-12 19:14:06 stefan Exp $' ! ! SequenceableCollection initialize!