CharacterArray.st
changeset 11861 99bf865f7b78
parent 11839 84dd112e1080
child 11865 59d0cc49b944
--- a/CharacterArray.st	Sun Aug 16 12:51:54 2009 +0200
+++ b/CharacterArray.st	Sun Aug 16 23:30:49 2009 +0200
@@ -1817,7 +1817,7 @@
 compareWith:aString
     "Compare the receiver with the argument and return 1 if the receiver is
      greater, 0 if equal and -1 if less than the argument.
-     This comparison is based on the elements ascii code -
+     This comparison is based on the elements' codepoints -
      i.e. upper/lowercase & national characters are NOT treated specially.
      'foo' compareWith: 'Foo' will return 1.
      while 'foo' sameAs:'Foo' will return true"
@@ -1832,10 +1832,10 @@
     n := mySize min:otherSize.
 
     1 to:n do:[:index |
-	c1 := self at:index.
-	c2 := aString at:index.
-	c1 > c2 ifTrue:[^ 1].
-	c1 < c2 ifTrue:[^ -1].
+        c1 := self at:index.
+        c2 := aString at:index.
+        c1 > c2 ifTrue:[^ 1].
+        c1 < c2 ifTrue:[^ -1].
     ].
     mySize > otherSize ifTrue:[^ 1].
     mySize < otherSize ifTrue:[^ -1].
@@ -1844,6 +1844,27 @@
     "Modified: 22.4.1996 / 15:56:07 / cg"
 !
 
+endsWith:aStringOrCharacter
+    "return true, if the receiver ends with aStringOrCharacter (a string or a character).
+     An empty receiver answers false for a character (#last is not sent to an empty receiver)."
+    |s|
+
+    (s := self string) ~~ self ifTrue:[
+        ^ s endsWith:aStringOrCharacter
+    ].
+    aStringOrCharacter isCharacter ifTrue:[
+        ^ self size > 0 and:[self last = aStringOrCharacter]
+    ].
+    ^ super endsWith:aStringOrCharacter
+
+    "
+     'hello world' endsWith:'world'
+     'hello world' asText allBold endsWith:'world'
+     'hello world' endsWith:$d
+     '' endsWith:$d
+    "
+!
+
 hammingDistanceTo:aString
     "return the hamming distance (the number of characters which are different).
      In information theory, the Hamming distance between two strings of equal length 
@@ -1900,6 +1921,73 @@
     "
 !
 
+levenshteinTo:aString
+    "return the levenshtein distance to the argument, aString, as computed by
+     StringUtilities with the default weights s:4 k:4 c:4 e:nil i:2 d:6;
+     presumably the weighted cost of the edits needed to get aString from the receiver.
+     See IEEE transactions on Computers 1976 Pg 172 ff."
+
+    "
+     in the following, we assume that omitting a character
+     is less of an error than inserting an extra character.
+     Therefore the different insertion (i) and deletion (d) values.
+        s: substitution weight
+        k: keyboard weight (typing a nearby key) - or nil (then use s)
+        c: case weight                           - or nil (then use s)
+        e: exchange weight                       - or nil (then use s*2)
+        i: insertion of extra character weight
+        d: delete of a character weight
+    "
+
+    ^ StringUtilities
+            levenshteinDistanceFrom:self
+            to:aString 
+            s:4 k:4 c:4 e:nil i:2 d:6
+
+    "
+     'computer' levenshteinTo:'computer'
+     'cOmputer' levenshteinTo:'computer'
+     'cOmpuTer' levenshteinTo:'computer'
+     'cimputer' levenshteinTo:'computer'
+     'cumputer' levenshteinTo:'computer'
+
+     'cmputer' levenshteinTo:'computer'
+     'coomputer' levenshteinTo:'computer'
+
+     'ocmprt' levenshteinTo:'computer'
+     'computer' levenshteinTo:'computer'
+     'ocmputer' levenshteinTo:'computer'
+     'cmputer' levenshteinTo:'computer'
+     'computer' levenshteinTo:'cmputer'
+     'Computer' levenshteinTo:'computer'
+
+     'compiter' levenshteinTo:'computer'
+     'compoter' levenshteinTo:'computer'
+
+     'comptuer' levenshteinTo:'computer'
+    "
+!
+
+levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
+    "parametrized levenshtein.
+     return the levenshtein distance to the argument, aString;
+     this value corresponds to the (weighted) number of edits that have to be
+     made to get aString from the receiver.
+     The arguments are the per-character costs for
+        s:substitution,
+        k:keyboard typo (substitution by a nearby key),
+        c:case-change,
+        i:insertion
+        d:deletion.
+     The exchange weight (e:) cannot be given here; nil is passed to StringUtilities.
+     See IEEE transactions on Computers 1976 Pg 172 ff"
+
+    ^ StringUtilities
+            levenshteinDistanceFrom:self
+            to:aString 
+            s:substWeight k:kbdTypoWeight c:caseWeight e:nil i:insrtWeight d:deleteWeight
+!
+
 sameAs:aString
     "Compare the receiver with the argument like =, but ignore case differences.
      Return true or false."
@@ -2027,6 +2115,81 @@
      'hello' sameStringAndEmphasisAs: 'fooba' asText allBold
      'hello' sameStringAndEmphasisAs: 'fooba' asText allItalic
     "
+!
+
+spellAgainst: aString
+    "return an integer between 0 and 100 indicating how similar
+     the argument is to the receiver.  No case conversion is done.
+     Characters are compared by value (#=) rather than by identity, so
+     characters which are equal but not the identical object match as well.
+     Simpler (but less exact) than the levenshtein distance; try both."
+
+    | i1     "{ Class: SmallInteger }"
+      i2     "{ Class: SmallInteger }"
+      next1  "{ Class: SmallInteger }"
+      next2  "{ Class: SmallInteger }"
+      size1  "{ Class: SmallInteger }"
+      size2  "{ Class: SmallInteger }"
+      score  "{ Class: SmallInteger }"
+      maxLen "{ Class: SmallInteger }" |
+
+    size1 := self size.
+    size2 := aString size.
+    maxLen := size1 max:size2.
+    score := 0.
+    i1 := i2 := 1.
+    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
+        next1 := i1 + 1.
+        next2 := i2 + 1.
+        (self at:i1) = (aString at:i2) ifTrue: [
+            score := score+1.                   "same character at both positions"
+            i1 := next1.
+            i2 := next2
+        ] ifFalse: [                            "mismatch: look one character ahead"
+            (i2 < size2 and: [(self at:i1) = (aString at:next2)]) ifTrue: [
+                i2 := next2                     "skip one character of aString"
+            ] ifFalse: [
+                (i1 < size1 and: [(self at:next1) = (aString at:i2)]) ifTrue: [
+                    i1 := next1                 "skip one character of the receiver"
+                ] ifFalse: [
+                    i1 := next1.                "no resync possible: skip in both"
+                    i2 := next2
+                ]
+            ]
+        ]
+    ].
+
+    score == maxLen ifTrue: [^ 100].            "also covers two empty strings (no division by 0)"
+    ^ 100 * score // maxLen
+
+    "
+     'Smalltalk' spellAgainst: 'Smalltlak'
+     'Smalltalk' spellAgainst: 'smalltlak'
+     'Smalltalk' spellAgainst: 'smalltalk'
+     'Smalltalk' spellAgainst: 'smalltlk'
+     'Smalltalk' spellAgainst: 'Smalltolk'
+    "
+!
+
+startsWith:aString
+    "answer true, if the receiver begins with aString.
+     If the argument is empty, true is answered."
+
+    |plainString|
+
+    plainString := self string.
+    plainString == self ifTrue:[
+        ^ super startsWith:aString
+    ].
+    ^ plainString startsWith:aString
+
+    "
+     'hello world' startsWith:'hello'
+     'hello world' asText allBold startsWith:'hello'
+     'hello world' asText allBold startsWith:''
+    "
+    "Created: 12.5.1996 / 15:46:40 / cg"
+    "Modified: 12.5.1996 / 15:49:24 / cg"
 ! !
 
 !CharacterArray methodsFor:'converting'!
@@ -5160,27 +5323,6 @@
 
 !CharacterArray methodsFor:'testing'!
 
-endsWith:aStringOrCharacter
-    "return true, if the receiver ends with something, aStringOrCharacter."
-
-    |s|
-
-    (s := self string) ~~ self ifTrue:[
-	^ s endsWith:aStringOrCharacter
-    ].
-    aStringOrCharacter isCharacter ifTrue:[
-	^ self last = aStringOrCharacter
-    ].
-    ^ super endsWith:aStringOrCharacter
-
-    "
-     'hello world' endsWith:'world'
-     'hello world' asText allBold endsWith:'world'
-    "
-
-    "Modified: 12.5.1996 / 15:49:18 / cg"
-!
-
 isAlphaNumeric
     "return true, if the receiver is some alphanumeric word;
      i.e. consists of a letter followed by letters or digits."
@@ -5375,122 +5517,6 @@
     "
 !
 
-levenshteinTo:aString
-    "return the levenshtein distance to the argument, aString;
-     this value corresponds to the number of replacements that have to be
-     made to get aString from the receiver.
-     See IEEE transactions on Computers 1976 Pg 172 ff."
-
-    "
-     in the following, we assume that ommiting a character
-     is less of an error than inserting an extra character.
-     Therefore the different insertion (i) and deletion (d) values.
-     s: substitution weight
-     k: keyboard weight (typing a nearby key)
-     c: case weight
-     i: insertion of extra character weight
-     d: delete of a character weight
-    "
-
-    ^ self levenshteinTo:aString s:4 k:2 c:1 i:2 d:6
-
-    "
-     'computer' levenshteinTo:'computer'
-     'cOmputer' levenshteinTo:'computer'
-     'cOmpuTer' levenshteinTo:'computer'
-     'cimputer' levenshteinTo:'computer'
-     'cumputer' levenshteinTo:'computer'
-
-     'cmputer' levenshteinTo:'computer'
-     'coomputer' levenshteinTo:'computer'
-
-     'ocmprt' levenshteinTo:'computer'
-     'computer' levenshteinTo:'computer'
-     'ocmputer' levenshteinTo:'computer'
-     'cmputer' levenshteinTo:'computer'
-     'computer' levenshteinTo:'cmputer'
-     'Computer' levenshteinTo:'computer'
-    "
-!
-
-levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
-    "parametrized levenshtein.
-     return the levenshtein distance to the argument, aString;
-     this value corrensponds to the number of replacements that have to be
-     made to get aString from the receiver.
-     The arguments are the costs for
-	s:substitution,
-	k:keyboard type (substitution),
-	c:case-change,
-	i:insertion
-	d:deletion
-     of a character.
-     See IEEE transactions on Computers 1976 Pg 172 ff"
-
-    |d  "delta matrix"
-     len1 "{ Class: SmallInteger }"
-     len2 "{ Class: SmallInteger }"
-     dim  "{ Class: SmallInteger }"
-     prevRow row col
-     dimPlus1 "{ Class: SmallInteger }"
-     min pp c1 c2|
-
-    len1 := self size.
-    len2 := aString size.
-
-    "create the help-matrix"
-
-    dim := len1 max:len2.
-    dimPlus1 := dim + 1.
-
-    d := Array new:dimPlus1.
-    1 to:dimPlus1 do:[:i |
-	d at:i put:(Array new:dimPlus1)
-    ].
-
-    "init help-matrix"
-
-    (d at:1) at:1 put:0.
-    row := d at:1.
-    1 to:dim do:[:j |
-	row at:(j + 1) put:( (row at:j) + insrtWeight )
-    ].
-
-    1 to:dim do:[:i |
-	 (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
-    ].
-
-    1 to:len1 do:[:i |
-	c1 := self at:i.
-	1 to:len2 do:[:j |
-	    c2 := aString at:j.
-	    (c1 == c2) ifTrue:[
-		pp := 0
-	    ] ifFalse:[
-		(c1 asLowercase == c2 asLowercase) ifTrue:[
-		    pp := caseWeight
-		] ifFalse:[
-		    pp := substWeight.
-		    substWeight ~~ kbdTypoWeight ifTrue:[
-			(DoWhatIMeanSupport isKey:c1 asLowercase nextTo:c2 asLowercase) ifTrue:[
-			    pp := kbdTypoWeight.
-			].
-		    ].
-		]
-	    ].
-	    prevRow := d at:i.
-	    row := d at:(i + 1).
-	    col := j + 1.
-	    min := (prevRow at:j) + pp.
-	    min := min min:( (row at:j) + insrtWeight).
-	    min := min min:( (prevRow at:col) + deleteWeight).
-	    row at:col put: min
-	]
-    ].
-
-    ^ (d at:(len1 + 1)) at:(len2 + 1)
-!
-
 numArgs
     "treating the receiver as a message selector, return how many arguments would it take"
 
@@ -5547,81 +5573,6 @@
      'hello' partsIfSelector
      '+' partsIfSelector
     "
-!
-
-spellAgainst: aString
-    "return an integer between 0 and 100 indicating how similar
-     the argument is to the receiver.  No case conversion is done.
-     This algorithm is much simpler (but also less exact) than the
-     levenshtein distance. Experiment which is better for your
-     application."
-
-    | i1     "{ Class: SmallInteger }"
-      i2     "{ Class: SmallInteger }"
-      next1  "{ Class: SmallInteger }"
-      next2  "{ Class: SmallInteger }"
-      size1  "{ Class: SmallInteger }"
-      size2  "{ Class: SmallInteger }"
-      score  "{ Class: SmallInteger }"
-      maxLen "{ Class: SmallInteger }" |
-
-    size1 := self size.
-    size2 := aString size.
-    maxLen := size1 max:size2.
-    score := 0.
-    i1 := i2 := 1.
-    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
-	next1 := i1 + 1.
-	next2 := i2 + 1.
-	(self at:i1) == (aString at:i2) ifTrue: [
-	    score := score+1.
-	    i1 := next1.
-	    i2 := next2
-	] ifFalse: [
-	    (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [
-		i2 := next2
-	    ] ifFalse: [
-		(i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [
-		    i1 := next1
-		] ifFalse: [
-		    i1 := next1.
-		    i2 := next2
-		]
-	    ]
-	]
-    ].
-
-    score == maxLen ifTrue: [^ 100].
-    ^ 100 * score // maxLen
-
-    "
-     'Smalltalk' spellAgainst: 'Smalltlak'
-     'Smalltalk' spellAgainst: 'smalltlak'
-     'Smalltalk' spellAgainst: 'smalltalk'
-     'Smalltalk' spellAgainst: 'smalltlk'
-     'Smalltalk' spellAgainst: 'Smalltolk'
-    "
-!
-
-startsWith:aString
-    "return true, if the receiver starts with something, aString.
-     If the argument is empty, true is returned."
-
-    |s|
-
-    (s := self string) ~~ self ifTrue:[
-	^ s startsWith:aString
-    ].
-    ^ super startsWith:aString
-
-    "
-     'hello world' startsWith:'hello'
-     'hello world' asText allBold startsWith:'hello'
-     'hello world' asText allBold startsWith:''
-    "
-
-    "Created: 12.5.1996 / 15:46:40 / cg"
-    "Modified: 12.5.1996 / 15:49:24 / cg"
 ! !
 
 !CharacterArray methodsFor:'tracing'!
@@ -5644,7 +5595,7 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.401 2009-08-10 13:39:08 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.402 2009-08-16 21:30:49 cg Exp $'
 ! !
 
 CharacterArray initialize!