--- a/CharacterArray.st Sun Aug 16 12:51:54 2009 +0200
+++ b/CharacterArray.st Sun Aug 16 23:30:49 2009 +0200
@@ -1817,7 +1817,7 @@
compareWith:aString
"Compare the receiver with the argument and return 1 if the receiver is
greater, 0 if equal and -1 if less than the argument.
- This comparison is based on the elements ascii code -
+ This comparison is based on the elements' codepoints -
i.e. upper/lowercase & national characters are NOT treated specially.
'foo' compareWith: 'Foo' will return 1.
while 'foo' sameAs:'Foo' will return true"
@@ -1832,10 +1832,10 @@
n := mySize min:otherSize.
1 to:n do:[:index |
- c1 := self at:index.
- c2 := aString at:index.
- c1 > c2 ifTrue:[^ 1].
- c1 < c2 ifTrue:[^ -1].
+ c1 := self at:index.
+ c2 := aString at:index.
+ c1 > c2 ifTrue:[^ 1].
+ c1 < c2 ifTrue:[^ -1].
].
mySize > otherSize ifTrue:[^ 1].
mySize < otherSize ifTrue:[^ -1].
@@ -1844,6 +1844,27 @@
"Modified: 22.4.1996 / 15:56:07 / cg"
!
+endsWith:aStringOrCharacter
+ "return true, if the receiver ends with aStringOrCharacter. A character argument is compared with the last element (false for an empty receiver); anything else is passed on to the inherited endsWith:."
+
+ |s|
+
+ (s := self string) ~~ self ifTrue:[ "receiver is not its own string (presumably Text-like receivers, as in the example below): compare on that string"
+ ^ s endsWith:aStringOrCharacter
+ ].
+ aStringOrCharacter isCharacter ifTrue:[
+ ^ self notEmpty and:[self last = aStringOrCharacter] "guard: never send #last to an empty receiver"
+ ].
+ ^ super endsWith:aStringOrCharacter
+
+ "
+ 'hello world' endsWith:'world'
+ 'hello world' asText allBold endsWith:'world'
+ "
+
+ "Modified: 12.5.1996 / 15:49:18 / cg"
+!
+
hammingDistanceTo:aString
"return the hamming distance (the number of characters which are different).
In information theory, the Hamming distance between two strings of equal length
@@ -1900,6 +1921,73 @@
"
!
+levenshteinTo:aString
+ "return the levenshtein distance to the argument, aString;
+ this value corresponds to the number of replacements that have to be
+ made to get aString from the receiver (computed by StringUtilities with the weights listed below).
+ See IEEE transactions on Computers 1976 Pg 172 ff."
+
+ "
+ in the following, we assume that omitting a character
+ is less of an error than inserting an extra character.
+ Therefore the different insertion (i) and deletion (d) values.
+ s: substitution weight
+ k: keyboard weight (typing a nearby key) - or nil (then use s)
+ c: case weight - or nil (then use s)
+ e: exchange weight - or nil (then use s*2)
+ i: insertion of extra character weight
+ d: deletion of a character weight
+ "
+
+ ^ StringUtilities
+ levenshteinDistanceFrom:self
+ to:aString
+ s:4 k:4 c:4 e:nil i:2 d:6 "e:nil - exchange weight, described above as falling back to s*2"
+
+ "
+ 'computer' levenshteinTo:'computer'
+ 'cOmputer' levenshteinTo:'computer'
+ 'cOmpuTer' levenshteinTo:'computer'
+ 'cimputer' levenshteinTo:'computer'
+ 'cumputer' levenshteinTo:'computer'
+
+ 'cmputer' levenshteinTo:'computer'
+ 'coomputer' levenshteinTo:'computer'
+
+ 'ocmprt' levenshteinTo:'computer'
+ 'computer' levenshteinTo:'computer'
+ 'ocmputer' levenshteinTo:'computer'
+ 'cmputer' levenshteinTo:'computer'
+ 'computer' levenshteinTo:'cmputer'
+ 'Computer' levenshteinTo:'computer'
+
+ 'compiter' levenshteinTo:'computer'
+ 'compoter' levenshteinTo:'computer'
+
+ 'comptuer' levenshteinTo:'computer'
+ "
+!
+
+levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
+ "parametrized levenshtein (the computation is delegated to StringUtilities).
+ return the levenshtein distance to the argument, aString;
+ this value corresponds to the number of replacements that have to be
+ made to get aString from the receiver.
+ The arguments are the costs for
+ s:substitution,
+ k:keyboard typo (substitution),
+ c:case-change,
+ i:insertion,
+ d:deletion
+ of a character.
+ See IEEE transactions on Computers 1976 Pg 172 ff"
+
+ ^ StringUtilities
+ levenshteinDistanceFrom:self
+ to:aString
+ s:substWeight k:kbdTypoWeight c:caseWeight e:nil i:insrtWeight d:deleteWeight "no separate exchange weight is passed (e:nil)"
+!
+
sameAs:aString
"Compare the receiver with the argument like =, but ignore case differences.
Return true or false."
@@ -2027,6 +2115,81 @@
'hello' sameStringAndEmphasisAs: 'fooba' asText allBold
'hello' sameStringAndEmphasisAs: 'fooba' asText allItalic
"
+!
+
+spellAgainst: aString
+ "return an integer between 0 and 100 indicating how similar
+ the argument is to the receiver: 100 * (number of matched characters) // (size of the
+ longer string), or 100 if every position matched. No case conversion is done.
+ This algorithm is much simpler (but also less exact) than the
+ levenshtein distance. Experiment to see which is better for your application."
+
+ | i1 "{ Class: SmallInteger }"
+ i2 "{ Class: SmallInteger }"
+ next1 "{ Class: SmallInteger }"
+ next2 "{ Class: SmallInteger }"
+ size1 "{ Class: SmallInteger }"
+ size2 "{ Class: SmallInteger }"
+ score "{ Class: SmallInteger }"
+ maxLen "{ Class: SmallInteger }" |
+
+ size1 := self size.
+ size2 := aString size.
+ maxLen := size1 max:size2.
+ score := 0.
+ i1 := i2 := 1.
+ [i1 <= size1 and: [i2 <= size2]] whileTrue:[ "walk both strings until either one is exhausted"
+ next1 := i1 + 1.
+ next2 := i2 + 1.
+ (self at:i1) == (aString at:i2) ifTrue: [ "match: count it and advance in both. NOTE(review): == is an identity compare - confirm it is adequate for wide characters"
+ score := score+1.
+ i1 := next1.
+ i2 := next2
+ ] ifFalse: [
+ (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [ "receiver's char equals the argument's next char: skip one char of the argument only"
+ i2 := next2
+ ] ifFalse: [
+ (i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [ "receiver's next char equals the argument's char: skip one char of the receiver only"
+ i1 := next1
+ ] ifFalse: [ "no match one step ahead on either side: skip one char in both"
+ i1 := next1.
+ i2 := next2
+ ]
+ ]
+ ]
+ ].
+
+ score == maxLen ifTrue: [^ 100]. "also taken for two empty strings (0 == 0), so the division below never sees maxLen = 0"
+ ^ 100 * score // maxLen "evaluated left to right: (100 * score) // maxLen"
+
+ "
+ 'Smalltalk' spellAgainst: 'Smalltlak'
+ 'Smalltalk' spellAgainst: 'smalltlak'
+ 'Smalltalk' spellAgainst: 'smalltalk'
+ 'Smalltalk' spellAgainst: 'smalltlk'
+ 'Smalltalk' spellAgainst: 'Smalltolk'
+ "
+!
+
+startsWith:aString
+ "answer true, if the receiver starts with something, aString.
+ For an empty argument, true is answered."
+
+ |plainString|
+
+ plainString := self string.
+ plainString == self ifTrue:[^ super startsWith:aString].
+ "the receiver is not its own string: let that string do the comparison"
+ ^ plainString startsWith:aString
+
+ "
+ 'hello world' startsWith:'hello'
+ 'hello world' asText allBold startsWith:'hello'
+ 'hello world' asText allBold startsWith:''
+ "
+
+ "Created: 12.5.1996 / 15:46:40 / cg"
+ "Modified: 12.5.1996 / 15:49:24 / cg"
! !
!CharacterArray methodsFor:'converting'!
@@ -5160,27 +5323,6 @@
!CharacterArray methodsFor:'testing'!
-endsWith:aStringOrCharacter
- "return true, if the receiver ends with something, aStringOrCharacter."
-
- |s|
-
- (s := self string) ~~ self ifTrue:[
- ^ s endsWith:aStringOrCharacter
- ].
- aStringOrCharacter isCharacter ifTrue:[
- ^ self last = aStringOrCharacter
- ].
- ^ super endsWith:aStringOrCharacter
-
- "
- 'hello world' endsWith:'world'
- 'hello world' asText allBold endsWith:'world'
- "
-
- "Modified: 12.5.1996 / 15:49:18 / cg"
-!
-
isAlphaNumeric
"return true, if the receiver is some alphanumeric word;
i.e. consists of a letter followed by letters or digits."
@@ -5375,122 +5517,6 @@
"
!
-levenshteinTo:aString
- "return the levenshtein distance to the argument, aString;
- this value corresponds to the number of replacements that have to be
- made to get aString from the receiver.
- See IEEE transactions on Computers 1976 Pg 172 ff."
-
- "
- in the following, we assume that ommiting a character
- is less of an error than inserting an extra character.
- Therefore the different insertion (i) and deletion (d) values.
- s: substitution weight
- k: keyboard weight (typing a nearby key)
- c: case weight
- i: insertion of extra character weight
- d: delete of a character weight
- "
-
- ^ self levenshteinTo:aString s:4 k:2 c:1 i:2 d:6
-
- "
- 'computer' levenshteinTo:'computer'
- 'cOmputer' levenshteinTo:'computer'
- 'cOmpuTer' levenshteinTo:'computer'
- 'cimputer' levenshteinTo:'computer'
- 'cumputer' levenshteinTo:'computer'
-
- 'cmputer' levenshteinTo:'computer'
- 'coomputer' levenshteinTo:'computer'
-
- 'ocmprt' levenshteinTo:'computer'
- 'computer' levenshteinTo:'computer'
- 'ocmputer' levenshteinTo:'computer'
- 'cmputer' levenshteinTo:'computer'
- 'computer' levenshteinTo:'cmputer'
- 'Computer' levenshteinTo:'computer'
- "
-!
-
-levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
- "parametrized levenshtein.
- return the levenshtein distance to the argument, aString;
- this value corrensponds to the number of replacements that have to be
- made to get aString from the receiver.
- The arguments are the costs for
- s:substitution,
- k:keyboard type (substitution),
- c:case-change,
- i:insertion
- d:deletion
- of a character.
- See IEEE transactions on Computers 1976 Pg 172 ff"
-
- |d "delta matrix"
- len1 "{ Class: SmallInteger }"
- len2 "{ Class: SmallInteger }"
- dim "{ Class: SmallInteger }"
- prevRow row col
- dimPlus1 "{ Class: SmallInteger }"
- min pp c1 c2|
-
- len1 := self size.
- len2 := aString size.
-
- "create the help-matrix"
-
- dim := len1 max:len2.
- dimPlus1 := dim + 1.
-
- d := Array new:dimPlus1.
- 1 to:dimPlus1 do:[:i |
- d at:i put:(Array new:dimPlus1)
- ].
-
- "init help-matrix"
-
- (d at:1) at:1 put:0.
- row := d at:1.
- 1 to:dim do:[:j |
- row at:(j + 1) put:( (row at:j) + insrtWeight )
- ].
-
- 1 to:dim do:[:i |
- (d at:(i + 1)) at:1 put:( ((d at:i) at:1) + deleteWeight )
- ].
-
- 1 to:len1 do:[:i |
- c1 := self at:i.
- 1 to:len2 do:[:j |
- c2 := aString at:j.
- (c1 == c2) ifTrue:[
- pp := 0
- ] ifFalse:[
- (c1 asLowercase == c2 asLowercase) ifTrue:[
- pp := caseWeight
- ] ifFalse:[
- pp := substWeight.
- substWeight ~~ kbdTypoWeight ifTrue:[
- (DoWhatIMeanSupport isKey:c1 asLowercase nextTo:c2 asLowercase) ifTrue:[
- pp := kbdTypoWeight.
- ].
- ].
- ]
- ].
- prevRow := d at:i.
- row := d at:(i + 1).
- col := j + 1.
- min := (prevRow at:j) + pp.
- min := min min:( (row at:j) + insrtWeight).
- min := min min:( (prevRow at:col) + deleteWeight).
- row at:col put: min
- ]
- ].
-
- ^ (d at:(len1 + 1)) at:(len2 + 1)
-!
-
numArgs
"treating the receiver as a message selector, return how many arguments would it take"
@@ -5547,81 +5573,6 @@
'hello' partsIfSelector
'+' partsIfSelector
"
-!
-
-spellAgainst: aString
- "return an integer between 0 and 100 indicating how similar
- the argument is to the receiver. No case conversion is done.
- This algorithm is much simpler (but also less exact) than the
- levenshtein distance. Experiment which is better for your
- application."
-
- | i1 "{ Class: SmallInteger }"
- i2 "{ Class: SmallInteger }"
- next1 "{ Class: SmallInteger }"
- next2 "{ Class: SmallInteger }"
- size1 "{ Class: SmallInteger }"
- size2 "{ Class: SmallInteger }"
- score "{ Class: SmallInteger }"
- maxLen "{ Class: SmallInteger }" |
-
- size1 := self size.
- size2 := aString size.
- maxLen := size1 max:size2.
- score := 0.
- i1 := i2 := 1.
- [i1 <= size1 and: [i2 <= size2]] whileTrue:[
- next1 := i1 + 1.
- next2 := i2 + 1.
- (self at:i1) == (aString at:i2) ifTrue: [
- score := score+1.
- i1 := next1.
- i2 := next2
- ] ifFalse: [
- (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [
- i2 := next2
- ] ifFalse: [
- (i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [
- i1 := next1
- ] ifFalse: [
- i1 := next1.
- i2 := next2
- ]
- ]
- ]
- ].
-
- score == maxLen ifTrue: [^ 100].
- ^ 100 * score // maxLen
-
- "
- 'Smalltalk' spellAgainst: 'Smalltlak'
- 'Smalltalk' spellAgainst: 'smalltlak'
- 'Smalltalk' spellAgainst: 'smalltalk'
- 'Smalltalk' spellAgainst: 'smalltlk'
- 'Smalltalk' spellAgainst: 'Smalltolk'
- "
-!
-
-startsWith:aString
- "return true, if the receiver starts with something, aString.
- If the argument is empty, true is returned."
-
- |s|
-
- (s := self string) ~~ self ifTrue:[
- ^ s startsWith:aString
- ].
- ^ super startsWith:aString
-
- "
- 'hello world' startsWith:'hello'
- 'hello world' asText allBold startsWith:'hello'
- 'hello world' asText allBold startsWith:''
- "
-
- "Created: 12.5.1996 / 15:46:40 / cg"
- "Modified: 12.5.1996 / 15:49:24 / cg"
! !
!CharacterArray methodsFor:'tracing'!
@@ -5644,7 +5595,7 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.401 2009-08-10 13:39:08 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.402 2009-08-16 21:30:49 cg Exp $'
! !
CharacterArray initialize!