--- a/CharacterArray.st Thu May 22 21:41:38 2003 +0200
+++ b/CharacterArray.st Fri May 23 12:12:28 2003 +0200
@@ -6163,21 +6163,33 @@
levenshteinTo:aString
"return the levenshtein distance to the argument, aString;
- this value corrensponds to the number of replacements that have to be
+ this value corresponds to the number of replacements that have to be
made to get aString from the receiver.
See IEEE transactions on Computers 1976 Pg 172 ff."
"
- in the following, we assum that ommiting a character
+ in the following, we assume that ommiting a character
is less of an error than inserting an extra character.
- Therefore the different insertion (i) and deletion (d)
- values.
- "
-
- ^ self levenshteinTo:aString s:4 c:1 i:2 d:6
-
- "
- 'ocmprt' levenshteinTo:'computer'
+ Therefore the different insertion (i) and deletion (d) values.
+ s: substitution weight
+ c: case weight
+ i: insertion of extra character weight
+ d: delete of a character weight
+ "
+
+ ^ self levenshteinTo:aString s:4 k:2 c:1 i:2 d:6
+
+ "
+ 'computer' levenshteinTo:'computer'
+ 'cOmputer' levenshteinTo:'computer'
+ 'cOmpuTer' levenshteinTo:'computer'
+ 'cimputer' levenshteinTo:'computer'
+ 'cumputer' levenshteinTo:'computer'
+
+ 'cmputer' levenshteinTo:'computer'
+ 'coomputer' levenshteinTo:'computer'
+
+ 'ocmprt' levenshteinTo:'computer'
'computer' levenshteinTo:'computer'
'ocmputer' levenshteinTo:'computer'
'cmputer' levenshteinTo:'computer'
@@ -6191,8 +6203,31 @@
return the levenshtein distance to the argument, aString;
this value corrensponds to the number of replacements that have to be
made to get aString from the receiver.
- The arguments are the costs for substitution, case-change, insertion and
- deletion of a character.
+ The arguments are the costs for
+ substitution, case-change, insertion and deletion of a character.
+ See IEEE transactions on Computers 1976 Pg 172 ff"
+
+ ^ self
+ levenshteinTo:aString
+ s:substWeight
+ k:substWeight
+ c:caseWeight
+ i:insrtWeight
+ d:deleteWeight
+!
+
+levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
+ "parametrized levenshtein.
+ return the levenshtein distance to the argument, aString;
+ this value corrensponds to the number of replacements that have to be
+ made to get aString from the receiver.
+ The arguments are the costs for
+ s:substitution,
+ k:keyboard type (substitution),
+ c:case-change,
+ i:insertion
+ d:deletion
+ of a character.
See IEEE transactions on Computers 1976 Pg 172 ff"
|d "delta matrix"
@@ -6213,7 +6248,7 @@
d := Array new:dimPlus1.
1 to:dimPlus1 do:[:i |
- d at:i put:(Array new:dimPlus1)
+ d at:i put:(Array new:dimPlus1)
].
"init help-matrix"
@@ -6221,34 +6256,39 @@
(d at:1) at:1 put:0.
row := d at:1.
1 to:dim do:[:j |
- row at:(j + 1) put:( (row at:j) + insrtWeight )
+ row at:(j + 1) put:( (row at:j) + insrtWeight )
].
1 to:dim do:[:i |
- (d at:(i + 1)) at:1 put:( ((d at:i) at:1) + deleteWeight )
+ (d at:(i + 1)) at:1 put:( ((d at:i) at:1) + deleteWeight )
].
1 to:len1 do:[:i |
- c1 := self at:i.
- 1 to:len2 do:[:j |
- c2 := aString at:j.
- (c1 == c2) ifTrue:[
- pp := 0
- ] ifFalse:[
- (c1 asLowercase == c2 asLowercase) ifTrue:[
- pp := caseWeight
- ] ifFalse:[
- pp := substWeight
- ]
- ].
- prevRow := d at:i.
- row := d at:(i + 1).
- col := j + 1.
- min := (prevRow at:j) + pp.
- min := min min:( (row at:j) + insrtWeight).
- min := min min:( (prevRow at:col) + deleteWeight).
- row at:col put: min
- ]
+ c1 := self at:i.
+ 1 to:len2 do:[:j |
+ c2 := aString at:j.
+ (c1 == c2) ifTrue:[
+ pp := 0
+ ] ifFalse:[
+ (c1 asLowercase == c2 asLowercase) ifTrue:[
+ pp := caseWeight
+ ] ifFalse:[
+ pp := substWeight.
+ substWeight ~~ kbdTypoWeight ifTrue:[
+ (DoWhatIMeanSupport isKey:c1 asLowercase nextTo:c2 asLowercase) ifTrue:[
+ pp := kbdTypoWeight.
+ ].
+ ].
+ ]
+ ].
+ prevRow := d at:i.
+ row := d at:(i + 1).
+ col := j + 1.
+ min := (prevRow at:j) + pp.
+ min := min min:( (row at:j) + insrtWeight).
+ min := min min:( (prevRow at:col) + deleteWeight).
+ row at:col put: min
+ ]
].
^ (d at:(len1 + 1)) at:(len2 + 1)
@@ -6400,7 +6440,7 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.248 2003-05-20 15:35:43 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.249 2003-05-23 10:12:28 cg Exp $'
! !
CharacterArray initialize!