--- a/StringUtilities.st Wed Aug 08 14:00:17 2012 +0200
+++ b/StringUtilities.st Thu Aug 09 05:45:19 2012 +0200
@@ -48,6 +48,9 @@
!StringUtilities class methodsFor:'edit distance'!
editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
+ "another, simpler editing distance between two strings.
+ See also: levenshtein"
+
|editedS2 min d|
s2 size > s1 size ifTrue:[
@@ -63,7 +66,12 @@
^ min + insrtWeight
].
- ^ (1 to:s1 size) sum:[:i | ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) ifTrue:[0] ifFalse:[substWeight]]
+ ^ (1 to:s1 size) sum:
+ [:i |
+ ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)])
+ ifTrue:[0]
+ ifFalse:[substWeight]
+ ]
"
'comptuer' levenshteinTo:'computer'
@@ -72,6 +80,8 @@
'computr' levenshteinTo:'computer'
self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2
"
+
+ "Modified (format): / 09-08-2012 / 05:41:59 / cg"
!
isKey:k1 nextTo:k2
@@ -123,7 +133,7 @@
keyboardLayoutForLanguage:lang
"the keyboard layout (used with algorithms to find possible typing errors,
for example: edit distance in levenshtein).
- CAVEAT: hard coded us- and german keyboards here."
+ CAVEAT: hard-coded US and German keyboards here - should go into a resource file."
"/ danish
lang == #da ifTrue:[
@@ -195,13 +205,16 @@
"
self keyboardLayoutForLanguage:#de
"
+
+ "Modified (comment): / 09-08-2012 / 05:39:19 / cg"
!
levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight
"parametrized levenshtein.
return the levenshtein distance of two strings;
this value corrensponds to the number of replacements that have to be
- made to get string2 from string1.
+ made to get string2 from string1. The smaller the returned number,
+ the more similar the two strings are.
The arguments are the costs for
s:substitution,
@@ -299,10 +312,12 @@
self levenshteinDistanceFrom:'comptuer' to:'computer'
s:4 k:2 c:1 e:nil2 i:2 d:6
"
+
+ "Modified (comment): / 09-08-2012 / 05:40:08 / cg"
! !
!StringUtilities class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.1 2009-08-16 21:07:08 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.2 2012-08-09 03:45:19 cg Exp $'
! !