# HG changeset patch # User Claus Gittinger # Date 1344483919 -7200 # Node ID 89a55abc0ff2ba42ef336076ad179dedb29748c6 # Parent bc70d2d7ab9f0a1ca9489b7ba88377a6d03d27cc comment/format in: #editDistanceFrom:to:s:k:c:e:i: #levenshteinDistanceFrom:to:s:k:c:e:i:d: #keyboardLayoutForLanguage: diff -r bc70d2d7ab9f -r 89a55abc0ff2 StringUtilities.st --- a/StringUtilities.st Wed Aug 08 14:00:17 2012 +0200 +++ b/StringUtilities.st Thu Aug 09 05:45:19 2012 +0200 @@ -48,6 +48,9 @@ !StringUtilities class methodsFor:'edit distance'! editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight + "another, simpler editing distance between two strings. + See also: levenshtein" + |editedS2 min d| s2 size > s1 size ifTrue:[ @@ -63,7 +66,12 @@ ^ min + insrtWeight ]. - ^ (1 to:s1 size) sum:[:i | ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) ifTrue:[0] ifFalse:[substWeight]] + ^ (1 to:s1 size) sum: + [:i | + ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) + ifTrue:[0] + ifFalse:[substWeight] + ] " 'comptuer' levenshteinTo:'computer' @@ -72,6 +80,8 @@ 'computr' levenshteinTo:'computer' self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2 " + + "Modified (format): / 09-08-2012 / 05:41:59 / cg" ! isKey:k1 nextTo:k2 @@ -123,7 +133,7 @@ keyboardLayoutForLanguage:lang "the keyboard layout (used with algorithms to find possible typing errors, for example: edit distance in levenshtein). - CAVEAT: hard coded us- and german keyboards here." + CAVEAT: hard coded us- and german keyboards here - should go into resource file." "/ danish lang == #da ifTrue:[ @@ -195,13 +205,16 @@ " self keyboardLayoutForLanguage:#de " + + "Modified (comment): / 09-08-2012 / 05:39:19 / cg" ! levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight "parametrized levenshtein. 
return the levenshtein distance of two strings; this value corrensponds to the number of replacements that have to be - made to get string2 from string1. + made to get string2 from string1. The smaller the returned number, + the more similar are the two strings. The arguments are the costs for s:substitution, @@ -299,10 +312,12 @@ self levenshteinDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil2 i:2 d:6 " + + "Modified (comment): / 09-08-2012 / 05:40:08 / cg" ! ! !StringUtilities class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.1 2009-08-16 21:07:08 cg Exp $' + ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.2 2012-08-09 03:45:19 cg Exp $' ! !