StringUtilities.st
changeset 2804 89a55abc0ff2
parent 2214 9523a5ed7d8a
child 2853 217a303f28d1
--- a/StringUtilities.st	Wed Aug 08 14:00:17 2012 +0200
+++ b/StringUtilities.st	Thu Aug 09 05:45:19 2012 +0200
@@ -48,6 +48,9 @@
 !StringUtilities class methodsFor:'edit distance'!
 
 editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
+    "another, simpler edit distance between two strings.
+     See also: levenshtein"
+
     |editedS2 min d|
 
     s2 size > s1 size ifTrue:[
@@ -63,7 +66,12 @@
         ^ min + insrtWeight
     ].
 
-    ^ (1 to:s1 size) sum:[:i | ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) ifTrue:[0] ifFalse:[substWeight]] 
+    ^ (1 to:s1 size) sum:
+        [:i | 
+            ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) 
+                ifTrue:[0] 
+                ifFalse:[substWeight]
+        ] 
 
     "
      'comptuer' levenshteinTo:'computer'      
@@ -72,6 +80,8 @@
      'computr' levenshteinTo:'computer'                                    
      self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2        
     "
+
+    "Modified (format): / 09-08-2012 / 05:41:59 / cg"
 !
 
 isKey:k1 nextTo:k2
@@ -123,7 +133,7 @@
 keyboardLayoutForLanguage:lang
     "the keyboard layout (used with algorithms to find possible typing errors,
      for example: edit distance in levenshtein).
-     CAVEAT: hard coded us- and german keyboards here."
+     CAVEAT: hard coded us- and german keyboards here - should go into resource file."
 
     "/ danish
     lang == #da ifTrue:[
@@ -195,13 +205,16 @@
     "
      self keyboardLayoutForLanguage:#de 
     "
+
+    "Modified (comment): / 09-08-2012 / 05:39:19 / cg"
 !
 
 levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight
     "parametrized levenshtein.
      return the levenshtein distance of two strings;
      this value corrensponds to the number of replacements that have to be
-     made to get string2 from string1.
+     made to get string2 from string1. The smaller the returned number,
+     the more similar the two strings are.
 
      The arguments are the costs for
         s:substitution,
@@ -299,10 +312,12 @@
      self levenshteinDistanceFrom:'comptuer' to:'computer' 
             s:4 k:2 c:1 e:nil2 i:2 d:6    
     "
+
+    "Modified (comment): / 09-08-2012 / 05:40:08 / cg"
 ! !
 
 !StringUtilities class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.1 2009-08-16 21:07:08 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.2 2012-08-09 03:45:19 cg Exp $'
 ! !