StringUtilities.st
changeset 2804 89a55abc0ff2
parent 2214 9523a5ed7d8a
child 2853 217a303f28d1
equal deleted inserted replaced
2803:bc70d2d7ab9f 2804:89a55abc0ff2
    46 ! !
    46 ! !
    47 
    47 
    48 !StringUtilities class methodsFor:'edit distance'!
    48 !StringUtilities class methodsFor:'edit distance'!
    49 
    49 
    50 editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
    50 editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
       
    51     "another, simpler editing distance between two strings. 
       
    52      See also: levenshtein"
       
    53 
    51     |editedS2 min d|
    54     |editedS2 min d|
    52 
    55 
    53     s2 size > s1 size ifTrue:[
    56     s2 size > s1 size ifTrue:[
    54         ^ self editDistanceFrom:s2 to:s1 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
    57         ^ self editDistanceFrom:s2 to:s1 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
    55     ].
    58     ].
    61             min := (min ? d) min:d.
    64             min := (min ? d) min:d.
    62         ].
    65         ].
    63         ^ min + insrtWeight
    66         ^ min + insrtWeight
    64     ].
    67     ].
    65 
    68 
    66     ^ (1 to:s1 size) sum:[:i | ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) ifTrue:[0] ifFalse:[substWeight]] 
    69     ^ (1 to:s1 size) sum:
       
    70         [:i | 
       
    71             ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) 
       
    72                 ifTrue:[0] 
       
    73                 ifFalse:[substWeight]
       
    74         ] 
    67 
    75 
    68     "
    76     "
    69      'comptuer' levenshteinTo:'computer'      
    77      'comptuer' levenshteinTo:'computer'      
    70      self editDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil i:2        
    78      self editDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil i:2        
    71 
    79 
    72      'computr' levenshteinTo:'computer'                                    
    80      'computr' levenshteinTo:'computer'                                    
    73      self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2        
    81      self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2        
    74     "
    82     "
       
    83 
       
    84     "Modified (format): / 09-08-2012 / 05:41:59 / cg"
    75 !
    85 !
    76 
    86 
    77 isKey:k1 nextTo:k2
    87 isKey:k1 nextTo:k2
    78     "return true, if k1 and k2 are adjacent keys on the keyboard.
    88     "return true, if k1 and k2 are adjacent keys on the keyboard.
    79      This is used to specially prioritize plausible typing errors of adjacent keys."
    89      This is used to specially prioritize plausible typing errors of adjacent keys."
   121 !
   131 !
   122 
   132 
   123 keyboardLayoutForLanguage:lang
   133 keyboardLayoutForLanguage:lang
   124     "the keyboard layout (used with algorithms to find possible typing errors,
   134     "the keyboard layout (used with algorithms to find possible typing errors,
   125      for example: edit distance in levenshtein).
   135      for example: edit distance in levenshtein).
   126      CAVEAT: hard coded us- and german keyboards here."
   136      CAVEAT: hard coded us- and german keyboards here - should go into resource file."
   127 
   137 
   128     "/ danish
   138     "/ danish
   129     lang == #da ifTrue:[
   139     lang == #da ifTrue:[
   130         ^ #( 
   140         ^ #( 
   131                '1234567890-'
   141                '1234567890-'
   193            '***zxcvbnm' ).
   203            '***zxcvbnm' ).
   194 
   204 
   195     "
   205     "
   196      self keyboardLayoutForLanguage:#de 
   206      self keyboardLayoutForLanguage:#de 
   197     "
   207     "
       
   208 
       
   209     "Modified (comment): / 09-08-2012 / 05:39:19 / cg"
   198 !
   210 !
   199 
   211 
   200 levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight
   212 levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight
   201     "parametrized levenshtein.
   213     "parametrized levenshtein.
   202      return the levenshtein distance of two strings;
   214      return the levenshtein distance of two strings;
   203      this value corresponds to the number of replacements that have to be
   215      this value corresponds to the number of replacements that have to be
   204      made to get string2 from string1.
   216      made to get string2 from string1. The smaller the returned number,
       
   217      the more similar are the two strings.
   205 
   218 
   206      The arguments are the costs for
   219      The arguments are the costs for
   207         s:substitution,
   220         s:substitution,
   208         k:keyboard type (substitution),   if nil, s is used
   221         k:keyboard type (substitution),   if nil, s is used
   209         c:case-change,                    if nil, s is used
   222         c:case-change,                    if nil, s is used
   297      'comptuer' levenshteinTo:'computer'       
   310      'comptuer' levenshteinTo:'computer'       
   298 
   311 
   299      self levenshteinDistanceFrom:'comptuer' to:'computer' 
   312      self levenshteinDistanceFrom:'comptuer' to:'computer' 
   300             s:4 k:2 c:1 e:nil i:2 d:6    
   313             s:4 k:2 c:1 e:nil i:2 d:6    
   301     "
   314     "
       
   315 
       
   316     "Modified (comment): / 09-08-2012 / 05:40:08 / cg"
   302 ! !
   317 ! !
   303 
   318 
   304 !StringUtilities class methodsFor:'documentation'!
   319 !StringUtilities class methodsFor:'documentation'!
   305 
   320 
   306 version
   321 version
   307     ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.1 2009-08-16 21:07:08 cg Exp $'
   322     ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.2 2012-08-09 03:45:19 cg Exp $'
   308 ! !
   323 ! !