equal
deleted
inserted
replaced
46 ! ! |
46 ! ! |
47 |
47 |
48 !StringUtilities class methodsFor:'edit distance'! |
48 !StringUtilities class methodsFor:'edit distance'! |
49 |
49 |
50 editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight |
50 editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight |
|
51 "another, simpler editing distance between two strings. |
|
52 See also: levenshtein" |
|
53 |
51 |editedS2 min d| |
54 |editedS2 min d| |
52 |
55 |
53 s2 size > s1 size ifTrue:[ |
56 s2 size > s1 size ifTrue:[ |
54 ^ self editDistanceFrom:s2 to:s1 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight |
57 ^ self editDistanceFrom:s2 to:s1 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight |
55 ]. |
58 ]. |
61 min := (min ? d) min:d. |
64 min := (min ? d) min:d. |
62 ]. |
65 ]. |
63 ^ min + insrtWeight |
66 ^ min + insrtWeight |
64 ]. |
67 ]. |
65 |
68 |
66 ^ (1 to:s1 size) sum:[:i | ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) ifTrue:[0] ifFalse:[substWeight]] |
69 ^ (1 to:s1 size) sum: |
|
70 [:i | |
|
71 ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) |
|
72 ifTrue:[0] |
|
73 ifFalse:[substWeight] |
|
74 ] |
67 |
75 |
68 " |
76 " |
69 'comptuer' levenshteinTo:'computer' |
77 'comptuer' levenshteinTo:'computer' |
70 self editDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil i:2 |
78 self editDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil i:2 |
71 |
79 |
72 'computr' levenshteinTo:'computer' |
80 'computr' levenshteinTo:'computer' |
73 self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2 |
81 self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2 |
74 " |
82 " |
|
83 |
|
84 "Modified (format): / 09-08-2012 / 05:41:59 / cg" |
75 ! |
85 ! |
76 |
86 |
77 isKey:k1 nextTo:k2 |
87 isKey:k1 nextTo:k2 |
78 "return true, if k1 and k2 are adjacent keys on the keyboard. |
88 "return true, if k1 and k2 are adjacent keys on the keyboard. |
79 This is used to specially prioritize plausible typing errors of adjacent keys." |
89 This is used to specially prioritize plausible typing errors of adjacent keys." |
121 ! |
131 ! |
122 |
132 |
123 keyboardLayoutForLanguage:lang |
133 keyboardLayoutForLanguage:lang |
124 "the keyboard layout (used with algorithms to find possible typing errors, |
134 "the keyboard layout (used with algorithms to find possible typing errors, |
125 for example: edit distance in levenshtein). |
135 for example: edit distance in levenshtein). |
126 CAVEAT: hard coded us- and german keyboards here." |
136 CAVEAT: hard coded us- and german keyboards here - should go into resource file." |
127 |
137 |
128 "/ danish |
138 "/ danish |
129 lang == #da ifTrue:[ |
139 lang == #da ifTrue:[ |
130 ^ #( |
140 ^ #( |
131 '1234567890-' |
141 '1234567890-' |
193 '***zxcvbnm' ). |
203 '***zxcvbnm' ). |
194 |
204 |
195 " |
205 " |
196 self keyboardLayoutForLanguage:#de |
206 self keyboardLayoutForLanguage:#de |
197 " |
207 " |
|
208 |
|
209 "Modified (comment): / 09-08-2012 / 05:39:19 / cg" |
198 ! |
210 ! |
199 |
211 |
200 levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight |
212 levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight |
201 "parametrized levenshtein. |
213 "parametrized levenshtein. |
202 return the levenshtein distance of two strings; |
214 return the levenshtein distance of two strings; |
203 this value corresponds to the number of replacements that have to be |
215 this value corresponds to the number of replacements that have to be |
204 made to get string2 from string1. |
216 made to get string2 from string1. The smaller the returned number, |
|
217 the more similar are the two strings. |
205 |
218 |
206 The arguments are the costs for |
219 The arguments are the costs for |
207 s:substitution, |
220 s:substitution, |
208 k:keyboard typo (substitution), if nil, s is used |
221 k:keyboard typo (substitution), if nil, s is used |
209 c:case-change, if nil, s is used |
222 c:case-change, if nil, s is used |
297 'comptuer' levenshteinTo:'computer' |
310 'comptuer' levenshteinTo:'computer' |
298 |
311 |
299 self levenshteinDistanceFrom:'comptuer' to:'computer' |
312 self levenshteinDistanceFrom:'comptuer' to:'computer' |
300 s:4 k:2 c:1 e:nil i:2 d:6 |
313 s:4 k:2 c:1 e:nil i:2 d:6 |
301 " |
314 " |
|
315 |
|
316 "Modified (comment): / 09-08-2012 / 05:40:08 / cg" |
302 ! ! |
317 ! ! |
303 |
318 |
304 !StringUtilities class methodsFor:'documentation'! |
319 !StringUtilities class methodsFor:'documentation'! |
305 |
320 |
306 version |
321 version |
307 ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.1 2009-08-16 21:07:08 cg Exp $' |
322 ^ '$Header: /cvs/stx/stx/libbasic2/StringUtilities.st,v 1.2 2012-08-09 03:45:19 cg Exp $' |
308 ! ! |
323 ! ! |