4459
|
1 |
"{ Package: 'stx:libbasic2' }"
|
|
2 |
|
|
3 |
"{ NameSpace: Smalltalk }"
|
|
4 |
|
|
5 |
Object subclass:#FuzzyMatcher
|
|
6 |
instanceVariableNames:'pattern lowercasePattern indexes'
|
|
7 |
classVariableNames:''
|
|
8 |
poolDictionaries:''
|
|
9 |
category:'Collections-Text-Support'
|
|
10 |
!
|
|
11 |
|
|
12 |
!FuzzyMatcher class methodsFor:'documentation'!
|
|
13 |
|
|
14 |
documentation
|
|
15 |
"
|
4474
|
16 |
FuzzyMatcher is an approximate string matching algorithm that can determine if a string includes a given pattern.
|
4459
|
17 |
For example, the string 'axby' matches both the patterns 'ab' and 'ay', but not 'ba'.
|
|
18 |
|
|
19 |
The algorithm is based on lib_fts[1], and includes an optional scoring algorithm
|
|
20 |
that can be used to sort all the matches based on their similarity to the pattern.
|
|
21 |
A similar algorithm is used in the Sublime Text editor (see the links below).
|
|
22 |
|
|
23 |
[see also:]
|
|
24 |
https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
|
|
25 |
https://github.com/forrestthewoods/lib_fts
|
|
26 |
|
|
27 |
"
|
4468
|
28 |
!
|
|
29 |
|
|
30 |
example
|
|
31 |
"
|
|
32 |
|top lv list field patternHolder names|
|
|
33 |
|
|
34 |
patternHolder := '' asValue.
|
|
35 |
list := List new.
|
|
36 |
|
|
37 |
top := StandardSystemView new.
|
|
38 |
lv := ListView origin:(0.0@30) corner:(1.0@1.0) in:top.
|
|
39 |
lv model:list.
|
|
40 |
|
|
41 |
field := EditField origin:(0.0@0.0) corner:(1.0@30) in:top.
|
|
42 |
field model:patternHolder.
|
|
43 |
field immediateAccept:true.
|
|
44 |
|
|
45 |
names := Smalltalk allClasses collect:#name.
|
|
46 |
|
|
47 |
patternHolder
|
|
48 |
onChangeEvaluate:[
|
|
49 |
|matcher pattern matches|
|
|
50 |
|
|
51 |
pattern := patternHolder value.
|
|
52 |
pattern notEmpty ifTrue:[
|
|
53 |
matcher := FuzzyMatcher pattern:pattern.
|
|
54 |
|
|
55 |
matches := OrderedCollection new.
|
|
56 |
|
|
57 |
names do:[:eachClassName |
|
|
58 |
matcher
|
|
59 |
match:eachClassName
|
|
60 |
ifScored: [ :score | matches add: eachClassName -> score ]
|
|
61 |
].
|
4470
|
62 |
matches sort:[:a :b |
|
|
63 |
a value < b value
|
|
64 |
or:[ a value = b value and:[ a key > b key]]
|
|
65 |
].
|
4468
|
66 |
|
|
67 |
list removeAll.
|
|
68 |
list addAllReversed:(matches
|
|
69 |
collect:[:nameScoreAssoc |
|
|
70 |
'[%1] %2' bindWith:nameScoreAssoc value with:nameScoreAssoc key])
|
|
71 |
].
|
|
72 |
].
|
|
73 |
top open.
|
|
74 |
patternHolder value:'mph'.
|
|
75 |
"
|
4459
|
76 |
! !
|
|
77 |
|
|
78 |
!FuzzyMatcher class methodsFor:'instance creation'!
|
|
79 |
|
|
80 |
new
    "answer a fresh matcher which has been sent #initialize
     (i.e. one with an empty pattern)"

    |matcher|

    matcher := self basicNew.
    ^ matcher initialize
|
4475
|
84 |
!
|
4459
|
85 |
|
|
86 |
pattern: aString
    "answer a new matcher which looks for aString"

    |matcher|

    matcher := self new.
    ^ matcher pattern: aString

    "
     (self pattern:'mrp') matches:'ButtonMorph'
    "

    "Modified (comment): / 14-07-2017 / 15:02:43 / cg"
|
4459
|
95 |
! !
|
|
96 |
|
|
97 |
!FuzzyMatcher class methodsFor:'utilities api'!
|
|
98 |
|
|
99 |
allMatching: aPattern in: aCollection
    "answer the elements of aCollection which are matched by aPattern.
     The elements are expected to be strings."

    |fuzzy|

    fuzzy := self pattern: aPattern.
    ^ aCollection select: [:candidate | fuzzy matches: candidate]

    "
     self
         allMatching:'clu'
         in:(Smalltalk allClasses collect:#name)
    "

    "Modified (comment): / 14-07-2017 / 12:19:05 / cg"
|
4459
|
115 |
!
|
|
116 |
|
|
117 |
allMatching: aPattern in: aCollection by: aBlockReturningString
    "answer the elements of aCollection whose string representation
     is matched by aPattern.
     aBlockReturningString is evaluated with each element and has to
     deliver the string which is matched against the pattern
     (this allows e.g. classes to be selected by their name)"

    |fuzzy|

    fuzzy := self pattern: aPattern.
    ^ aCollection select: [:element |
        |text|

        text := aBlockReturningString value: element.
        fuzzy matches: text
    ]

    "
     self
         allMatching:'clu'
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allMatching:'clu'
         in:(Smalltalk allClasses)
         by:#name
    "

    "Modified (comment): / 14-07-2017 / 12:21:40 / cg"
|
4459
|
142 |
!
|
|
143 |
|
|
144 |
allSortedByScoreMatching: aPattern in: aCollection
    "answer the strings from aCollection which are matched by aPattern,
     sorted by their score (best match first).
     The elements are expected to be strings."

    |useElementItself|

    useElementItself := [:aStringElement | aStringElement].
    ^ self
        allSortedByScoreMatching: aPattern
        in: aCollection
        by: useElementItself

    "
     self
         allSortedByScoreMatching:'clu'
         in:(Smalltalk allClasses collect:#name)
    "
    "
     self
         allSortedByScoreMatching:'nary'
         in:(Smalltalk allClasses collect:#name)
    "

    "Modified (comment): / 14-07-2017 / 12:22:14 / cg"
|
4459
|
162 |
!
|
|
163 |
|
|
164 |
allSortedByScoreMatching: aPattern in: aCollection by: aBlockReturningString
    "answer the elements of aCollection whose string representation
     is matched by aPattern, sorted by score (best match first).
     aBlockReturningString is evaluated with each element and has to
     deliver the string which is matched against the pattern
     (this allows e.g. classes to be sorted by their name)"

    ^ (self
            allWithScoresSortedByScoreMatching: aPattern
            in: aCollection
            by: aBlockReturningString)
        collect: [:scoreAndElement | scoreAndElement value]

    "
     self
         allSortedByScoreMatching:''
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allSortedByScoreMatching:'nary'
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allSortedByScoreMatching:'nary'
         in:(Smalltalk allClasses)
         by:#name
    "

    "Modified: / 14-07-2017 / 12:43:14 / cg"
|
|
195 |
!
|
|
196 |
|
|
197 |
allWithScoresSortedByScoreMatching: aPattern in: aCollection by: aBlockReturningString
    "answer an array of (score -> element) associations for those elements
     of aCollection whose string representation is matched by aPattern,
     sorted by descending score (best match first).
     aBlockReturningString is evaluated with each element and has to
     deliver the string which is matched against the pattern
     (this allows e.g. classes to be sorted by their name)"

    |fuzzy scored|

    fuzzy := self pattern: aPattern.
    scored := OrderedCollection new: aCollection size // 2.

    aCollection do: [:element |
        |text|

        text := aBlockReturningString value: element.
        fuzzy
            match: text
            ifScored: [:points | scored add: (points -> element)]
    ].

    scored sort: [:left :right | left key > right key].
    ^ scored asArray

    "
     self
         allWithScoresSortedByScoreMatching:''
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allWithScoresSortedByScoreMatching:'OC'
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allWithScoresSortedByScoreMatching:'nary'
         in:(Smalltalk allClasses)
         by:[:cls | cls name]
    "
    "
     self
         allWithScoresSortedByScoreMatching:'nary'
         in:(Smalltalk allClasses)
         by:#name
    "

    "Created: / 14-07-2017 / 12:25:19 / cg"
|
4459
|
243 |
! !
|
|
244 |
|
|
245 |
!FuzzyMatcher methodsFor:'accessing'!
|
|
246 |
|
4477
|
247 |
indexes
    "answer the array of string positions recorded by the last match
     (one slot per pattern character, filled in by #matches:).
     Only valid inside the match callback block."

    ^ indexes

    "Created: / 15-07-2017 / 14:57:10 / cg"
|
|
253 |
!
|
|
254 |
|
4459
|
255 |
pattern
    "answer the pattern string this matcher searches for"

    ^ pattern
|
|
258 |
!
|
|
259 |
|
|
260 |
pattern: aString
    "set the pattern to search for.
     Also caches a lowercase copy of it and allocates the index array
     with one slot per pattern character."

    pattern := aString.
    lowercasePattern := aString asLowercase.
    indexes := Array new: aString size.

    "Modified (format): / 14-07-2017 / 12:59:15 / cg"
|
4459
|
267 |
! !
|
|
268 |
|
|
269 |
!FuzzyMatcher methodsFor:'comparing'!
|
|
270 |
|
|
271 |
match: aString ifScored: aBlock
    "if aString is matched by the pattern, evaluate aBlock with the score
     of the match; otherwise do nothing.
     An empty pattern matches every string; its score is the negated
     string size."

    |total lengthPenalty|

    pattern isEmpty ifTrue: [
        aBlock value: aString size negated.
        ^ self
    ].
    (self matches: aString) ifFalse: [ ^ self ].

    "/ #matches: has filled indexes with the string position of each pattern character
    total := self firstScore: aString at: indexes first.
    2 to: pattern size do: [:patternIndex |
        total := total
                 + (self
                        score: aString
                        at: (indexes at: patternIndex)
                        patternAt: patternIndex)
    ].

    total := total + self indexScore.
    lengthPenalty := (aString size - pattern size) * self unmatchedLetterPenalty.
    aBlock value: total + lengthPenalty.

    "Modified: / 14-07-2017 / 12:44:50 / cg"
|
4459
|
289 |
!
|
|
290 |
|
|
291 |
matches: aString
    "answer true, if all characters of the pattern occur in aString in
     the same order (ignoring case, not necessarily adjacent).
     As a side effect, the position found for each pattern character is
     stored into indexes."

    |foundAt|

    aString size < pattern size ifTrue: [ ^ false ].

    foundAt := 0.
    1 to: pattern size do: [:patternIndex |
        "/ continue the search right after the previously found position
        foundAt := aString
                    findString: (pattern at: patternIndex) asString
                    startingAt: foundAt + 1
                    caseSensitive: false.

        foundAt == 0 ifTrue: [ ^ false ].
        indexes at: patternIndex put: foundAt.
    ].

    ^ true
|
|
309 |
! !
|
|
310 |
|
|
311 |
!FuzzyMatcher methodsFor:'initialization'!
|
|
312 |
|
|
313 |
initialize
    "set up an empty pattern and an empty index array"

    super initialize.

    pattern := ''.
    lowercasePattern := pattern.
    indexes := #().

    "Modified (format): / 14-07-2017 / 13:23:26 / cg"
|
4459
|
321 |
! !
|
|
322 |
|
|
323 |
!FuzzyMatcher methodsFor:'private'!
|
|
324 |
|
|
325 |
firstScore: aString at: anIndex
    "answer the score contributed by the first pattern character,
     which was found at position anIndex in aString.
     A match at the very first position earns the first-letter bonus;
     otherwise a penalty per skipped leading character is applied,
     which is limited by maxLeadingLetterPenalty."

    |caseBonus skippedPenalty|

    caseBonus := 0.
    (aString at: anIndex) = pattern first ifTrue: [
        caseBonus := self caseEqualBonus
    ].

    anIndex = 1 ifTrue: [ ^ caseBonus + self firstLetterBonus ].

    "/ both values are negative, so max: picks the smaller penalty
    skippedPenalty := (anIndex - 1) * self leadingLetterPenalty.
    ^ caseBonus + (skippedPenalty max: self maxLeadingLetterPenalty)
|
|
338 |
!
|
|
339 |
|
|
340 |
indexScore
    "answer a bonus for runs of adjacent positions in indexes.
     The per-step bonus grows by the factor adjacencyIncrease for every
     further consecutive position and is reset as soon as there is a gap."

    |total streak|

    streak := 1.
    total := 0.

    2 to: indexes size do: [:next |
        (indexes at: next) = ((indexes at: next - 1) + 1)
            ifTrue: [ streak := streak + (streak * self adjacencyIncrease) ]
            ifFalse: [ streak := 1 ].

        total := total + streak - 1
    ].

    ^ total rounded

    "Modified (format): / 14-07-2017 / 13:24:07 / cg"
|
4459
|
358 |
!
|
|
359 |
|
|
360 |
isSeparator: aCharacter
    "answer true, if aCharacter is one of the word separators
     (underscore or colon) which earn the separator bonus"

    ^ #($_ $:) includes: aCharacter

    "Created: / 13-07-2017 / 13:30:34 / cg"
|
|
365 |
!
|
|
366 |
|
|
367 |
isSeperator: aCharacter
    "obsolete, misspelled selector, kept for backward compatibility;
     forwards to #isSeparator:"

    <resource: #obsolete>

    ^ self isSeparator: aCharacter

    "Modified: / 13-07-2017 / 13:31:18 / cg"
|
|
372 |
!
|
|
373 |
|
|
374 |
score: aString at: stringIndex patternAt: patternIndex
    "answer the score contributed by the pattern character at patternIndex
     (which must be > 1), which was found at stringIndex in aString.
     The character preceding the match decides about the bonus:
     a separator earns the separator bonus; the previous pattern character
     (compared ignoring case) earns the adjacency bonus, plus an extra
     bonus if it is also equal in case.
     An exact (case sensitive) match of the character itself is rewarded
     in addition."

    |points before previousPatternIndex|

    before := aString at: stringIndex - 1.
    previousPatternIndex := patternIndex - 1.

    (self isSeparator: before)
        ifTrue: [
            points := self separatorBonus
        ]
        ifFalse: [
            points := 0.
            before asLowercase = (lowercasePattern at: previousPatternIndex) ifTrue: [
                points := self adjacencyBonus.
                before = (pattern at: previousPatternIndex) ifTrue: [
                    points := points + self adjacentCaseEqualBonus
                ]
            ]
        ].

    (aString at: stringIndex) = (pattern at: patternIndex) ifTrue: [
        points := points + self caseEqualBonus
    ].

    ^ points

    "Modified: / 13-07-2017 / 13:30:57 / cg"
|
|
397 |
! !
|
|
398 |
|
|
399 |
!FuzzyMatcher methodsFor:'scoring-bonus'!
|
|
400 |
|
|
401 |
adjacencyBonus
    "bonus granted by #score:at:patternAt:, if the character in front of a
     matched character is (ignoring case) the previous pattern character"

    ^ 5
|
|
404 |
!
|
|
405 |
|
|
406 |
adjacencyIncrease
    "factor used by #indexScore to grow the bonus for every
     further consecutive match position"

    ^ 1.2
|
|
409 |
!
|
|
410 |
|
|
411 |
adjacentCaseEqualBonus
    "extra bonus granted by #score:at:patternAt: on top of the adjacency
     bonus, if the preceding character also has the same case as the
     previous pattern character"

    ^ 3
|
|
414 |
!
|
|
415 |
|
|
416 |
caseEqualBonus
    "bonus granted, if a matched character is equal to the
     pattern character including its case"

    ^ 7
|
|
419 |
!
|
|
420 |
|
|
421 |
firstLetterBonus
    "bonus granted by #firstScore:at:, if the first pattern character
     is found at the very first position of the string"

    ^ 12
|
|
424 |
!
|
|
425 |
|
|
426 |
separatorBonus
    "bonus granted by #score:at:patternAt:, if the character in front of
     a matched character is a separator (see #isSeparator:)"

    ^ 5
|
|
429 |
! !
|
|
430 |
|
|
431 |
!FuzzyMatcher methodsFor:'scoring-penalty'!
|
|
432 |
|
|
433 |
leadingLetterPenalty
    "penalty (a negative score) applied by #firstScore:at: for each
     character in front of the first matched character"

    ^ -3
|
|
436 |
!
|
|
437 |
|
|
438 |
maxLeadingLetterPenalty
    "limit for the accumulated leading letter penalty;
     #firstScore:at: never subtracts more than this"

    ^ -9
|
|
441 |
!
|
|
442 |
|
|
443 |
unmatchedLetterPenalty
    "penalty (a negative score) applied by #match:ifScored: for each
     character by which the string is longer than the pattern"

    ^ -1
|
|
446 |
! !
|
|
447 |
|
|
448 |
!FuzzyMatcher class methodsFor:'documentation'!
|
|
449 |
|
|
450 |
version
    "answer the version stamp string of this class
     (presumably a keyword filled in by the source code manager - to be confirmed)"

    ^ '$Header$'
|
|
452 |
!
|
|
453 |
|
|
454 |
version_CVS
    "answer the CVS version stamp string of this class
     (presumably a keyword filled in by the source code manager - to be confirmed)"

    ^ '$Header$'
|
|
456 |
! !
|
|
457 |
|