19 which matches the given search pattern or part of it. |
19 which matches the given search pattern or part of it. |
20 |
20 |
21 The algorithm is based on lib_fts[1], and includes an optional scoring algorithm |
21 The algorithm is based on lib_fts[1], and includes an optional scoring algorithm |
22 that can be used to sort all the matches based on their similarity to the pattern. |
22 that can be used to sort all the matches based on their similarity to the pattern. |
23 It is used (among others) in the sublime text editor. |
23 It is used (among others) in the sublime text editor. |
24 |
24 |
|
25 [caveat:] |
|
26 although this works great for class searches, |
|
27 it is strange that 'dabc' scores lower against 'abc' than 'adbc' |
|
28 (dabc has a longer common subsequence without interruptions...) |
|
29 |
25 [see also:] |
30 [see also:] |
26 https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb |
31 https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb |
27 https://github.com/forrestthewoods/lib_fts |
32 https://github.com/forrestthewoods/lib_fts |
28 |
33 |
29 " |
34 " |
30 ! |
35 ! |
31 |
36 |
32 example |
37 example |
33 " |
38 " |
|
39 |matcher| |
|
40 |
|
41 matcher := FuzzyMatcher pattern:'abc'. |
|
42 matcher |
|
43 match:'somearbitrarysequence' |
|
44 ifScored: [:score | Transcript show:('''somearbitrarysequence'' scores '); showCR:score]. |
|
45 |
|
46 matcher |
|
47 match:'someabcd' |
|
48 ifScored: [:score | Transcript show:('''someabcd'' scores '); showCR:score]. |
|
49 |
|
50 matcher |
|
51 match:'abcd' |
|
52 ifScored: [:score | Transcript show:('''abcd'' scores '); showCR:score]. |
|
53 |
|
54 matcher |
|
55 match:'dabc' |
|
56 ifScored: [:score | Transcript show:('''dabc'' scores '); showCR:score]. |
|
57 |
|
58 matcher |
|
59 match:'adbc' |
|
60 ifScored: [:score | Transcript show:('''adbc'' scores '); showCR:score]. |
|
61 |
|
62 matcher |
|
63 match:'abc' |
|
64 ifScored: [:score | Transcript show:('''abc'' scores '); showCR:score]. |
|
65 |
|
66 |
34 |top lv list field patternHolder names| |
67 |top lv list field patternHolder names| |
35 |
68 |
36 patternHolder := '' asValue. |
69 patternHolder := '' asValue. |
37 list := List new. |
70 list := List new. |
38 |
71 |
84 |
117 |
85 ^ self basicNew initialize. |
118 ^ self basicNew initialize. |
86 ! |
119 ! |
87 |
120 |
88 pattern: aString |
121 pattern: aString |
89 |
|
90 ^ self new pattern: aString |
122 ^ self new pattern: aString |
91 |
123 |
92 " |
124 " |
93 (self pattern:'mrp') matches:'ButtonMorph' |
125 (self pattern:'mrp') matches:'ButtonMorph' |
94 " |
126 (self pattern:'mrp') matches:'ButtonMorh' |
95 |
127 |
96 "Modified (comment): / 14-07-2017 / 15:02:43 / cg" |
128 (self pattern:'mrp') matches:'ButtonMorph' |
|
129 (self pattern:'mrp') matches:'ButtonMorh' |
|
130 " |
|
131 |
|
132 "Modified (comment): / 02-08-2017 / 15:57:07 / cg" |
97 ! ! |
133 ! ! |
98 |
134 |
99 !FuzzyMatcher class methodsFor:'utilities api'! |
135 !FuzzyMatcher class methodsFor:'utilities api'! |
100 |
136 |
101 allMatching: aPattern in: aCollection |
137 allMatching: aPattern in: aCollection |
102 "Assumes that the collection is a collection of Strings" |
138 "Assumes that the collection is a collection of Strings; |
|
139 return all those which match" |
103 |
140 |
104 | matcher | |
141 | matcher | |
105 |
142 |
106 matcher := self pattern: aPattern. |
143 matcher := self pattern: aPattern. |
107 |
|
108 ^ aCollection select: [ :each | matcher matches: each ] |
144 ^ aCollection select: [ :each | matcher matches: each ] |
109 |
145 |
110 " |
146 " |
111 self |
147 self |
112 allMatching:'clu' |
148 allMatching:'clu' |
113 in:(Smalltalk allClasses collect:#name) |
149 in:(Smalltalk allClasses collect:#name) |
114 " |
150 " |
115 |
151 |
116 "Modified (comment): / 14-07-2017 / 12:19:05 / cg" |
152 "Modified (comment): / 02-08-2017 / 16:02:44 / cg" |
117 ! |
153 ! |
118 |
154 |
119 allMatching: aPattern in: aCollection by: aBlockReturningString |
155 allMatching: aPattern in: aCollection by: aBlockReturningString |
120 "selects matching elements from aCollection. |
156 "selects matching elements from aCollection. |
121 aBlockReturningString is applied to elements to get the string representation |
157 aBlockReturningString is applied to elements to get the string representation |
266 indexes := Array new: pattern size. |
302 indexes := Array new: pattern size. |
267 |
303 |
268 "Modified (format): / 14-07-2017 / 12:59:15 / cg" |
304 "Modified (format): / 14-07-2017 / 12:59:15 / cg" |
269 ! ! |
305 ! ! |
270 |
306 |
271 !FuzzyMatcher methodsFor:'comparing'! |
307 !FuzzyMatcher methodsFor:'api - comparing'! |
272 |
308 |
273 match: aString ifScored: aBlock |
309 match: aString ifScored: aBlock |
274 |
310 "If there is a match, evaluate aBlock, passing the score value" |
275 | score | |
311 |
276 |
312 | scoreOrNil | |
277 pattern ifEmpty: [ aBlock value: "0" aString size negated. ^ self ]. |
313 |
278 (self matches: aString) ifFalse: [ ^ self ]. |
314 scoreOrNil := self matchScoreOrNil: aString. |
279 |
315 scoreOrNil notNil ifTrue:[ |
280 score := self firstScore: aString at: indexes first. |
316 aBlock value:scoreOrNil |
281 |
317 ]. |
282 2 to: pattern size do: [ :pix | |
318 |
283 score := score + (self score: aString at: (indexes at: pix) patternAt: pix) |
319 "Modified: / 02-08-2017 / 16:00:59 / cg" |
284 ]. |
320 ! |
285 |
321 |
286 score := score + self indexScore + ((aString size - pattern size) * self unmatchedLetterPenalty). |
322 matchScoreOrNil: aString |
287 |
323 "return the scrore if there is a match; nil otherwise." |
288 aBlock value: score. |
324 |
289 |
325 | score | |
290 "Modified: / 14-07-2017 / 12:44:50 / cg" |
326 |
|
327 pattern ifEmpty: [ ^ (aString size negated) ]. |
|
328 (self matches: aString) ifFalse: [ ^ nil ]. |
|
329 |
|
330 score := self firstScore: aString at: indexes first. |
|
331 |
|
332 2 to: pattern size do: [ :pix | |
|
333 score := score + (self score: aString at: (indexes at: pix) patternAt: pix) |
|
334 ]. |
|
335 |
|
336 score := score + self indexScore + ((aString size - pattern size) * self unmatchedLetterPenalty). |
|
337 |
|
338 ^ score. |
|
339 |
|
340 "Created: / 02-08-2017 / 15:59:56 / cg" |
291 ! |
341 ! |
292 |
342 |
293 matches: aString |
343 matches: aString |
294 |
344 "return true if there is a match; false otherwise." |
295 | idx | |
345 |
296 |
346 | idx | |
297 pattern size > aString size ifTrue: [ ^ false ]. |
347 |
298 |
348 pattern size > aString size ifTrue: [ ^ false ]. |
299 idx := 0. |
349 |
300 pattern withIndexDo: [ :each :i | |
350 idx := 0. |
301 idx := aString |
351 pattern withIndexDo: [ :each :i | |
302 findString: each asString |
352 idx := aString |
303 startingAt: idx + 1 |
353 findString: each asString |
304 caseSensitive: false. |
354 startingAt: idx + 1 |
305 |
355 caseSensitive: false. |
306 idx == 0 ifTrue: [ ^ false ]. |
356 |
307 indexes at: i put: idx. |
357 idx == 0 ifTrue: [ ^ false ]. |
308 ]. |
358 indexes at: i put: idx. |
309 |
359 ]. |
310 ^ true |
360 ^ true |
|
361 |
|
362 "Modified (format): / 02-08-2017 / 16:01:05 / cg" |
311 ! ! |
363 ! ! |
312 |
364 |
313 !FuzzyMatcher methodsFor:'initialization'! |
365 !FuzzyMatcher methodsFor:'initialization'! |
314 |
366 |
315 initialize |
367 initialize |