4459
|
1 |
"{ Package: 'stx:libbasic2' }"
|
|
2 |
|
|
3 |
"{ NameSpace: Smalltalk }"
|
|
4 |
|
|
5 |
"FuzzyMatcher instances hold the search pattern, a lowercase copy of it,
 and the string indexes recorded by the most recent call to matches:."
Object subclass:#FuzzyMatcher
    instanceVariableNames:'pattern lowercasePattern indexes'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text-Support'
|
|
10 |
!
|
|
11 |
|
|
12 |
!FuzzyMatcher class methodsFor:'documentation'!
|
|
13 |
|
|
14 |
documentation
|
|
15 |
"
|
|
16 |
FuzzyMatcher is an approximate string matching algorithm that can determine if a string includes a given pattern.
|
|
17 |
For example, the string 'axby' matches both the patterns 'ab' and 'ay', but not 'ba'.
|
|
18 |
|
|
19 |
The algorithm is based on lib_fts[1], and includes an optional scoring algorithm
|
|
20 |
that can be used to sort all the matches based on their similarity to the pattern.
|
|
21 |
It is used in the sublime text editor.
|
|
22 |
|
|
23 |
[see also:]
|
|
24 |
https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
|
|
25 |
https://github.com/forrestthewoods/lib_fts
|
|
26 |
|
|
27 |
"
|
4468
|
28 |
!
|
|
29 |
|
|
30 |
example
|
|
31 |
"
|
|
32 |
|top lv list field patternHolder names|
|
|
33 |
|
|
34 |
patternHolder := '' asValue.
|
|
35 |
list := List new.
|
|
36 |
|
|
37 |
top := StandardSystemView new.
|
|
38 |
lv := ListView origin:(0.0@30) corner:(1.0@1.0) in:top.
|
|
39 |
lv model:list.
|
|
40 |
|
|
41 |
field := EditField origin:(0.0@0.0) corner:(1.0@30) in:top.
|
|
42 |
field model:patternHolder.
|
|
43 |
field immediateAccept:true.
|
|
44 |
|
|
45 |
names := Smalltalk allClasses collect:#name.
|
|
46 |
|
|
47 |
patternHolder
|
|
48 |
onChangeEvaluate:[
|
|
49 |
|matcher pattern matches|
|
|
50 |
|
|
51 |
pattern := patternHolder value.
|
|
52 |
pattern notEmpty ifTrue:[
|
|
53 |
matcher := FuzzyMatcher pattern:pattern.
|
|
54 |
|
|
55 |
matches := OrderedCollection new.
|
|
56 |
|
|
57 |
names do:[:eachClassName |
|
|
58 |
matcher
|
|
59 |
match:eachClassName
|
|
60 |
ifScored: [ :score | matches add: eachClassName -> score ]
|
|
61 |
].
|
|
62 |
matches sortBySelector:#value.
|
|
63 |
|
|
64 |
list removeAll.
|
|
65 |
list addAllReversed:(matches
|
|
66 |
collect:[:nameScoreAssoc |
|
|
67 |
'[%1] %2' bindWith:nameScoreAssoc value with:nameScoreAssoc key])
|
|
68 |
].
|
|
69 |
].
|
|
70 |
top open.
|
|
71 |
patternHolder value:'mph'.
|
|
72 |
"
|
4459
|
73 |
! !
|
|
74 |
|
|
75 |
!FuzzyMatcher class methodsFor:'instance creation'!
|
|
76 |
|
|
77 |
new
    "Answer a fresh instance after sending it #initialize."

    | instance |

    instance := self basicNew.
    ^ instance initialize
|
|
81 |
! !
|
|
82 |
|
|
83 |
!FuzzyMatcher class methodsFor:'construction'!
|
|
84 |
|
|
85 |
pattern: aString
    "Create a new matcher and configure it with aString as its search pattern.
     Answers whatever the instance-side pattern: setter answers
     (the matcher itself, as the setter has no explicit return)."

    | matcher |

    matcher := self new.
    ^ matcher pattern: aString
|
|
88 |
! !
|
|
89 |
|
|
90 |
!FuzzyMatcher class methodsFor:'utilities api'!
|
|
91 |
|
|
92 |
allMatching: aPattern in: aCollection
    "Answer the elements of aCollection that match aPattern
     (as decided by matches:).
     Assumes that the collection is a collection of Strings"

    | fuzzy |

    fuzzy := self pattern: aPattern.
    ^ aCollection select: [ :candidate | fuzzy matches: candidate ]

    "
     self
        allMatching:'clu'
        in:(Smalltalk allClasses collect:#name)
    "

    "Modified (comment): / 13-07-2017 / 13:29:11 / cg"
|
|
108 |
!
|
|
109 |
|
|
110 |
allMatching: aPattern in: aCollection by: aBlockReturningString
    "Answer the elements of aCollection whose string representation matches
     aPattern. aBlockReturningString is evaluated with each element and its
     result is what gets handed to matches:."

    | fuzzy |

    fuzzy := self pattern: aPattern.
    ^ aCollection select: [ :element |
        | text |
        text := aBlockReturningString value: element.
        fuzzy matches: text ]
|
|
117 |
!
|
|
118 |
|
|
119 |
allSortedByScoreMatching: aPattern in: aCollection
    "Same as allSortedByScoreMatching:in:by:, with each element used
     directly as the string to match.
     Assumes that the collection is a collection of Strings"

    | identity |

    identity := [ :element | element ].
    ^ self allSortedByScoreMatching: aPattern in: aCollection by: identity
|
|
123 |
!
|
|
124 |
|
|
125 |
allSortedByScoreMatching: aPattern in: aCollection by: aBlockReturningString
    "Answer an Array with the elements of aCollection that match aPattern,
     ordered from highest to lowest score. aBlockReturningString maps each
     element to the string that is matched and scored.
     With an empty pattern, all elements are answered (as an Array) in their
     original order."

    | fuzzy scored |

    aPattern isEmpty ifTrue: [ ^ aCollection asArray ].

    fuzzy := self pattern: aPattern.
    "presize for roughly half of the input"
    scored := OrderedCollection new: aCollection size // 2.

    aCollection do: [ :element |
        fuzzy
            match: (aBlockReturningString value: element)
            ifScored: [ :points | scored add: points -> element ] ].

    "descending by score (the association key)"
    scored sort: [ :left :right | left key >= right key ].

    ^ scored collect: [ :assoc | assoc value ] as: Array
|
|
143 |
|
|
144 |
! !
|
|
145 |
|
|
146 |
!FuzzyMatcher methodsFor:'accessing'!
|
|
147 |
|
|
148 |
pattern
    "Answer the search pattern exactly as it was given to pattern:."

    ^ pattern
|
|
151 |
!
|
|
152 |
|
|
153 |
pattern: aString
    "Set the search pattern. Also caches its lowercase form and allocates
     one index slot per pattern character; matches: fills those slots."

    pattern := aString.
    indexes := Array new: aString size.
    lowercasePattern := aString asLowercase.
|
|
158 |
|
|
159 |
! !
|
|
160 |
|
|
161 |
!FuzzyMatcher methodsFor:'comparing'!
|
|
162 |
|
|
163 |
match: aString ifScored: aBlock
    "If aString matches the pattern, evaluate aBlock with the computed score;
     otherwise do nothing. An empty pattern yields a score of 0 without
     inspecting aString.
     The score is the sum of: the first-character score, the per-character
     scores of the remaining pattern characters, the adjacency ramp from
     indexScore, and a penalty for every string character beyond the
     pattern length."

    | total lengthPenalty |

    pattern ifEmpty: [ aBlock value: 0. ^ self ].

    "matches: also records the matched positions in indexes"
    (self matches: aString) ifFalse: [ ^ self ].

    total := self firstScore: aString at: indexes first.
    2 to: pattern size do: [ :pix |
        total := total + (self score: aString at: (indexes at: pix) patternAt: pix) ].

    lengthPenalty := (aString size - pattern size) * self unmatchedLetterPenalty.

    aBlock value: total + self indexScore + lengthPenalty.
|
|
182 |
!
|
|
183 |
|
|
184 |
matches: aString
    "Answer true if every character of the pattern can be found in aString,
     in order, ignoring case. Each search starts right after the position
     found for the previous pattern character.
     Side effect: the position found for the i-th pattern character is
     stored at indexes at: i. When false is answered, indexes may have been
     partially overwritten."

    | found |

    pattern size > aString size ifTrue: [ ^ false ].

    found := 0.
    1 to: pattern size do: [ :i |
        found := aString
                    findString: (pattern at: i) asString
                    startingAt: found + 1
                    caseSensitive: false.

        "0 means the character was not found"
        found == 0 ifTrue: [ ^ false ].
        indexes at: i put: found ].

    ^ true
|
|
202 |
! !
|
|
203 |
|
|
204 |
!FuzzyMatcher methodsFor:'initialization'!
|
|
205 |
|
|
206 |
initialize
    "Start with an empty pattern (and lowercase pattern) and no indexes."

    super initialize.

    indexes := #().
    lowercasePattern := ''.
    pattern := lowercasePattern.
|
|
212 |
|
|
213 |
! !
|
|
214 |
|
|
215 |
!FuzzyMatcher methodsFor:'private'!
|
|
216 |
|
|
217 |
firstScore: aString at: anIndex
    "Answer the score for the first pattern character, matched at position
     anIndex of aString:
       - caseEqualBonus if the string character equals the pattern character
         exactly (including case);
       - plus firstLetterBonus if the match is at position 1;
       - otherwise plus leadingLetterPenalty for each character before the
         match, limited by maxLeadingLetterPenalty."

    | caseBonus leadPenalty |

    caseBonus := 0.
    (aString at: anIndex) = pattern first ifTrue: [ caseBonus := self caseEqualBonus ].

    anIndex = 1 ifTrue: [ ^ caseBonus + self firstLetterBonus ].

    "the penalties are negative, so max: caps how large the penalty can get"
    leadPenalty := ((anIndex - 1) * self leadingLetterPenalty) max: self maxLeadingLetterPenalty.

    ^ caseBonus + leadPenalty
|
|
230 |
!
|
|
231 |
|
|
232 |
indexScore
    "Answer a rounded bonus for runs of consecutive matched positions in
     indexes. A ramp starts at 1, grows by the factor adjacencyIncrease for
     each pair of neighbouring indexes that are adjacent in the string, and
     is reset to 1 otherwise; (ramp - 1) is accumulated at every step."

    | total streak |

    total := 0.
    streak := 1.

    2 to: indexes size do: [ :next |
        (indexes at: next - 1) + 1 = (indexes at: next)
            ifTrue: [ streak := streak + (streak * self adjacencyIncrease) ]
            ifFalse: [ streak := 1 ].

        total := total + streak - 1 ].

    ^ total rounded
|
|
248 |
!
|
|
249 |
|
|
250 |
isSeparator: aCharacter
    "Answer true if aCharacter is an underscore or a colon."

    aCharacter = $_ ifTrue: [ ^ true ].
    ^ aCharacter = $:

    "Created: / 13-07-2017 / 13:30:34 / cg"
|
|
255 |
!
|
|
256 |
|
|
257 |
isSeperator: aCharacter
    "Obsolete, misspelled selector; forwards to isSeparator:."

    <resource: #obsolete>

    ^ self isSeparator: aCharacter

    "Modified: / 13-07-2017 / 13:31:18 / cg"
|
|
262 |
!
|
|
263 |
|
|
264 |
score: aString at: stringIndex patternAt: patternIndex
    "Answer the score for the pattern character at patternIndex, matched at
     stringIndex of aString. Looks at the string character just before the
     match:
       - if it is a separator: separatorBonus;
       - else if it equals the previous pattern character ignoring case:
         adjacencyBonus, plus adjacentCaseEqualBonus when it is also equal
         with case;
       - else nothing.
     caseEqualBonus is added when the matched string character equals the
     pattern character exactly.
     The caller (match:ifScored:) only uses this for patternIndex >= 2."

    | bonus before previousPatternIndex |

    before := aString at: stringIndex - 1.
    previousPatternIndex := patternIndex - 1.
    bonus := 0.

    (self isSeparator: before)
        ifTrue: [ bonus := self separatorBonus ]
        ifFalse: [
            before asLowercase = (lowercasePattern at: previousPatternIndex) ifTrue: [
                bonus := self adjacencyBonus.
                before = (pattern at: previousPatternIndex) ifTrue: [
                    bonus := bonus + self adjacentCaseEqualBonus ] ] ].

    (aString at: stringIndex) = (pattern at: patternIndex) ifTrue: [
        bonus := bonus + self caseEqualBonus ].

    ^ bonus

    "Modified: / 13-07-2017 / 13:30:57 / cg"
|
|
287 |
! !
|
|
288 |
|
|
289 |
!FuzzyMatcher methodsFor:'scoring-bonus'!
|
|
290 |
|
|
291 |
adjacencyBonus
    "Bonus used by score:at:patternAt: when the string character before a
     match equals the previous pattern character (ignoring case)."

    ^ 5
|
|
294 |
!
|
|
295 |
|
|
296 |
adjacencyIncrease
    "Growth factor used by indexScore: the running ramp is increased by
     (ramp * this value) each time two neighbouring matched indexes are
     adjacent in the string."

    ^ 1.2
|
|
299 |
!
|
|
300 |
|
|
301 |
adjacentCaseEqualBonus
    "Extra bonus added on top of adjacencyBonus in score:at:patternAt: when
     the preceding string character equals the previous pattern character
     including case."

    ^ 3
|
|
304 |
!
|
|
305 |
|
|
306 |
caseEqualBonus
    "Bonus for a matched string character that equals its pattern character
     exactly (used by firstScore:at: and score:at:patternAt:)."

    ^ 7
|
|
309 |
!
|
|
310 |
|
|
311 |
firstLetterBonus
    "Bonus used by firstScore:at: when the first pattern character is
     matched at position 1 of the string."

    ^ 12
|
|
314 |
!
|
|
315 |
|
|
316 |
separatorBonus
    "Bonus used by score:at:patternAt: when the string character before a
     match is a separator (see isSeparator:)."

    ^ 5
|
|
319 |
! !
|
|
320 |
|
|
321 |
!FuzzyMatcher methodsFor:'scoring-penalty'!
|
|
322 |
|
|
323 |
leadingLetterPenalty
    "Penalty (negative) applied by firstScore:at: for each string character
     that precedes the first matched character."

    ^ -3
|
|
326 |
!
|
|
327 |
|
|
328 |
maxLeadingLetterPenalty
    "Lower bound (negative) for the total leading-letter penalty computed
     in firstScore:at:."

    ^ -9
|
|
331 |
!
|
|
332 |
|
|
333 |
unmatchedLetterPenalty
    "Penalty (negative) applied by match:ifScored: for each character by
     which the string is longer than the pattern."

    ^ -1
|
|
336 |
! !
|
|
337 |
|
|
338 |
!FuzzyMatcher class methodsFor:'documentation'!
|
|
339 |
|
|
340 |
version
    "Answer the version-control header string of this class."

    ^ '$Header$'
|
|
342 |
!
|
|
343 |
|
|
344 |
version_CVS
    "Answer the CVS header string of this class."

    ^ '$Header$'
|
|
346 |
! !
|
|
347 |
|