#DOCUMENTATION by cg
class: FuzzyMatcher class
comment/format in:
#allMatching:in:
#allMatching:in:by:
#allSortedByScoreMatching:in:
#allSortedByScoreMatching:in:by:
#documentation
"{ Package: 'stx:libbasic2' }"
"{ NameSpace: Smalltalk }"
Object subclass:#FuzzyMatcher
instanceVariableNames:'pattern lowercasePattern indexes'
classVariableNames:''
poolDictionaries:''
category:'Collections-Text-Support'
!
!FuzzyMatcher class methodsFor:'documentation'!
documentation
"
FuzzyMatcher is an approximate string matching algorithm that can determine if a string includes a given pattern.
For example, the string 'axby' matches both the pattern 'ab' and, 'ay', but not 'ba'.
The algorithm is based on lib_fts[1], and includes an optional scoring algorithm
that can be used to sort all the matches based on their similarity to the pattern.
It is used in the sublime text editor.
[see also:]
https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
https://github.com/forrestthewoods/lib_fts
"
!
example
"
|top lv list field patternHolder names|
patternHolder := '' asValue.
list := List new.
top := StandardSystemView new.
lv := ListView origin:(0.0@30) corner:(1.0@1.0) in:top.
lv model:list.
field := EditField origin:(0.0@0.0) corner:(1.0@30) in:top.
field model:patternHolder.
field immediateAccept:true.
names := Smalltalk allClasses collect:#name.
patternHolder
onChangeEvaluate:[
|matcher pattern matches|
pattern := patternHolder value.
pattern notEmpty ifTrue:[
matcher := FuzzyMatcher pattern:pattern.
matches := OrderedCollection new.
names do:[:eachClassName |
matcher
match:eachClassName
ifScored: [ :score | matches add: eachClassName -> score ]
].
matches sort:[:a :b |
a value < b value
or:[ a value = b value and:[ a key > b key]]
].
list removeAll.
list addAllReversed:(matches
collect:[:nameScoreAssoc |
'[%1] %2' bindWith:nameScoreAssoc value with:nameScoreAssoc key])
].
].
top open.
patternHolder value:'mph'.
"
! !
!FuzzyMatcher class methodsFor:'instance creation'!
new
"return an initialized instance"
^ self basicNew initialize.
! !
!FuzzyMatcher class methodsFor:'construction'!
pattern: aString
^self new pattern: aString
! !
!FuzzyMatcher class methodsFor:'utilities api'!
allMatching: aPattern in: aCollection
"Assumes that the collection is a collection of Strings"
| matcher |
matcher := self pattern: aPattern.
^ aCollection select: [ :each | matcher matches: each ]
"
self
allMatching:'clu'
in:(Smalltalk allClasses collect:#name)
"
"Modified (comment): / 14-07-2017 / 12:19:05 / cg"
!
allMatching: aPattern in: aCollection by: aBlockReturningString
| matcher |
matcher := self pattern: aPattern.
^ aCollection select: [ :each | matcher matches: (aBlockReturningString value: each) ]
"
self
allMatching:'clu'
in:(Smalltalk allClasses)
by:[:cls | cls name]
"
"
self
allMatching:'clu'
in:(Smalltalk allClasses)
by:#name
"
"Modified (format): / 14-07-2017 / 12:18:59 / cg"
!
allSortedByScoreMatching: aPattern in: aCollection
"Assumes that the collection is a collection of Strings"
^ self allSortedByScoreMatching: aPattern in: aCollection by: [ :each | each ]
"
self
allSortedByScoreMatching:'clu'
in:(Smalltalk allClasses collect:#name)
"
"
self
allSortedByScoreMatching:'nary'
in:(Smalltalk allClasses collect:#name)
"
"Modified (comment): / 14-07-2017 / 12:18:54 / cg"
!
allSortedByScoreMatching: aPattern in: aCollection by: aBlockReturningString
| matcher matches |
aPattern isEmpty ifTrue: [ ^ aCollection asArray ].
matcher := self pattern: aPattern.
matches := OrderedCollection new: aCollection size // 2.
aCollection do: [ :each |
matcher
match: (aBlockReturningString value: each)
ifScored: [ :score | matches add: score -> each ]
].
matches sort: [ :a :b | a key >= b key].
^ matches collect: [ :each | each value ] as: Array
"
self
allSortedByScoreMatching:'nary'
in:(Smalltalk allClasses)
by:[:cls | cls name]
"
"
self
allSortedByScoreMatching:'nary'
in:(Smalltalk allClasses)
by:#name
"
"Modified (format): / 14-07-2017 / 12:18:48 / cg"
! !
!FuzzyMatcher methodsFor:'accessing'!
pattern
^ pattern
!
pattern: aString
pattern := aString.
lowercasePattern := pattern asLowercase.
indexes := Array new: pattern size.
! !
!FuzzyMatcher methodsFor:'comparing'!
match: aString ifScored: aBlock
| score |
score := 0.
pattern ifEmpty: [ aBlock value: score. ^ self ].
(self matches: aString) ifFalse: [ ^ self ].
score := self firstScore: aString at: indexes first.
2 to: pattern size do: [ :pix |
score := score + (self score: aString at: (indexes at: pix) patternAt: pix)
].
score := score + self indexScore + ((aString size - pattern size) * self unmatchedLetterPenalty).
aBlock value: score.
!
matches: aString
| idx |
pattern size > aString size ifTrue: [ ^ false ].
idx := 0.
pattern withIndexDo: [ :each :i |
idx := aString
findString: each asString
startingAt: idx + 1
caseSensitive: false.
idx == 0 ifTrue: [ ^ false ].
indexes at: i put: idx.
].
^ true
! !
!FuzzyMatcher methodsFor:'initialization'!
initialize
super initialize.
pattern := lowercasePattern := ''.
indexes := #().
! !
!FuzzyMatcher methodsFor:'private'!
firstScore: aString at: anIndex
| score |
score := (aString at: anIndex) = pattern first
ifTrue: [ self caseEqualBonus ]
ifFalse: [ 0 ].
anIndex = 1 ifTrue: [ ^ score + self firstLetterBonus ].
score := score + (((anIndex - 1) * self leadingLetterPenalty) max: self maxLeadingLetterPenalty).
^ score
!
indexScore
| sum ramp |
ramp := 1.
sum := 0.
1 to: indexes size - 1 do: [ :ix |
ramp := (indexes at: ix) + 1 = (indexes at: ix + 1)
ifTrue: [ ramp + (ramp * self adjacencyIncrease) ]
ifFalse: [ 1 ].
sum := sum + ramp - 1
].
^ sum rounded
!
isSeparator: aCharacter
^ aCharacter = $_ or: [ aCharacter = $: ]
"Created: / 13-07-2017 / 13:30:34 / cg"
!
isSeperator: aCharacter
<resource: #obsolete>
^ self isSeparator: aCharacter
"Modified: / 13-07-2017 / 13:31:18 / cg"
!
score: aString at: stringIndex patternAt: patternIndex
| score prev |
prev := (aString at: stringIndex - 1).
score := (self isSeparator: prev)
ifTrue: [ self separatorBonus ]
ifFalse: [ (prev asLowercase = (lowercasePattern at: patternIndex - 1))
ifTrue: [
self adjacencyBonus +
((prev = (pattern at: patternIndex - 1)) ifTrue: [ self adjacentCaseEqualBonus ] ifFalse: [ 0 ])
]
ifFalse: [ 0 ]
].
(aString at: stringIndex) = (pattern at: patternIndex) ifTrue: [
score := score + self caseEqualBonus.
].
^ score
"Modified: / 13-07-2017 / 13:30:57 / cg"
! !
!FuzzyMatcher methodsFor:'scoring-bonus'!
adjacencyBonus
^ 5
!
adjacencyIncrease
^ 1.2
!
adjacentCaseEqualBonus
^ 3
!
caseEqualBonus
^ 7
!
firstLetterBonus
^ 12
!
separatorBonus
^ 5
! !
!FuzzyMatcher methodsFor:'scoring-penalty'!
leadingLetterPenalty
^ -3
!
maxLeadingLetterPenalty
^ -9
!
unmatchedLetterPenalty
^ -1
! !
!FuzzyMatcher class methodsFor:'documentation'!
version
^ '$Header$'
!
version_CVS
^ '$Header$'
! !