FuzzyMatcher.st
changeset 4493 3961346f8256
parent 4492 05def04efc34
child 5376 61b8a719febd
--- a/FuzzyMatcher.st	Wed Aug 02 15:45:02 2017 +0200
+++ b/FuzzyMatcher.st	Wed Aug 02 16:03:33 2017 +0200
@@ -21,7 +21,12 @@
     The algorithm is based on lib_fts[1], and includes an optional scoring algorithm 
     that can be used to sort all the matches based on their similarity to the pattern.
     It is used (among others) in the sublime text editor.
-    
+
+    [caveat:]
+        although this works great for class searches,
+        it is strange that 'dabc' scores lower against 'abc' than 'adbc'
+        (dabc has a longer common subsequence without interruptions...)
+
     [see also:]
         https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
         https://github.com/forrestthewoods/lib_fts
@@ -31,6 +36,34 @@
 
 example
 "
+    |matcher|
+    
+    matcher := FuzzyMatcher pattern:'abc'.
+    matcher 
+        match:'somearbitrarysequence'   
+        ifScored: [:score | Transcript show:('''somearbitrarysequence'' scores '); showCR:score].
+
+    matcher 
+        match:'someabcd'   
+        ifScored: [:score | Transcript show:('''someabcd'' scores '); showCR:score].
+
+    matcher 
+        match:'abcd'   
+        ifScored: [:score | Transcript show:('''abcd'' scores '); showCR:score].
+
+    matcher 
+        match:'dabc'   
+        ifScored: [:score | Transcript show:('''dabc'' scores '); showCR:score].
+
+    matcher 
+        match:'adbc'   
+        ifScored: [:score | Transcript show:('''adbc'' scores '); showCR:score].
+
+    matcher 
+        match:'abc'   
+        ifScored: [:score | Transcript show:('''abc'' scores '); showCR:score].
+
+    
     |top lv list field patternHolder names|
 
     patternHolder := '' asValue.
@@ -86,25 +119,28 @@
 !
 
 pattern: aString
-
     ^ self new pattern: aString
 
     "
      (self pattern:'mrp') matches:'ButtonMorph'
+     (self pattern:'mrp') matches:'ButtonMorh'
+
+     (self pattern:'mrp') matches:'ButtonMorph'
+     (self pattern:'mrp') matches:'ButtonMorh'
     "
 
-    "Modified (comment): / 14-07-2017 / 15:02:43 / cg"
+    "Modified (comment): / 02-08-2017 / 15:57:07 / cg"
 ! !
 
 !FuzzyMatcher class methodsFor:'utilities api'!
 
 allMatching: aPattern in: aCollection
-    "Assumes that the collection is a collection of Strings"
+    "Assumes that the collection is a collection of Strings;
+     return all those which match"
 
     | matcher |
 
     matcher := self pattern: aPattern.
-
     ^ aCollection select: [ :each | matcher matches: each ]
 
     "
@@ -113,7 +149,7 @@
         in:(Smalltalk allClasses collect:#name)
     "
 
-    "Modified (comment): / 14-07-2017 / 12:19:05 / cg"
+    "Modified (comment): / 02-08-2017 / 16:02:44 / cg"
 !
 
 allMatching: aPattern in: aCollection by: aBlockReturningString
@@ -268,46 +304,62 @@
     "Modified (format): / 14-07-2017 / 12:59:15 / cg"
 ! !
 
-!FuzzyMatcher methodsFor:'comparing'!
+!FuzzyMatcher methodsFor:'api - comparing'!
 
 match: aString ifScored: aBlock
-        
-        | score |
+    "If there is a match, evaluate aBlock, passing the score value"
         
-        pattern ifEmpty: [ aBlock value: "0" aString size negated. ^ self ].
-        (self matches: aString) ifFalse: [ ^ self ].
-        
-        score := self firstScore: aString at: indexes first.
+    | scoreOrNil |
+
+    scoreOrNil := self matchScoreOrNil: aString.
+    scoreOrNil notNil ifTrue:[
+        aBlock value:scoreOrNil
+    ].
+
+    "Modified: / 02-08-2017 / 16:00:59 / cg"
+!
+
+matchScoreOrNil: aString
+    "return the scrore if there is a match; nil otherwise."
         
-        2 to: pattern size do: [ :pix | 
-                score := score + (self score: aString at: (indexes at: pix) patternAt: pix)
-        ].
-                
-        score := score + self indexScore + ((aString size - pattern size) * self unmatchedLetterPenalty).
-                
-        aBlock value: score.
+    | score |
+
+    pattern ifEmpty: [ ^ (aString size negated) ].
+    (self matches: aString) ifFalse: [ ^ nil ].
+
+    score := self firstScore: aString at: indexes first.
 
-    "Modified: / 14-07-2017 / 12:44:50 / cg"
+    2 to: pattern size do: [ :pix | 
+            score := score + (self score: aString at: (indexes at: pix) patternAt: pix)
+    ].
+
+    score := score + self indexScore + ((aString size - pattern size) * self unmatchedLetterPenalty).
+
+    ^ score.
+
+    "Created: / 02-08-2017 / 15:59:56 / cg"
 !
 
 matches: aString
+    "return true if there is a match; false otherwise."
+     
+    | idx |
 
-	| idx |
-	
-	pattern size > aString size ifTrue: [ ^ false ].
+    pattern size > aString size ifTrue: [ ^ false ].
 
-	idx := 0.
-	pattern withIndexDo: [ :each :i |
-		idx := aString 
-			findString: each asString 
-			startingAt: idx + 1 
-			caseSensitive: false. 
-		
-		idx == 0 ifTrue: [ ^ false ].
-		indexes at: i put: idx.
-	].
+    idx := 0.
+    pattern withIndexDo: [ :each :i |
+            idx := aString 
+                    findString: each asString 
+                    startingAt: idx + 1 
+                    caseSensitive: false. 
 
-	^ true
+            idx == 0 ifTrue: [ ^ false ].
+            indexes at: i put: idx.
+    ].
+    ^ true
+
+    "Modified (format): / 02-08-2017 / 16:01:05 / cg"
 ! !
 
 !FuzzyMatcher methodsFor:'initialization'!