#FEATURE by cg
class: KeywordInContextIndexBuilder
class definition
added:
#exclusionFilter:
#unquoteAlgorithm:
changed:
#addLine:reference:ignoreCase:
#initialize
--- a/KeywordInContextIndexBuilder.st Thu Oct 13 14:48:17 2016 +0200
+++ b/KeywordInContextIndexBuilder.st Thu Oct 13 14:57:17 2016 +0200
@@ -14,7 +14,8 @@
"{ NameSpace: Smalltalk }"
Object subclass:#KeywordInContextIndexBuilder
- instanceVariableNames:'keywordToLinesMapping excluded separatorAlgorithm'
+ instanceVariableNames:'keywordToLinesMapping excluded separatorAlgorithm
+ unquoteAlgorithm exclusionFilter'
classVariableNames:''
poolDictionaries:''
category:'Collections-Support'
@@ -284,12 +285,28 @@
excluded := aListOfExcludedWords asSet.
!
+exclusionFilter:aBlock
+ "define an additional filter block to exclude words by more complicated patterns.
+ It is invoked with the word as argument, after filtering by the exclusion list.
+ If defined (non-nil), it should return true if the word is to be excluded."
+
+ exclusionFilter := aBlock.
+!
+
separatorAlgorithm:aBlock
"define the algorithm to split a given string into words.
The default is to split at punctuation and whitespace
(see #initialize)"
separatorAlgorithm := aBlock.
+!
+
+unquoteAlgorithm:aBlock
+ "define the algorithm to unquote words: a one-argument block which gets each word
+ delivered by the separator algorithm; its result is used for case folding and exclusion.
+ The default unquotes double quotes first, then single quotes (see #initialize)"
+
+ unquoteAlgorithm := aBlock.
! !
!KeywordInContextIndexBuilder methodsFor:'building'!
@@ -312,14 +329,15 @@
(separatorAlgorithm value:aLine) do:[:eachWord |
|set word|
+ word := unquoteAlgorithm value:eachWord.
ignoreCase ifTrue:[
- word := eachWord asLowercase.
- ] ifFalse:[
- word := eachWord.
+ word := word asLowercase.
].
(excluded includes:word) ifFalse:[
- set := keywordToLinesMapping at:word ifAbsentPut:[Set new].
- set add:(aLine -> opaqueReference).
+ (exclusionFilter isNil or:[ (exclusionFilter value:word) not]) ifTrue:[
+ set := keywordToLinesMapping at:word ifAbsentPut:[Set new].
+ set add:(aLine -> opaqueReference).
+ ]
]
].
! !
@@ -384,7 +402,9 @@
initialize
keywordToLinesMapping := Dictionary new.
self excluded:(Set new).
+ self exclusionFilter:nil. "nil means: no additional filter; only the exclusion list applies"
self separatorAlgorithm:[:line | line asCollectionOfSubstringsSeparatedByAny:' .:,;-'].
+ self unquoteAlgorithm:[:word | (word unquote:$") unquote:$' ].
! !
!KeywordInContextIndexBuilder class methodsFor:'documentation'!