--- a/KeywordInContextIndexBuilder.st Thu Oct 13 13:04:02 2016 +0200
+++ b/KeywordInContextIndexBuilder.st Thu Oct 13 14:46:18 2016 +0200
@@ -93,11 +93,11 @@
Transcript cr.
Transcript showCR:'Printed as KWOC:'.
kwic
- entriesDo:[:word :left :right :ref :line |
+ entriesDo:[:word :left :right :ref :fullText :context |
Transcript
show:((word contractTo:10) paddedTo:10) allBold;
space;
- show:((line contractTo:60) paddedTo:60);
+ show:((context contractTo:60) paddedTo:60);
space;
show:'['; show:ref; show:']';
cr
@@ -277,38 +277,48 @@
!KeywordInContextIndexBuilder methodsFor:'accessing'!
-excluded:something
- excluded := something asSet.
+excluded:aListOfExcludedWords
+ "define the words which are to be ignored, i.e. which are not entered into the kwic.
+ Typically, this is a list of fill words, such as 'and', 'the', 'in', etc. (in lowercase, if case is ignored)"
+
+ excluded := aListOfExcludedWords asSet.
!
-separatorAlgorithm:something
- separatorAlgorithm := something.
+separatorAlgorithm:aBlock
+ "define the algorithm to split a given string into words;
+ aBlock is evaluated with the line as argument and must return the collection of its words.
+ The default is to split at punctuation and whitespace (see #initialize)"
+
+ separatorAlgorithm := aBlock.
! !
!KeywordInContextIndexBuilder methodsFor:'building'!
addLine:aLine reference:opaqueReference
"add a text line; the line is split at words and entered into the kwic.
- the reference argument is stored as 'value' of the generated entries"
+ The reference argument is stored as 'value' of the generated entries; it can be anything.
+ Case is ignored here, i.e. the words are entered in lowercase"
- self addLine:aLine reference:opaqueReference ignoreCase:false
+ self addLine:aLine reference:opaqueReference ignoreCase:true
!
addLine:aLine reference:opaqueReference ignoreCase:ignoreCase
+ "add a line to the kwic.
+ The line is split up into words by the separatorAlgorithm; for each word which is
+ not in the excluded set, an association (aLine -> opaqueReference) is remembered.
+ If ignoreCase is true, the words are entered in lowercase; otherwise, they are entered unchanged.
+ The reference argument can be anything"
+
(separatorAlgorithm value:aLine) do:[:eachWord |
|set word|
ignoreCase ifTrue:[
word := eachWord asLowercase.
] ifFalse:[
- word := eachWord asLowercase.
+ word := eachWord.
].
(excluded includes:word) ifFalse:[
- set := keywordToLinesMapping at:word ifAbsent:nil.
- set isNil ifTrue:[
- set := Set new.
- keywordToLinesMapping at:word put:set
- ].
+ set := keywordToLinesMapping at:word ifAbsentPut:[Set new].
set add:(aLine -> opaqueReference).
]
].
@@ -316,7 +326,7 @@
!KeywordInContextIndexBuilder methodsFor:'enumerating'!
-entriesDo:aFourOrFiveArgBlock
+entriesDo:aFourToSixArgBlock
"evaluate the argument, for each entry.
If it is a 4-arg block, it is called with:
kwic-word,
@@ -324,14 +334,13 @@
right text
and reference
If it is a 5-arg block, the original text is passed as additional argument.
+ If it is a 6-arg block, the original text and the context are passed as additional arguments.
(stupid, but done for backward compatibility)"
- |fourArgBlock fiveArgBlock|
+ |fourArgBlock|
- aFourOrFiveArgBlock numArgs == 5 ifTrue:[
- fiveArgBlock := aFourOrFiveArgBlock
- ] ifFalse:[
- fourArgBlock := aFourOrFiveArgBlock
+ aFourToSixArgBlock numArgs == 4 ifTrue:[
+ fourArgBlock := aFourToSixArgBlock
].
keywordToLinesMapping keys asSortedCollection do:[:eachKey |
|setOfMatches lcKey|
@@ -363,7 +372,7 @@
fourArgBlock notNil ifTrue:[
fourArgBlock value:word value:left value:right value:ref.
] ifFalse:[
- fiveArgBlock value:word value:left value:right value:ref value:text.
+ aFourToSixArgBlock value:word optionalArgument:left and:right and:ref and:text and:context
].
].
]