KeywordInContextIndexBuilder.st
changeset 4128 4cc1535fa7dc
parent 4127 0f3c785bb689
child 4129 04b54f7b1a82
--- a/KeywordInContextIndexBuilder.st	Thu Oct 13 13:04:02 2016 +0200
+++ b/KeywordInContextIndexBuilder.st	Thu Oct 13 14:46:18 2016 +0200
@@ -93,11 +93,11 @@
     Transcript cr.
     Transcript showCR:'Printed as KWOC:'.
     kwic 
-        entriesDo:[:word :left :right :ref :line |
+        entriesDo:[:word :left :right :ref :fullText :context |
             Transcript 
                 show:((word contractTo:10) paddedTo:10) allBold;
                 space;
-                show:((line contractTo:60) paddedTo:60);
+                show:((context contractTo:60) paddedTo:60);
                 space;
                 show:'['; show:ref; show:']';
                 cr    
@@ -277,38 +277,48 @@
 
 !KeywordInContextIndexBuilder methodsFor:'accessing'!
 
-excluded:something
-    excluded := something asSet.
+excluded:aListOfExcludedWords
+    "define words which are to be ignored.
+     Typically, this is a list of filler words (stop words), such as 'and', 'the', 'in', etc."
+     
+    excluded := aListOfExcludedWords asSet.
 !
 
-separatorAlgorithm:something
-    separatorAlgorithm := something.
+separatorAlgorithm:aBlock
+    "define the algorithm to split a given string into words.
+     The default is to split at punctuation and whitespace
+     (see #initialize)"
+     
+    separatorAlgorithm := aBlock.
 ! !
 
 !KeywordInContextIndexBuilder methodsFor:'building'!
 
 addLine:aLine reference:opaqueReference
     "add a text line; the line is split at words and entered into the kwic.
-     the reference argument is stored as 'value' of the generated entries"
+     The reference argument is stored as 'value' of the generated entries.
+     It can be anything. Words are entered case-insensitively (converted to lowercase)"
 
-    self addLine:aLine reference:opaqueReference ignoreCase:false
+    self addLine:aLine reference:opaqueReference ignoreCase:true
 !
 
 addLine:aLine reference:opaqueReference ignoreCase:ignoreCase
+    "add a line to the kwic.
+     The line is split up into words, and a reference to opaqueReference
+     is added for each word.
+     The reference argument is stored as 'value' of the generated entries.
+     It can be anything. If ignoreCase is true, words are entered in lowercase; otherwise as-is"
+     
     (separatorAlgorithm value:aLine) do:[:eachWord |
         |set word|
 
         ignoreCase ifTrue:[
             word := eachWord asLowercase.
         ] ifFalse:[
-            word := eachWord asLowercase.
+            word := eachWord.
         ].
         (excluded includes:word) ifFalse:[
-            set := keywordToLinesMapping at:word ifAbsent:nil.
-            set isNil ifTrue:[
-                set := Set new.
-                keywordToLinesMapping at:word put:set
-            ].
+            set := keywordToLinesMapping at:word ifAbsentPut:[Set new].
             set add:(aLine -> opaqueReference).
         ]
     ].
@@ -316,7 +326,7 @@
 
 !KeywordInContextIndexBuilder methodsFor:'enumerating'!
 
-entriesDo:aFourOrFiveArgBlock
+entriesDo:aFourToSixArgBlock
     "evaluate the argument, for each entry.
      If it is a 4-arg block, it is called with:
         kwic-word, 
@@ -324,14 +334,13 @@
         right text 
         and reference
      If it is a 5-arg block, the original text is passed as additional argument.
+     If it is a 6-arg block, the original text and the context are passed as additional arguments.
      (stupid, but done for backward compatibility)"
 
-    |fourArgBlock fiveArgBlock|
+    |fourArgBlock|
 
-    aFourOrFiveArgBlock numArgs == 5 ifTrue:[
-        fiveArgBlock := aFourOrFiveArgBlock 
-    ] ifFalse:[
-        fourArgBlock := aFourOrFiveArgBlock 
+    aFourToSixArgBlock numArgs == 4 ifTrue:[
+        fourArgBlock := aFourToSixArgBlock 
     ].    
     keywordToLinesMapping keys asSortedCollection do:[:eachKey |
         |setOfMatches lcKey|
@@ -363,7 +372,7 @@
                 fourArgBlock notNil ifTrue:[
                     fourArgBlock value:word value:left value:right value:ref.
                 ] ifFalse:[
-                    fiveArgBlock value:word value:left value:right value:ref value:text.
+                    aFourToSixArgBlock value:word optionalArgument:left and:right and:ref and:text and:context
                 ].    
             ].
         ]