--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/KeywordInContextIndexBuilder.st Mon Dec 08 17:03:24 2003 +0100
@@ -0,0 +1,321 @@
+"
+ COPYRIGHT (c) 2003 by eXept Software AG
+ All Rights Reserved
+
+ This software is furnished under a license and may be used
+ only in accordance with the terms of that license and with the
+ inclusion of the above copyright notice. This software may not
+ be provided or otherwise made available to, or used by, any
+ other person. No title to or ownership of the software is
+ hereby transferred.
+"
+
+"{ Package: 'stx:libbasic2' }"
+
+Object subclass:#KeywordInContextIndexBuilder
+ instanceVariableNames:'keywordToLinesMapping excluded separatorAlgorithm'
+ classVariableNames:''
+ poolDictionaries:''
+ category:'Collections-Support'
+!
+
+!KeywordInContextIndexBuilder class methodsFor:'documentation'!
+
+copyright
+"
+ COPYRIGHT (c) 2003 by eXept Software AG
+ All Rights Reserved
+
+ This software is furnished under a license and may be used
+ only in accordance with the terms of that license and with the
+ inclusion of the above copyright notice. This software may not
+ be provided or otherwise made available to, or used by, any
+ other person. No title to or ownership of the software is
+ hereby transferred.
+"
+!
+
+documentation
+"
+ A support class for building a KWIC (Keyword in Context) indices.
+ (for example, to build a KWIC index on html pages or class documentation).
+
+ [author:]
+ Claus Gittinger (cg@alan)
+
+ [instance variables:]
+
+ [class variables:]
+
+ [see also:]
+
+"
+!
+
+examples
+"
+ [exBegin]
+ |kwic|
+
+ kwic := KeywordInContextIndexBuilder new.
+ kwic excluded:#('the' 'and' 'a' 'an').
+
+ kwic addLine:'bla bla bla' reference:1.
+ kwic addLine:'one two three' reference:2.
+ kwic addLine:'a cat and a dog' reference:3.
+ kwic addLine:'the man in the middle' reference:4.
+ kwic addLine:'the man with the dog' reference:5.
+
+ kwic
+ entriesDo:[:word :left :right :ref |
+ Transcript
+ show:((left contractTo:20) leftPaddedTo:20);
+ space;
+ show:((word contractTo:10) leftPaddedTo:10);
+ space;
+ show:((right contractTo:20) leftPaddedTo:20);
+ cr
+ ].
+ [exEnd]
+
+
+ KWIC index over method selector components:
+ [exBegin]
+ |kwic|
+
+ kwic := KeywordInContextIndexBuilder new.
+ Smalltalk allClassesDo:[:eachClass |
+ eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
+ kwic addLine:sel reference:mthd.
+ ]
+ ].
+ kwic
+ [exEnd]
+
+ KWIC index over method selector components, with word separation:
+ [exBegin]
+ |kwic|
+
+ kwic := KeywordInContextIndexBuilder forMethodSelectorIndex.
+
+ Smalltalk allClassesDo:[:eachClass |
+ eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
+ kwic addLine:sel reference:mthd.
+ ]
+ ].
+ kwic
+ [exEnd]
+
+ KWIC index over method comments:
+ [exBegin]
+ |kwic|
+
+ kwic := KeywordInContextIndexBuilder forMethodComments.
+
+ Smalltalk allClassesDo:[:eachClass |
+ eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
+ |comment|
+
+ (sel == #documentation) ifTrue:[
+ comment := mthd comment.
+ comment notNil ifTrue:[
+ kwic addLine:comment reference:mthd mclass ignoreCase:true.
+ ]
+ ] ifFalse:[
+ (sel ~~ #examples
+ and:[ sel ~~ #copyright
+ and:[ sel ~~ #version]]) ifTrue:[
+ comment := mthd comment.
+ comment notNil ifTrue:[
+ kwic addLine:comment reference:mthd ignoreCase:true.
+ ]
+ ]
+ ]
+ ]
+ ].
+ kwic
+ [exEnd]
+
+ KWIC index over class comments:
+ [exBegin]
+ |kwic|
+
+ kwic := KeywordInContextIndexBuilder forMethodComments.
+
+ Smalltalk allClassesDo:[:eachClass |
+ |mthd comment|
+
+ mthd := eachClass theMetaclass compiledMethodAt:#documentation.
+ mthd notNil ifTrue:[
+ comment := mthd comment.
+ comment notNil ifTrue:[
+ kwic addLine:comment reference:eachClass theNonMetaclass ignoreCase:true.
+ ]
+ ]
+ ].
+ kwic
+ [exEnd]
+"
+! !
+
+!KeywordInContextIndexBuilder class methodsFor:'instance creation'!
+
+forMethodComments
+ "return an indexer for method comments"
+
+ |sepChars sep kwic|
+
+ sepChars := '.,;:_ !![]()''"#?<>|' , Character return, Character lf, Character tab.
+
+ sep := [:lines | lines asString asCollectionOfSubstringsSeparatedByAny:sepChars].
+
+ kwic := self new.
+ kwic separatorAlgorithm:sep.
+ kwic excluded:#('the' 'and' 'a' 'an' 'for' 'with' 'no').
+ ^ kwic
+!
+
+forMethodSelectorIndex
+ "return an indexer for method selector components, with word separation at case boundaries"
+
+ |sep kwic sepUCWords|
+
+ sepUCWords := [:word :keyWords|
+ |s w c lastC last2C frag|
+
+ word asLowercase = word ifTrue:[
+ keyWords add:word.
+ ] ifFalse:[
+ s := word readStream.
+ w := '' writeStream.
+ [s atEnd] whileFalse:[
+ c := s next.
+ (c isUppercase) ifTrue:[
+ (lastC notNil and:[lastC isUppercase not]) ifTrue:[
+ keyWords add:w contents.
+ w := '' writeStream.
+ ].
+ ] ifFalse:[
+ (last2C notNil and:[last2C isUppercase and:[lastC isUppercase]]) ifTrue:[
+ c isLetter ifTrue:[
+ frag := w contents.
+ w := '' writeStream.
+ w nextPut:(frag last).
+ keyWords add:(frag allButLast).
+ ] ifFalse:[
+ ' frag := w contents.
+ w := '' writeStream.
+ keyWords add:frag. '.
+ ].
+ ].
+ ].
+ w nextPut:c.
+ last2C := lastC.
+ lastC := c.
+ ].
+ ].
+ ].
+
+ sep := [:line |
+ |words keyWords|
+
+ words := line asCollectionOfSubstringsSeparatedByAny:'.,;:_ '.
+ keyWords := OrderedCollection new.
+ words do:[:eachWord | sepUCWords value:eachWord value:keyWords].
+ keyWords
+ ].
+
+ kwic := self new.
+ kwic separatorAlgorithm:sep.
+ ^ kwic
+!
+
+new
+ ^ self basicNew initialize
+! !
+
+!KeywordInContextIndexBuilder methodsFor:'accessing'!
+
+excluded:something
+ excluded := something asSet.
+!
+
+separatorAlgorithm:something
+ separatorAlgorithm := something.
+! !
+
+!KeywordInContextIndexBuilder methodsFor:'building'!
+
+addLine:aLine reference:opaqueReference
+ self addLine:aLine reference:opaqueReference ignoreCase:false
+!
+
+addLine:aLine reference:opaqueReference ignoreCase:ignoreCase
+ (separatorAlgorithm value:aLine) do:[:eachWord |
+ |set word|
+
+ ignoreCase ifTrue:[
+ word := eachWord asLowercase.
+ ] ifFalse:[
+ word := eachWord asLowercase.
+ ].
+ (excluded includes:word) ifFalse:[
+ set := keywordToLinesMapping at:word ifAbsent:nil.
+ set isNil ifTrue:[
+ set := Set new.
+ keywordToLinesMapping at:word put:set
+ ].
+ set add:(aLine -> opaqueReference).
+ ]
+ ].
+! !
+
+!KeywordInContextIndexBuilder methodsFor:'enumerating'!
+
+entriesDo:aBlock
+ keywordToLinesMapping keys asSortedCollection do:[:eachKey |
+ |setOfMatches lcKey|
+
+ setOfMatches := keywordToLinesMapping at:eachKey.
+ lcKey := eachKey asLowercase.
+ setOfMatches do:[:eachAssoc |
+ |text ref lines idx lIdx context left right word prevLine nextLine|
+
+ text := eachAssoc key.
+ ref := eachAssoc value.
+
+ lines := text asCollectionOfLines.
+ idx := lines findFirst:[:line | line asLowercase includesString:lcKey].
+ idx ~~ 0 ifTrue:[
+ context := lines at:idx.
+ idx > 1 ifTrue:[
+ prevLine := (lines at:idx-1).
+ context := prevLine , ' ' , context.
+ ].
+ idx < lines size ifTrue:[
+ nextLine := (lines at:idx+1).
+ context := context , ' ' , nextLine.
+ ].
+ lIdx := context asLowercase findString:lcKey.
+ left := (context copyTo:lIdx - 1) withoutSeparators.
+ right := (context copyFrom:lIdx + lcKey size) withoutSeparators.
+ word := (context copyFrom:lIdx to:lIdx + lcKey size - 1) withoutSeparators.
+ aBlock value:word value:left value:right value:ref.
+ ].
+ ]
+ ]
+! !
+
+!KeywordInContextIndexBuilder methodsFor:'initialization'!
+
+initialize
+ keywordToLinesMapping := Dictionary new.
+ excluded := Set new.
+ separatorAlgorithm := [:line | line asCollectionOfSubstringsSeparatedByAny:' .:,;-'].
+! !
+
+!KeywordInContextIndexBuilder class methodsFor:'documentation'!
+
+version
+ ^ '$Header: /cvs/stx/stx/libbasic2/KeywordInContextIndexBuilder.st,v 1.1 2003-12-08 16:03:24 cg Exp $'
+! !