1375
|
1 |
"
|
|
2 |
COPYRIGHT (c) 2003 by eXept Software AG
|
|
3 |
All Rights Reserved
|
|
4 |
|
|
5 |
This software is furnished under a license and may be used
|
|
6 |
only in accordance with the terms of that license and with the
|
|
7 |
inclusion of the above copyright notice. This software may not
|
|
8 |
be provided or otherwise made available to, or used by, any
|
|
9 |
other person. No title to or ownership of the software is
|
|
10 |
hereby transferred.
|
|
11 |
"
|
|
12 |
"{ Package: 'stx:libbasic2' }"
|
|
13 |
|
|
14 |
"Class definition: an Object subclass holding the keyword mapping, the set of
 excluded words and the block used to split a line into words."
Object
    subclass:#KeywordInContextIndexBuilder
    instanceVariableNames:'keywordToLinesMapping excluded separatorAlgorithm'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Support'
|
|
19 |
!
|
|
20 |
|
|
21 |
!KeywordInContextIndexBuilder class methodsFor:'documentation'!
|
|
22 |
|
|
23 |
copyright
|
|
24 |
"
|
|
25 |
COPYRIGHT (c) 2003 by eXept Software AG
|
|
26 |
All Rights Reserved
|
|
27 |
|
|
28 |
This software is furnished under a license and may be used
|
|
29 |
only in accordance with the terms of that license and with the
|
|
30 |
inclusion of the above copyright notice. This software may not
|
|
31 |
be provided or otherwise made available to, or used by, any
|
|
32 |
other person. No title to or ownership of the software is
|
|
33 |
hereby transferred.
|
|
34 |
"
|
|
35 |
!
|
|
36 |
|
|
37 |
documentation
|
|
38 |
"
|
|
39 |
A support class for building KWIC (Keyword in Context) indices
|
|
40 |
(for example, to build a KWIC index on html pages or class documentation).
|
|
41 |
|
|
42 |
[author:]
|
|
43 |
Claus Gittinger (cg@alan)
|
|
44 |
|
|
45 |
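[instance variables:]
keywordToLinesMapping   Dictionary; maps each keyword to a Set of (line -> reference) associations
excluded                Set of words which are not entered into the index
separatorAlgorithm      one-argument block; given a line, returns the collection of words found in it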
|
|
46 |
|
|
47 |
[class variables:]
|
|
48 |
|
|
49 |
[see also:]
|
|
50 |
|
|
51 |
"
|
|
52 |
!
|
|
53 |
|
|
54 |
examples
|
|
55 |
"
|
|
56 |
[exBegin]
|
|
57 |
|kwic|
|
|
58 |
|
|
59 |
kwic := KeywordInContextIndexBuilder new.
|
|
60 |
kwic excluded:#('the' 'and' 'a' 'an').
|
|
61 |
|
|
62 |
kwic addLine:'bla bla bla' reference:1.
|
|
63 |
kwic addLine:'one two three' reference:2.
|
|
64 |
kwic addLine:'a cat and a dog' reference:3.
|
|
65 |
kwic addLine:'the man in the middle' reference:4.
|
|
66 |
kwic addLine:'the man with the dog' reference:5.
|
|
67 |
|
|
68 |
kwic
|
|
69 |
entriesDo:[:word :left :right :ref |
|
|
70 |
Transcript
|
|
71 |
show:((left contractTo:20) leftPaddedTo:20);
|
|
72 |
space;
|
|
73 |
show:((word contractTo:10) leftPaddedTo:10);
|
|
74 |
space;
|
|
75 |
show:((right contractTo:20) leftPaddedTo:20);
|
2536
|
76 |
space;
|
|
77 |
show:'['; show:ref; show:']';
|
1375
|
78 |
cr
|
|
79 |
].
|
|
80 |
[exEnd]
|
|
81 |
|
|
82 |
|
|
83 |
KWIC index over method selector components:
|
|
84 |
[exBegin]
|
|
85 |
|kwic|
|
|
86 |
|
|
87 |
kwic := KeywordInContextIndexBuilder new.
|
|
88 |
Smalltalk allClassesDo:[:eachClass |
|
|
89 |
eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
|
|
90 |
kwic addLine:sel reference:mthd.
|
|
91 |
]
|
|
92 |
].
|
|
93 |
kwic
|
|
94 |
[exEnd]
|
|
95 |
|
|
96 |
KWIC index over method selector components, with word separation:
|
|
97 |
[exBegin]
|
|
98 |
|kwic|
|
|
99 |
|
|
100 |
kwic := KeywordInContextIndexBuilder forMethodSelectorIndex.
|
|
101 |
|
|
102 |
Smalltalk allClassesDo:[:eachClass |
|
|
103 |
eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
|
|
104 |
kwic addLine:sel reference:mthd.
|
|
105 |
]
|
|
106 |
].
|
|
107 |
kwic
|
|
108 |
[exEnd]
|
|
109 |
|
|
110 |
KWIC index over method comments:
|
|
111 |
[exBegin]
|
|
112 |
|kwic|
|
|
113 |
|
|
114 |
kwic := KeywordInContextIndexBuilder forMethodComments.
|
|
115 |
|
|
116 |
Smalltalk allClassesDo:[:eachClass |
|
|
117 |
eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
|
|
118 |
|comment|
|
|
119 |
|
|
120 |
(sel == #documentation) ifTrue:[
|
|
121 |
comment := mthd comment.
|
|
122 |
comment notNil ifTrue:[
|
|
123 |
kwic addLine:comment reference:mthd mclass ignoreCase:true.
|
|
124 |
]
|
|
125 |
] ifFalse:[
|
|
126 |
(sel ~~ #examples
|
|
127 |
and:[ sel ~~ #copyright
|
|
128 |
and:[ sel ~~ #version]]) ifTrue:[
|
|
129 |
comment := mthd comment.
|
|
130 |
comment notNil ifTrue:[
|
|
131 |
kwic addLine:comment reference:mthd ignoreCase:true.
|
|
132 |
]
|
|
133 |
]
|
|
134 |
]
|
|
135 |
]
|
|
136 |
].
|
|
137 |
kwic
|
|
138 |
[exEnd]
|
|
139 |
|
|
140 |
KWIC index over class comments:
|
|
141 |
[exBegin]
|
|
142 |
|kwic|
|
|
143 |
|
|
144 |
kwic := KeywordInContextIndexBuilder forMethodComments.
|
|
145 |
|
|
146 |
Smalltalk allClassesDo:[:eachClass |
|
|
147 |
|mthd comment|
|
|
148 |
|
|
149 |
mthd := eachClass theMetaclass compiledMethodAt:#documentation.
|
|
150 |
mthd notNil ifTrue:[
|
|
151 |
comment := mthd comment.
|
|
152 |
comment notNil ifTrue:[
|
|
153 |
kwic addLine:comment reference:eachClass theNonMetaclass ignoreCase:true.
|
|
154 |
]
|
|
155 |
]
|
|
156 |
].
|
|
157 |
kwic
|
|
158 |
[exEnd]
|
|
159 |
"
|
|
160 |
! !
|
|
161 |
|
|
162 |
!KeywordInContextIndexBuilder class methodsFor:'instance creation'!
|
|
163 |
|
|
164 |
forMethodComments
    "Answer a new index builder set up for comment text.
     The text is converted to a string and split at punctuation, bracket,
     quote and whitespace characters; a few common english filler words
     are excluded from the index."

    |separators builder|

    "the doubled exclamation mark inside the literal is the fileout escape
     for a single exclamation mark"
    separators := '.,;:_ !![]()''"#?<>|' , Character return, Character lf, Character tab.

    builder := self new.
    builder
        separatorAlgorithm:[:text | text asString asCollectionOfSubstringsSeparatedByAny:separators];
        excluded:#('the' 'and' 'a' 'an' 'for' 'with' 'no').
    ^ builder
|
|
177 |
!
|
|
178 |
|
|
179 |
forMethodSelectorIndex
    "return an indexer for method selector components, with word separation at case boundaries.

     A line is first split at any of the characters in '.,;:_ '.
     Each resulting word is then treated as follows:
        - a word without uppercase characters is taken as one keyword;
        - otherwise it is cut at every lowercase-to-uppercase transition
          (addLine gives add and Line);
        - a run of uppercase characters followed by a lowercase letter keeps
          its last uppercase character with the following fragment
          (HTMLParser gives HTML and Parser).
     The fragment which is still being collected when the word ends is
     added as well; without that, the last component of every mixed-case
     word (and any word consisting of a single capitalized component)
     would be missing from the index."

    |sep kwic sepUCWords|

    sepUCWords := [:word :keyWords|
        |s w c lastC last2C frag|

        word asLowercase = word ifTrue:[
            keyWords add:word.
        ] ifFalse:[
            s := word readStream.
            w := '' writeStream.
            [s atEnd] whileFalse:[
                c := s next.
                (c isUppercase) ifTrue:[
                    "lower-to-upper transition: the fragment collected so far is complete"
                    (lastC notNil and:[lastC isUppercase not]) ifTrue:[
                        keyWords add:w contents.
                        w := '' writeStream.
                    ].
                ] ifFalse:[
                    "two uppercase characters followed by a letter: everything but the
                     last uppercase character forms a fragment; that last character
                     starts the next one"
                    (last2C notNil and:[last2C isUppercase and:[lastC isUppercase]]) ifTrue:[
                        c isLetter ifTrue:[
                            frag := w contents.
                            w := '' writeStream.
                            w nextPut:(frag last).
                            keyWords add:(frag allButLast).
                        ].
                    ].
                ].
                w nextPut:c.
                last2C := lastC.
                lastC := c.
            ].
            "flush whatever is left in the stream at the end of the word"
            frag := w contents.
            frag isEmpty ifFalse:[
                keyWords add:frag.
            ].
        ].
    ].

    sep := [:line |
        |words keyWords|

        words := line asCollectionOfSubstringsSeparatedByAny:'.,;:_ '.
        keyWords := OrderedCollection new.
        words do:[:eachWord | sepUCWords value:eachWord value:keyWords].
        keyWords
    ].

    kwic := self new.
    kwic separatorAlgorithm:sep.
    ^ kwic
|
|
232 |
!
|
|
233 |
|
|
234 |
new
    "Answer a new index builder; the result is whatever
     initialize returns for the freshly allocated instance."

    |builder|

    builder := self basicNew.
    ^ builder initialize
|
|
236 |
! !
|
|
237 |
|
|
238 |
!KeywordInContextIndexBuilder methodsFor:'accessing'!
|
|
239 |
|
|
240 |
excluded:aCollectionOfWords
    "Define the words which are not to be entered into the index.
     The argument is converted with asSet before it is stored."

    excluded := aCollectionOfWords asSet.
|
|
242 |
!
|
|
243 |
|
|
244 |
separatorAlgorithm:aOneArgBlock
    "Define how a line is split into words: the argument is evaluated
     with each added line and its result is enumerated as the words
     of that line."

    separatorAlgorithm := aOneArgBlock.
|
|
246 |
! !
|
|
247 |
|
|
248 |
!KeywordInContextIndexBuilder methodsFor:'building'!
|
|
249 |
|
|
250 |
addLine:aLine reference:opaqueReference
    "Add a line of text together with the reference which is passed
     along with it when the index is enumerated.
     Same as addLine:reference:ignoreCase: with false as the last argument."

    self
        addLine:aLine
        reference:opaqueReference
        ignoreCase:false
|
|
252 |
!
|
|
253 |
|
|
254 |
addLine:aLine reference:opaqueReference ignoreCase:ignoreCase
    "Add a line of text to the index.
     The line is split into words by the separatorAlgorithm block; every
     word which is not in the excluded set is entered (in lowercase) as a
     keyword, and the association (aLine -> opaqueReference) is added to
     the set of matches for that keyword.

     NOTE(review): the ignoreCase argument currently has no effect.
     Both branches of the former conditional converted the word to
     lowercase, so keywords are always lowercase. That behaviour is kept
     here because entriesDo: searches the lines in lowercase as well;
     confirm with the callers before making the flag effective."

    (separatorAlgorithm value:aLine) do:[:eachWord |
        |word|

        word := eachWord asLowercase.
        (excluded includes:word) ifFalse:[
            (keywordToLinesMapping at:word ifAbsentPut:[Set new])
                add:(aLine -> opaqueReference).
        ]
    ].
|
|
272 |
! !
|
|
273 |
|
|
274 |
!KeywordInContextIndexBuilder methodsFor:'enumerating'!
|
|
275 |
|
|
276 |
entriesDo:aBlock
    "Enumerate the index in sorted keyword order.
     For every (line -> reference) entry stored under a keyword, the text
     is split into lines and the first line containing the keyword
     (compared in lowercase) is looked up. Its context is that line,
     joined by a space with the previous and the next line if present.
     aBlock is then evaluated with four arguments:
        the keyword as it appears in the context,
        the context to the left of it,
        the context to the right of it,
        the reference.
     All three strings have leading and trailing separators removed.
     Entries whose text contains no such line are skipped."

    |sortedKeywords|

    sortedKeywords := keywordToLinesMapping keys asSortedCollection.
    sortedKeywords do:[:keyword |
        |needle|

        needle := keyword asLowercase.
        (keywordToLinesMapping at:keyword) do:[:lineAndRef |
            |allLines hitLine context start stop|

            allLines := lineAndRef key asCollectionOfLines.
            hitLine := allLines findFirst:[:l | l asLowercase includesString:needle].
            hitLine == 0 ifFalse:[
                context := allLines at:hitLine.
                hitLine > 1 ifTrue:[
                    context := (allLines at:hitLine - 1) , ' ' , context.
                ].
                hitLine < allLines size ifTrue:[
                    context := context , ' ' , (allLines at:hitLine + 1).
                ].

                "position of the keyword inside the assembled context"
                start := context asLowercase findString:needle.
                stop := start + needle size - 1.

                aBlock
                    value:(context copyFrom:start to:stop) withoutSeparators
                    value:(context copyTo:start - 1) withoutSeparators
                    value:(context copyFrom:stop + 1) withoutSeparators
                    value:lineAndRef value.
            ].
        ]
    ]
|
|
308 |
! !
|
|
309 |
|
|
310 |
!KeywordInContextIndexBuilder methodsFor:'initialization'!
|
|
311 |
|
|
312 |
initialize
    "Set up an empty index: no keywords, no excluded words, and a default
     separator block which splits a line at any of the characters
     space, period, colon, comma, semicolon and minus."

    separatorAlgorithm := [:aLine | aLine asCollectionOfSubstringsSeparatedByAny:' .:,;-'].
    excluded := Set new.
    keywordToLinesMapping := Dictionary new.
|
|
316 |
! !
|
|
317 |
|
|
318 |
!KeywordInContextIndexBuilder class methodsFor:'documentation'!
|
|
319 |
|
|
320 |
version
    "Answer the CVS header string recorded for this source file."

    ^ '$Header: /cvs/stx/stx/libbasic2/KeywordInContextIndexBuilder.st,v 1.2 2011-02-14 17:39:30 cg Exp $'
|
|
322 |
!
|
|
323 |
|
|
324 |
version_CVS
    "Answer the CVS header string recorded for this source file."

    ^ '$Header: /cvs/stx/stx/libbasic2/KeywordInContextIndexBuilder.st,v 1.2 2011-02-14 17:39:30 cg Exp $'
|
1375
|
326 |
! !
|