KeywordInContextIndexBuilder.st
author Claus Gittinger <cg@exept.de>
Sat, 02 May 2020 21:40:13 +0200
changeset 5476 7355a4b11cb6
parent 5224 2e6d0898b080
permissions -rw-r--r--
#FEATURE by cg class: Socket class added: #newTCPclientToHost:port:domain:domainOrder:withTimeout: changed: #newTCPclientToHost:port:domain:withTimeout:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5223
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
     1
"{ Encoding: utf8 }"
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
     2
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 2003 by eXept Software AG
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
              All Rights Reserved
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 This software is furnished under a license and may be used
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 only in accordance with the terms of that license and with the
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 inclusion of the above copyright notice.   This software may not
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 be provided or otherwise made available to, or used by, any
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 other person.  No title to or ownership of the software is
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 hereby transferred.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"{ Package: 'stx:libbasic2' }"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
4108
667d0bdaf609 #OTHER by cg
Claus Gittinger <cg@exept.de>
parents: 3184
diff changeset
    16
"{ NameSpace: Smalltalk }"
667d0bdaf609 #OTHER by cg
Claus Gittinger <cg@exept.de>
parents: 3184
diff changeset
    17
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
"Declares KeywordInContextIndexBuilder as a direct subclass of Object in
 category 'Collections-Support'.
 The instance variables separatorAlgorithm, excluded, unquoteAlgorithm and
 matchSorter are the configurable hooks listed in the class-side
 documentation method.
 NOTE(review): keywordToLinesMapping and exclusionFilter are not described
 there; presumably the keyword-to-lines index and an additional exclusion
 hook - confirm against their accessors.
 NOTE(review): the class variables FillWordsEnglish, FillWordsGerman and
 FillWordsFrench presumably hold per-language fill-word lists - confirm."
Object subclass:#KeywordInContextIndexBuilder
4130
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
    19
	instanceVariableNames:'keywordToLinesMapping excluded separatorAlgorithm
4188
f823326d96a3 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4187
diff changeset
    20
		unquoteAlgorithm exclusionFilter matchSorter'
5223
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
    21
	classVariableNames:'FillWordsEnglish FillWordsGerman FillWordsFrench'
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	poolDictionaries:''
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
	category:'Collections-Support'
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
!KeywordInContextIndexBuilder class methodsFor:'documentation'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
copyright
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
 COPYRIGHT (c) 2003 by eXept Software AG
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
              All Rights Reserved
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
 This software is furnished under a license and may be used
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
 only in accordance with the terms of that license and with the
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
 inclusion of the above copyright notice.   This software may not
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
 be provided or otherwise made available to, or used by, any
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
 other person.  No title to or ownership of the software is
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
 hereby transferred.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
documentation
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
"
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    44
    A support class for building KWIC (Keyword in Context) or KWOC (Keyword out of Context) indexes.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    45
    (for example, to build such indexes on html pages or class documentation).
4125
d597206782cc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4124
diff changeset
    46
    
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
    47
    To generate a kwic, add each line together with a reference (or page number, or whatever),
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
    48
    using addLine:reference:.
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    49
    Then, when finished, enumerate the kwic and print as kwic or kwoc.
4127
0f3c785bb689 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4126
diff changeset
    50
    
4187
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    51
    To ignore fill words (such as 'and', 'the', 'in', etc.), 
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    52
    define those with the #excluded: message.
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    53
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    54
    The keyword handling is configurable by providing actions/lists for:
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    55
        separatorAlgorithm      a block which separates lines into individual words
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    56
                                gets a line; delivers a collection of words
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    57
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    58
        excluded                a collection of words which are to be ignored
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    59
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    60
        unquoteAlgorithm        a block to remove quotes around words. 
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    61
                                gets word as argument, delivers unquoted word
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    62
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    63
        keywordMappingAlgorithm 
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    64
                                maps keywords; for example, can be used to map 'startsWith'
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    65
                                to 'start', so they appear in the same section.
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    66
                                Gets the word and the set-of-all-words as arguments,
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    67
                                delivers the key into which the word's entries should be placed  
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    68
                                
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    69
        matchSorter             determines the order in which keywords are listed
064b249c5e3d #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4132
diff changeset
    70
        
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    71
    [author:]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    72
        Claus Gittinger (cg@alan)
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    73
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    74
    [examples:]
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    75
        see examples method
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    76
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    77
    [see also:]
4125
d597206782cc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4124
diff changeset
    78
        https://en.wikipedia.org/wiki/Key_Word_in_Context (english)
d597206782cc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4124
diff changeset
    79
        https://de.wikipedia.org/wiki/Permutiertes_Register (german)
d597206782cc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4124
diff changeset
    80
        
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    81
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    82
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    83
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    84
examples
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    85
"
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    86
    building a kwic; print as kwic and kwoc
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    87
                                                                [exBegin]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
    |kwic|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
    kwic := KeywordInContextIndexBuilder new.
4127
0f3c785bb689 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4126
diff changeset
    91
    kwic excluded:#('the' 'and' 'a' 'an' 'in').
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
    kwic addLine:'bla bla bla' reference:1.
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    94
    kwic addLine:'foo, bar. baz' reference:2.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    95
    kwic addLine:'one two three' reference:3.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    96
    kwic addLine:'a cat and a dog' reference:4.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    97
    kwic addLine:'the man in the middle' reference:5.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
    98
    kwic addLine:'the man with the dog' reference:6.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   100
    Transcript showCR:'Printed as KWIC:'.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
    kwic 
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
        entriesDo:[:word :left :right :ref |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
            Transcript 
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
                show:((left contractTo:20) leftPaddedTo:20);
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
                space;
4124
2d4e83bec872 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4108
diff changeset
   106
                show:((word contractTo:10) leftPaddedTo:10) allBold;
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   107
                space;
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   108
                show:((right contractTo:20) leftPaddedTo:20);
2536
8907a20de2dc changed: #examples
Claus Gittinger <cg@exept.de>
parents: 1375
diff changeset
   109
                space;
8907a20de2dc changed: #examples
Claus Gittinger <cg@exept.de>
parents: 1375
diff changeset
   110
                show:'['; show:ref; show:']';
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   111
                cr    
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   112
        ].
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   113
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   114
    Transcript cr.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   115
    Transcript showCR:'Printed as KWOC:'.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   116
    kwic 
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   117
        entriesDo:[:word :left :right :ref :fullText :context |
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   118
            Transcript 
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   119
                show:((word contractTo:10) paddedTo:10) allBold;
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   120
                space;
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   121
                show:((context contractTo:60) paddedTo:60);
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   122
                space;
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   123
                show:'['; show:ref; show:']';
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   124
                cr    
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   125
        ].
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   126
                                                                [exEnd]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   127
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   128
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   129
  KWIC index over method selector components; build a little browser window:
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   130
                                                                [exBegin]
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   131
    |kwic v s c list refs|
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   132
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   133
    kwic := KeywordInContextIndexBuilder new.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
    Smalltalk allClassesDo:[:eachClass |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
        eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
            kwic addLine:sel reference:mthd.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
        ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   138
    ].
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   139
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   140
    v := StandardSystemView new.
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   141
    v addComponent:(s := HVScrollableView for:SelectionInListView).
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   142
    s origin:0.0@0.0 corner:1.0@0.5.
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   143
    v addComponent:(c := HVScrollableView for:CodeView).
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   144
    c origin:0.0@0.5 corner:1.0@1.0.
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   145
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   146
    refs := OrderedCollection new.
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   147
    list := OrderedCollection new.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   148
    kwic 
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   149
        entriesDo:[:word :left :right :ref |
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   150
            list add:(word,' ',left,' ',word allBold,' ',right,' (',ref mclass name,')').
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   151
            refs add:ref].
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   152
    s list:list.
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   153
    s action:[:lNr | c contents:(refs at:lNr) source].
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   154
    v open.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   155
                                                                [exEnd]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   156
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   157
  KWIC index over method selector components, with word separation:
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   158
                                                                [exBegin]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   159
    |kwic|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   160
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   161
    kwic := KeywordInContextIndexBuilder forMethodSelectorIndex.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   162
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   163
    Smalltalk allClassesDo:[:eachClass |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   164
        eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   165
            kwic addLine:sel reference:mthd.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   166
        ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   167
    ].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   168
    kwic
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   169
                                                                [exEnd]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   170
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
  KWIC index over method comments:
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
                                                                [exBegin]
4124
2d4e83bec872 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4108
diff changeset
   173
    |kwic v s c refs list|
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
    kwic := KeywordInContextIndexBuilder forMethodComments.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
    Smalltalk allClassesDo:[:eachClass |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   178
        eachClass instAndClassSelectorsAndMethodsDo:[:sel :mthd |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   179
            |comment|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   180
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   181
            (sel == #documentation) ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   182
                comment := mthd comment.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   183
                comment notNil ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   184
                    kwic addLine:comment reference:mthd mclass ignoreCase:true.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   185
                ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   186
            ] ifFalse:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
                (sel ~~ #examples
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
                and:[ sel ~~ #copyright
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   189
                and:[ sel ~~ #version]]) ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   190
                    comment := mthd comment.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   191
                    comment notNil ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   192
                        kwic addLine:comment reference:mthd ignoreCase:true.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   193
                    ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   194
                ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   195
            ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
        ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
    ].
4124
2d4e83bec872 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4108
diff changeset
   198
    kwic.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   199
                                                                [exEnd]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   200
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
  KWIC index over class comments:
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   202
                                                                [exBegin]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   203
    |kwic|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   204
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   205
    kwic := KeywordInContextIndexBuilder forMethodComments.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   206
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   207
    Smalltalk allClassesDo:[:eachClass |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   208
        |mthd comment|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   209
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   210
        mthd := eachClass theMetaclass compiledMethodAt:#documentation.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   211
        mthd notNil ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   212
            comment := mthd comment.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   213
            comment notNil ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   214
                kwic addLine:comment reference:eachClass theNonMetaclass ignoreCase:true.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   215
            ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   216
        ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   217
    ].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   218
    kwic
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   219
                                                                [exEnd]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   220
"
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   221
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   222
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   223
!KeywordInContextIndexBuilder class methodsFor:'instance creation'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   224
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   225
forMethodComments
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   226
    "Return a new indexer preconfigured for indexing method comments.
     The returned instance gets a separatorAlgorithm which splits the given
     text at punctuation, space, return, linefeed and tab characters,
     and an excluded list of common english fill words.
     Nothing is added to the index here; callers feed it via the
     addLine:reference: family of messages (see the examples method)."
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   227
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   228
    |sepChars sep kwic|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   229
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   230
    "word delimiters: the punctuation characters in the literal, plus return, linefeed and tab.
     NOTE(review): the doubled bang inside the literal looks like the chunk-format escape
     for a single bang character - confirm when editing this line outside a fileout"
    sepChars := '.,;:_ !![]()''"#?<>|' , Character return, Character lf, Character tab.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   231
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   232
    "the separator block: converts its argument to a string and answers the
     substrings found between any of the delimiter characters"
    sep := [:lines | lines asString asCollectionOfSubstringsSeparatedByAny:sepChars].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   233
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   234
    "self new, so that subclasses answer an instance of their own class"
    kwic := self new.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   235
    kwic separatorAlgorithm:sep.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   236
    "fill words which should not show up as keywords in the index"
    kwic excluded:#('the' 'and' 'a' 'an' 'for' 'with' 'no').
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   237
    ^ kwic
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   238
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   239
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   240
forMethodSelectorIndex
    "return an indexer for method selector components, with word separation at case boundaries.
     A line is first split at any of the characters in '.,;:_ '.
     Each resulting word is then split further:
        - a word without uppercase characters is taken as is
        - otherwise, a new fragment starts at an uppercase character which
          follows a non-uppercase character (fooBar -> foo, Bar)
        - if a lowercase letter follows two or more uppercase characters, the
          last uppercase character starts the new fragment (XMLParser -> XML, Parser)
        - any other non-uppercase character (e.g. a digit) after an uppercase run
          does not split (UTF8 remains UTF8).
     The fragment which is still pending when the end of the word is reached is
     added as well. Previously it was silently dropped, so the last component
     of every mixed-case word was missing from the index."

    |sep kwic sepUCWords|

    sepUCWords := [:word :keyWords| 
                    |s w c lastC last2C frag|

                    word asLowercase = word ifTrue:[
                        keyWords add:word.
                    ] ifFalse:[
                        s := word readStream.
                        w := '' writeStream.
                        [s atEnd] whileFalse:[
                            c := s next.
                            (c isUppercase) ifTrue:[
                                "lower -> upper transition: the collected fragment is complete"
                                (lastC notNil and:[lastC isUppercase not]) ifTrue:[
                                    keyWords add:w contents.
                                    w := '' writeStream.
                                ].
                            ] ifFalse:[
                                (last2C notNil and:[last2C isUppercase and:[lastC isUppercase]]) ifTrue:[
                                    c isLetter ifTrue:[
                                        "end of an acronym: its last uppercase character
                                         already belongs to the following word"
                                        frag := w contents.
                                        w := '' writeStream.
                                        w nextPut:(frag last).
                                        keyWords add:(frag allButLast).
                                    ].
                                ].
                            ].
                            w nextPut:c.
                            last2C := lastC.
                            lastC := c.
                        ].
                        "flush the fragment collected since the last split"
                        frag := w contents.
                        frag notEmpty ifTrue:[
                            keyWords add:frag.
                        ].
                    ].
                  ].

    sep := [:line | 
                |words keyWords|

                words := line asCollectionOfSubstringsSeparatedByAny:'.,;:_ '.
                keyWords := OrderedCollection new.
                words do:[:eachWord | sepUCWords value:eachWord value:keyWords].
                keyWords
            ].

    kwic := self new.
    kwic separatorAlgorithm:sep.
    ^ kwic
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   293
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   294
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   295
new
    "return a new indexer.
     The instance is created with #basicNew and then sent #initialize;
     whatever #initialize returns is the value returned here."

    |freshInstance|

    freshInstance := self basicNew.
    ^ freshInstance initialize
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   297
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   298
5223
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   299
!KeywordInContextIndexBuilder class methodsFor:'queries'!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   300
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   301
defaultFillWordsEnglish
    "return the builtin list of english fill words as a literal array of strings.
     #fillWordsEnglish answers this list as long as the FillWordsEnglish
     variable is nil."

    ^ #(
        'the' 'a' 'can' 'you' 
        'to' 'in' 'out' 'at' 'of' 'also' 'with' 'without' 
        'all' 'any' 'how' 'however' 'although' 'always' 
        'either' 'neither' 'anywhere' 'anyway' 'anything' 'anyone'
        'not' 'but' 'else' 'elsewhere'
        'am' 'are' 'is' 'be' 'will' 
        'wont' 'won''t' 'do' 'don''t'
        'no' 'non' 'now' 'old' 'on' 'only'
        'my' 'their' 'your' 'its' 'one' 'two' 'three'
        'etc' 'for' 'lot' 'lots' 'made' 'may' 
        'most' 'mostly' 'much'
        'use' 'this' 'that' 'which' 'what' 'why'
        'or' 'other' 'please' 'vs' 'via'
    ).
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   319
!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   320
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   321
defaultFillWordsFrench
    "return the builtin list of french fill words as a literal array of strings.
     #fillWordsFrench answers this list as long as the FillWordsFrench
     variable is nil."

    ^ #( 'le' 'la' 'il' 'un' 'une' ).
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   325
!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   326
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   327
defaultFillWordsGerman
    "return the builtin list of german fill words as a literal array of strings.
     #fillWordsGerman answers this list as long as the FillWordsGerman
     variable is nil.
     A few words ('kann', 'nicht', 'oder') are listed twice; this makes no
     difference when the list is passed to #excluded:, which converts its
     argument to a Set."

    ^ #(
        'der' 'die' 'das' 
        'ein' 'eine' 'einer' 'eines'
        'kann' 'ich' 'du' 'er' 'sie' 'es' 'wir' 'ihr'
        'zu' 'in' 'aus' 'bei' 'von' 'auch' 'mit' 'ohne' 'alle' 
        'wie' 'wo' 'jedoch' 'obgleich' 'immer' 
        'entweder' 'oder' 'weder' 'noch'
        'irgendwo' 'dennoch' 'etwas' 'jemand'
        'nicht' 'aber' 'ansonsten' 
        'bin' 'sind' 'ist' 'wird' 'nicht'
        'nein' 'alt' 'auf' 'nur'
        'mein' 'dein' 'sein' 'eins' 'zwei' 'drei'
        'etc' 'kann' 'oder' 'bitte'
    ).
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   344
!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   345
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   346
fillWordsEnglish
    "return the list of english fill words:
     the value of the FillWordsEnglish variable if that is non-nil,
     otherwise the builtin list from #defaultFillWordsEnglish.
     The default is not stored into the variable by this method."

    ^ FillWordsEnglish notNil 
        ifTrue:[ FillWordsEnglish ]
        ifFalse:[ self defaultFillWordsEnglish ]
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   351
!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   352
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   353
fillWordsFrench
    "return the list of french fill words:
     the value of the FillWordsFrench variable if that is non-nil,
     otherwise the builtin list from #defaultFillWordsFrench.
     The default is not stored into the variable by this method."

    ^ FillWordsFrench notNil 
        ifTrue:[ FillWordsFrench ]
        ifFalse:[ self defaultFillWordsFrench ]
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   358
!
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   359
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   360
fillWordsGerman
    "return the list of german fill words:
     the value of the FillWordsGerman variable if that is non-nil,
     otherwise the builtin list from #defaultFillWordsGerman.
     The default is not stored into the variable by this method."

    ^ FillWordsGerman notNil 
        ifTrue:[ FillWordsGerman ]
        ifFalse:[ self defaultFillWordsGerman ]
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   365
! !
94415e3adba0 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 4188
diff changeset
   366
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   367
!KeywordInContextIndexBuilder methodsFor:'accessing'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   368
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   369
excluded:aListOfExcludedWords
    "set the words which are never entered into the index.
     Typically, these are fill words such as 'and', 'the' or 'in'.
     The argument is converted with #asSet before it is stored."

    |wordSet|

    wordSet := aListOfExcludedWords asSet.
    excluded := wordSet.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   374
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   375
4130
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   376
exclusionFilter:aBlock
    "set an optional one-argument block which rejects words that cannot be
     described by the plain exclusion list (more complicated patterns).
     When adding a line, the block is evaluated with each word which passed
     the exclusion list; the word is skipped if the block returns true.
     With a nil filter, no additional filtering takes place."

    exclusionFilter := aBlock.
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   382
!
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   383
4132
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   384
matchSorter:aSortBlock
    "set an optional sort block for the matches of a keyword.
     If non-nil, #entriesDo: enumerates the matches of each keyword in the
     order defined by this block (via #asSortedCollection:).
     The elements being sorted are the associations (line -> reference)
     stored by #addLine:reference:ignoreCase:."

    matchSorter := aSortBlock.
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   388
!
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   389
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   390
separatorAlgorithm:aBlock
    "set the block which splits a given string into words.
     When a line is added, the block is evaluated with the line (via
     #value:optionalArgument:, the keyword-to-lines mapping being the
     optional argument) and must return a collection of words.
     The default is to split at punctuation and whitespace
     (see #initialize)"

    separatorAlgorithm := aBlock.
4130
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   396
!
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   397
2532973b50e6 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4129
diff changeset
   398
unquoteAlgorithm:aBlock
    "set the one-argument block which removes quotes from a word.
     When a line is added, the block is evaluated with each word which is
     not in the exclusion list; its return value is used as the keyword.
     The default is to unquote single and double quotes
     (see #initialize)"

    unquoteAlgorithm := aBlock.
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   404
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   405
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   406
!KeywordInContextIndexBuilder methodsFor:'building'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   407
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   408
addLine:aLine reference:opaqueReference
    "add a text line, ignoring case; the line is split into words and entered
     into the kwic (same as #addLine:reference:ignoreCase: with true as
     ignoreCase argument).
     The reference argument is stored as 'value' of the generated entries;
     it can be anything"

    self 
        addLine:aLine 
        reference:opaqueReference 
        ignoreCase:true
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   414
!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   415
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   416
addLine:aLine reference:opaqueReference ignoreCase:ignoreCase
    "add a line to the kwic.
     The line is split into words by the separatorAlgorithm. For each word
     which is not excluded, the association (aLine -> opaqueReference) is
     added to the set of matches kept under that word.
     A word is skipped if
        - it is in the exclusion list as delivered by the separatorAlgorithm, or
        - it is in the exclusion list after unquoting (and after conversion
          to lowercase, if ignoreCase is true), or
        - an exclusionFilter is defined and returns true for it.
     The reference argument is stored as 'value' of the generated entries;
     it can be anything"

    |rawWords|

    rawWords := separatorAlgorithm value:aLine optionalArgument:keywordToLinesMapping.
    rawWords do:[:rawWord |
        |keyword isWanted|

        (excluded includes:rawWord) ifFalse:[
            keyword := unquoteAlgorithm value:rawWord.
            ignoreCase ifTrue:[
                keyword := keyword asLowercase.
            ].
            isWanted := (excluded includes:keyword) not
                        and:[ exclusionFilter isNil or:[ (exclusionFilter value:keyword) not ] ].
            isWanted ifTrue:[
                (keywordToLinesMapping at:keyword ifAbsentPut:[Set new])
                    add:(aLine -> opaqueReference).
            ]
        ]
    ].
4188
f823326d96a3 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4187
diff changeset
   439
!
f823326d96a3 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4187
diff changeset
   440
f823326d96a3 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4187
diff changeset
   441
remapKeywordsWith:keywordMappingAlgorithm 
    "apply an additional mapper to the keywords (after the kwic has been constructed).
     This can map multiple different words to the same keyword.
     The mapper is evaluated for each keyword, with the keyword and (as optional
     second argument) a snapshot of the keywords known when this method was entered.
     If it returns a different word, all matches of the keyword are moved into
     the bucket of the returned word and the old keyword is removed.
     It may, for example, figure out that a word with a long common prefix is
     already in the list and decide that both should share one bucket
     (for example, 'starts' and 'startWith')."

    |snapshotOfKeys|

    snapshotOfKeys := keywordToLinesMapping keys copy.
    snapshotOfKeys do:[:oldKeyword |
        |newKeyword movedEntries|

        newKeyword := keywordMappingAlgorithm value:oldKeyword optionalArgument:snapshotOfKeys.
        newKeyword ~= oldKeyword ifTrue:[
            "the keyword may already be gone, if an earlier remapping removed it"
            movedEntries := keywordToLinesMapping at:oldKeyword ifAbsent:[nil].
            movedEntries notNil ifTrue:[
                (keywordToLinesMapping at:newKeyword ifAbsentPut:[Set new])
                    addAll:movedEntries.
                keywordToLinesMapping removeKey:oldKeyword.
            ]
        ]
    ].
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   467
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   468
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   469
!KeywordInContextIndexBuilder methodsFor:'enumerating'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   470
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   471
entriesDo:aFourToSixArgBlock
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   472
    "evaluate the argument, for each entry.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   473
     If it is a 4-arg block, it is called with:
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   474
        kwic-word, 
5224
2e6d0898b080 #FEATURE by exept
Claus Gittinger <cg@exept.de>
parents: 5223
diff changeset
   475
        left text, 
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   476
        right text 
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   477
        and reference
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   478
     If it is a 5-arg block, the original text is passed as additional argument.
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   479
     If it is a 6-arg block, the original text and the context are passed as additional argument.
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   480
     (stupid, but done for backward compatibility)"
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   481
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   482
    |fourArgBlock|
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   483
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   484
    aFourToSixArgBlock numArgs == 4 ifTrue:[
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   485
        fourArgBlock := aFourToSixArgBlock 
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   486
    ].    
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   487
    keywordToLinesMapping keys asSortedCollection do:[:eachKey |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   488
        |setOfMatches lcKey|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   489
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   490
        setOfMatches := keywordToLinesMapping at:eachKey.
4132
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   491
        matchSorter notNil ifTrue:[
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   492
            setOfMatches := setOfMatches asSortedCollection:matchSorter
f87c478424c7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4131
diff changeset
   493
        ].    
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   494
        lcKey := eachKey asLowercase.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   495
        setOfMatches do:[:eachAssoc |
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   496
            |text ref lines idx lIdx context left right word prevLine nextLine|
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   497
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   498
            text := eachAssoc key.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   499
            ref := eachAssoc value.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   500
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   501
            lines := text asCollectionOfLines.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   502
            idx := lines findFirst:[:line | line asLowercase includesString:lcKey].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   503
            idx ~~ 0 ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   504
                context := lines at:idx.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   505
                idx > 1 ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   506
                    prevLine := (lines at:idx-1).
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   507
                    context := prevLine , ' ' , context.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   508
                ].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   509
                idx < lines size ifTrue:[
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   510
                    nextLine := (lines at:idx+1).
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   511
                    context :=  context , ' ' , nextLine.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   512
                ].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   513
                lIdx := context asLowercase findString:lcKey.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   514
                left := (context copyTo:lIdx - 1) withoutSeparators.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   515
                right := (context copyFrom:lIdx + lcKey size) withoutSeparators.
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   516
                word := (context copyFrom:lIdx to:lIdx + lcKey size - 1) withoutSeparators.
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   517
                fourArgBlock notNil ifTrue:[
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   518
                    fourArgBlock value:word value:left value:right value:ref.
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   519
                ] ifFalse:[
4128
4cc1535fa7dc #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4127
diff changeset
   520
                    aFourToSixArgBlock value:word optionalArgument:left and:right and:ref and:text and:context
4126
4d3ec803fddf #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4125
diff changeset
   521
                ].    
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   522
            ].
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   523
        ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   524
    ]
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   525
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   526
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   527
!KeywordInContextIndexBuilder methodsFor:'initialization'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   528
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   529
initialize
    "Set up a freshly created builder with its default state:
     an empty keyword-to-lines dictionary, an empty exclusion set,
     no exclusion filter, a separator algorithm which splits a line
     at any of the characters space, dot, colon, comma, semicolon and dash,
     and an unquote algorithm which applies unquote: first for the
     double quote and then for the single quote character
     (presumably stripping surrounding quotes - see String>>unquote:)."

    keywordToLinesMapping := Dictionary new.

    "/ all defaults go through the regular setters, in the same order as before
    self
        excluded:Set new;
        exclusionFilter:nil;
        separatorAlgorithm:[:eachLine |
            eachLine asCollectionOfSubstringsSeparatedByAny:' .:,;-'
        ];
        unquoteAlgorithm:[:eachWord |
            (eachWord unquote:$") unquote:$' 
        ].
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   535
! !
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   536
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   537
!KeywordInContextIndexBuilder class methodsFor:'documentation'!
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   538
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   539
version
    "Answer the version header string of this class.
     The returned literal is a version-control keyword placeholder;
     presumably it is expanded by the source code management system
     when the file is checked out - TODO confirm.
     Do not reformat the return statement below."
4108
667d0bdaf609 #OTHER by cg
Claus Gittinger <cg@exept.de>
parents: 3184
diff changeset
   540
    ^ '$Header$'
2536
8907a20de2dc changed: #examples
Claus Gittinger <cg@exept.de>
parents: 1375
diff changeset
   541
!
8907a20de2dc changed: #examples
Claus Gittinger <cg@exept.de>
parents: 1375
diff changeset
   542
8907a20de2dc changed: #examples
Claus Gittinger <cg@exept.de>
parents: 1375
diff changeset
   543
version_CVS
    "Answer the version header string of this class; this method returns
     the same keyword placeholder literal as #version.
     NOTE(review): judging by the selector this is the CVS-specific variant,
     presumably looked up by the source code manager - confirm.
     Do not reformat the return statement below."
4108
667d0bdaf609 #OTHER by cg
Claus Gittinger <cg@exept.de>
parents: 3184
diff changeset
   544
    ^ '$Header$'
1375
e034d3e027f2 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   545
! !
3184
27271594c7d8 comments
Claus Gittinger <cg@exept.de>
parents: 2536
diff changeset
   546