BloomFilter.st
author Claus Gittinger <cg@exept.de>
Wed, 08 May 2019 14:41:45 +0200
changeset 4942 9f424bed67c4
parent 4801 08db5e0efb14
child 4974 33b7c4e3a597
permissions -rw-r--r--
#DOCUMENTATION by cg class: Socket comment/format in: #bindAnonymously #bindTo:reuseAddress:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
     1
"{ Encoding: utf8 }"
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
     2
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 2019 by Claus Gittinger
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
              All Rights Reserved
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 This software is furnished under a license and may be used
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 only in accordance with the terms of that license and with the
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 inclusion of the above copyright notice.   This software may not
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 be provided or otherwise made available to, or used by, any
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 other person.  No title to or ownership of the software is
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 hereby transferred.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"{ Package: 'stx:libbasic2' }"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
"{ NameSpace: Smalltalk }"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
Collection subclass:#BloomFilter
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
	instanceVariableNames:'bits hashingBlocks tally'
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	classVariableNames:''
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	poolDictionaries:''
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	category:'Collections-Unordered'
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
!BloomFilter class methodsFor:'documentation'!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
copyright
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
 COPYRIGHT (c) 2019 by Claus Gittinger
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
              All Rights Reserved
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
 This software is furnished under a license and may be used
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
 only in accordance with the terms of that license and with the
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
 inclusion of the above copyright notice.   This software may not
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
 be provided or otherwise made available to, or used by, any
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
 other person.  No title to or ownership of the software is
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
 hereby transferred.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
documentation
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
    a BloomFilter is a Set-like data structure which can quickly answer whether
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
    an element is definitely NOT in the set, but might generate false positives
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
    (i.e. it might answer true, although the element is not actually present).
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
    Bloom filters are used for tuning, when accesses (eg. to the disk or database)
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    48
    are expensive, and can be avoided in many cases (in the NOT present case),
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    49
    and false positives only lead to additional useless queries.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    50
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    51
    Bloom filters cannot remove or enumerate elements. 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    52
    The only useful operations are adding and inclusion testing (includes).
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    53
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    54
    For example, a database query (eg. 'is something present'), 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    55
    might make use of a bloom filter, to avoid the query in many cases.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    56
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    57
    Bloom filters false positive rate degrades as the number of elements is added,
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    58
    thus a good choice of its initial size is mandatory.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    59
"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    60
!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    61
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    62
examples
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    63
"<<END
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    64
[exBegin]
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    65
    |filter set nFalsePositive|
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    66
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    67
    filter := BloomFilter new:10000.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    68
    set := Set new:10000.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    69
    
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    70
    "/ add all words from the Collection class's source code
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    71
    Collection sourceStream 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    72
        linesDo:[:eachLine |
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    73
            eachLine 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    74
               splitOn:[:ch | ch isSeparator]
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    75
               do:[:word | 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    76
                  filter add:word.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    77
                  set add:word.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    78
               ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    79
        ];
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    80
        close.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    81
    
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    82
    "/ check some which are known to be present
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    83
    set do:[:w |
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    84
        self assert:(filter includes:w)
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    85
    ].    
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    86
    "/ check for false positives
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    87
    nFalsePositive :=
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
        (1 to:1000) count:[:n |
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
            |randomWord isFalsePositive|
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    91
            isFalsePositive := false.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
            randomWord := (1 to:6) collect:[:i | ($a to: $z) atRandom] as:String.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
            (set includes:randomWord) ifFalse:[
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    94
                (filter includes:randomWord) ifTrue:[
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
                    Transcript showCR:'false positive: ',randomWord.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
                    isFalsePositive := true.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    97
                ].    
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    98
            ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
            isFalsePositive
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
        ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
    Transcript show:nFalsePositive; showCR:' false positives'.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
    filter inspect.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
[exEnd]
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
END>>"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
! !
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   106
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   107
!BloomFilter class methodsFor:'instance creation'!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   108
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   109
new:initialNumberOfBits
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   110
    ^ self 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   111
        new:initialNumberOfBits 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   112
        hashes:{
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   113
            [:s | s hash_fnv1a] .
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   114
            [:s | s hash_sdbm] .
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   115
        }
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   116
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   117
    "Created: / 10-02-2019 / 14:14:23 / Claus Gittinger"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   118
!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   119
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   120
new:initialNumberOfBits hashes:hashActionBlocks
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   121
    ^ super new 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   122
        numberOfBits:initialNumberOfBits 
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   123
        hashes:hashActionBlocks
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   124
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   125
    "Created: / 10-02-2019 / 14:13:05 / Claus Gittinger"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   126
! !
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   127
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   128
!BloomFilter methodsFor:'adding'!
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   129
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   130
add:aString
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   131
    "add the arguemnt, aString to the set"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   132
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   133
    |mod isNew|
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
    mod := bits size.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
    isNew := false.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
    hashingBlocks do:[:each |
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   138
        |h idx|
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   139
        
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   140
        h := each value:aString.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   141
        idx := (h \\ mod)+1.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   142
        (bits at:idx) == 0 ifTrue:[ isNew := true ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   143
        bits at:idx put:1.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144
    ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   145
    isNew ifTrue:[
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   146
        tally := tally + 1
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   147
    ].
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   148
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   149
    "Created: / 10-02-2019 / 14:16:29 / Claus Gittinger"
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   150
! !
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   151
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   152
!BloomFilter methodsFor:'blocked'!
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   153
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   154
do:aBlock
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   155
    self shouldNotImplement
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   156
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   157
    "Created: / 10-02-2019 / 14:40:17 / Claus Gittinger"
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   158
!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   159
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   160
remove:aString
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   161
    "raise an error - elements cannot be removed"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   162
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   163
    self shouldNotImplement
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   164
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   165
    "Created: / 10-02-2019 / 14:18:14 / Claus Gittinger"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   166
! !
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   167
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   168
!BloomFilter methodsFor:'initialization'!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   169
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   170
numberOfBits:initialNumberOfBits hashes:hashingBlocksArg
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
    bits := BitArray new:(initialNumberOfBits nextPrime).
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
    hashingBlocks := hashingBlocksArg.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   173
    tally := 0.
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
    "Created: / 10-02-2019 / 14:15:10 / Claus Gittinger"
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
! !
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
4801
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   178
!BloomFilter methodsFor:'queries'!
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   179
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   180
includes:aString
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   181
    "returns false, if the arguemnt, aString is definitly NOT in the set.
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   182
     However, it may return true as a false positive"
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   183
     
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   184
    |mod|
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   185
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   186
    mod := bits size.
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   187
    hashingBlocks do:[:each |
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   188
        |h|
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   189
        
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   190
        h := each value:aString.
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   191
        (bits at:(h \\ mod)+1) == 0 ifTrue:[^ false].
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   192
    ].
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   193
    ^ true
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   194
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   195
    "Created: / 10-02-2019 / 14:16:59 / Claus Gittinger"
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   196
! !
08db5e0efb14 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4800
diff changeset
   197
4800
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
!BloomFilter class methodsFor:'documentation'!
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   199
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   200
version_CVS
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
    ^ '$Header$'
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   202
! !
a6cbf556cfd1 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   203