CharacterArray.st
changeset 16898 0663e6b0d177
parent 16887 8e30e6a52a64
child 16899 c8de03aacca0
equal deleted inserted replaced
16897:f3e1acb976ba 16898:0663e6b0d177
  2128 !
  2128 !
  2129 
  2129 
  2130 hash
  2130 hash
  2131     "return an integer useful as a hash-key"
  2131     "return an integer useful as a hash-key"
  2132 
  2132 
       
  2133     "/ whenever changing, also care for String>>hash.
  2133     "/ immediately after any change, execute (maybe in a debugger):
  2134     "/ immediately after any change, execute (maybe in a debugger):
  2134     "/      Set allSubInstancesDo:[:s | s rehash]
  2135     "/      Set allSubInstancesDo:[:s | s rehash]
  2135     "/ ^ self hash_dragonBook
  2136     ^ self hash_fnv1a
  2136 
  2137 
  2137     |h|
  2138     "
  2138 
  2139      'a' hash 604776751
  2139     "/
  2140      'a' asUnicode16String hash 
  2140     "/ this is the sdbm algorithm
  2141      'aa' hash                  
  2141     "/
       
  2142     h := 0.
       
  2143     self do:[:char |
       
  2144         h := (65599 times:h) plus:char codePoint.
       
  2145     ].
       
  2146     ^ h 
       
  2147 
       
  2148     "
       
  2149      'a' hash
       
  2150      'a' asUnicode16String hash
       
  2151      'aa' hash
       
  2152      'aa' asUnicode16String hash
  2142      'aa' asUnicode16String hash
  2153      'ab' hash
  2143      'ab' hash
  2154      'ab' asUnicode16String hash
  2144      'ab' asUnicode16String hash
  2155      'ab' hash
  2145      'ab' hash
  2156      'ab' asArray hash
  2146      'ab' asArray hash
  2249     h := 2166136261.
  2239     h := 2166136261.
  2250     self do:[:eachChar |
  2240     self do:[:eachChar |
  2251         h := h bitXor:(eachChar codePoint).
  2241         h := h bitXor:(eachChar codePoint).
  2252         h := (h * 16777619) bitAnd:16rFFFFFFFF.
  2242         h := (h * 16777619) bitAnd:16rFFFFFFFF.
  2253     ].
  2243     ].
       
  2244     "/ make sure, it fits into a smallInt
       
  2245     h := (h bitXor: (h >> 30)) bitAnd: 16r3FFFFFFF.
  2254     ^ h
  2246     ^ h
  2255 
  2247 
  2256     "
  2248     "
  2257      'abc' hash_fnv1a  
  2249      'abc' hash_fnv1a  
       
  2250      'abc' asUnicode16String hash_fnv1a  
       
  2251      'abc' asUnicode32String hash_fnv1a 
       
  2252 
  2258      'foofooHelloWorld' hash_fnv1a   
  2253      'foofooHelloWorld' hash_fnv1a   
       
  2254      'foofooHelloWorld' asUnicode16String hash_fnv1a   
       
  2255      'foofooHelloWorld' asUnicode32String hash_fnv1a   
       
  2256 
  2259      'blablaHelloWorld' hash_fnv1a   
  2257      'blablaHelloWorld' hash_fnv1a   
       
  2258      'blablaHelloWorld' asUnicode16String hash_fnv1a   
       
  2259      'blablaHelloWorld' asUnicode32String hash_fnv1a   
  2260     "
  2260     "
  2261 !
  2261 !
  2262 
  2262 
  2263 hash_java
  2263 hash_java
  2264     "return an integer useful as a hash-key.
  2264     "return an integer useful as a hash-key.
  2278     "
  2278     "
  2279      'abc' hash_java  
  2279      'abc' hash_java  
  2280      'foofooHelloWorld' hash_java   
  2280      'foofooHelloWorld' hash_java   
  2281      'blablaHelloWorld' hash_java   
  2281      'blablaHelloWorld' hash_java   
  2282     "
  2282     "
       
  2283 !
       
  2284 
       
  2285 hash_sdbm
       
  2286     "return an integer useful as a hash-key.
       
  2287      This method implements the sdbm algorithm."
       
  2288 
       
  2289     |h|
       
  2290 
       
  2291     "/
       
  2292     "/ this is the sdbm algorithm
       
  2293     "/
       
  2294     h := 0.
       
  2295     self do:[:char |
       
  2296         h := (65599 times:h) plus:char codePoint.
       
  2297     ].
       
  2298     ^ h 
       
  2299 
       
  2300     "
       
  2301      'a' hash
       
  2302      'a' asUnicode16String hash
       
  2303      'aa' hash
       
  2304      'aa' asUnicode16String hash
       
  2305      'ab' hash
       
  2306      'ab' asUnicode16String hash
       
  2307      'ab' hash
       
  2308      'ab' asArray hash
       
  2309     "
       
  2310 
       
  2311     "
       
  2312         |syms ms|
       
  2313 
       
  2314         syms := Symbol allInstances.
       
  2315         Transcript show:'syms: '; showCR:syms size.
       
  2316         Transcript show:'sdbm hashes: '; showCR:(syms collect:[:s| s hash]) asSet size.
       
  2317         Transcript show:'dragonBook hashes: '; showCR:(syms collect:[:s| s hash_dragonBook]) asSet size.
       
  2318 
       
  2319         ms := Time millisecondsToRun:[
       
  2320             10 timesRepeat:[
       
  2321                 syms do:[:each| each hash].
       
  2322             ].
       
  2323         ].
       
  2324         Transcript show:'sdbm hash: '; showCR:ms.
       
  2325 
       
  2326         ms := Time millisecondsToRun:[
       
  2327             10 timesRepeat:[
       
  2328                 syms do:[:each| each hash_dragonBook].
       
  2329             ].
       
  2330         ].
       
  2331         Transcript show:'dragonBook: '; showCR:ms.
       
  2332 
       
  2333         syms := syms collect:[:each| each asUnicode16String].
       
  2334         ms := Time millisecondsToRun:[
       
  2335             10 timesRepeat:[
       
  2336                 syms do:[:each| each hash].
       
  2337             ].
       
  2338         ].
       
  2339         Transcript show:'unicode sdbm hash: '; showCR:ms.
       
  2340 
       
  2341         ms := Time millisecondsToRun:[
       
  2342             10 timesRepeat:[
       
  2343                 syms do:[:each| each hash_dragonBook].
       
  2344             ].
       
  2345         ].
       
  2346         Transcript show:'unicode dragonBook:'; showCR:ms.
       
  2347     "
       
  2348 
       
  2349     "Modified: / 26-12-2011 / 14:09:07 / cg"
  2283 !
  2350 !
  2284 
  2351 
  2285 levenshteinTo:aString
  2352 levenshteinTo:aString
  2286     "return the levenshtein distance to the argument, aString;
  2353     "return the levenshtein distance to the argument, aString;
  2287      this value corresponds to the number of replacements that have to be
  2354      this value corresponds to the number of replacements that have to be
  7042 ! !
  7109 ! !
  7043 
  7110 
  7044 !CharacterArray class methodsFor:'documentation'!
  7111 !CharacterArray class methodsFor:'documentation'!
  7045 
  7112 
  7046 version
  7113 version
  7047     ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.544 2014-10-08 08:49:47 cg Exp $'
  7114     ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.545 2014-10-09 12:32:53 cg Exp $'
  7048 !
  7115 !
  7049 
  7116 
  7050 version_CVS
  7117 version_CVS
  7051     ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.544 2014-10-08 08:49:47 cg Exp $'
  7118     ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.545 2014-10-09 12:32:53 cg Exp $'
  7052 ! !
  7119 ! !
  7053 
  7120 
  7054 
  7121 
  7055 CharacterArray initialize!
  7122 CharacterArray initialize!