*** empty log message ***
author: Claus Gittinger <cg@exept.de>
Mon, 10 Aug 2009 15:34:02 +0200
changeset 11838 0a3d38e446e4
parent 11837 27fb4984e97b
child 11839 84dd112e1080
*** empty log message ***
CharacterArray.st
--- a/CharacterArray.st	Mon Aug 10 15:30:08 2009 +0200
+++ b/CharacterArray.st	Mon Aug 10 15:34:02 2009 +0200
@@ -1844,6 +1844,21 @@
     "Modified: 22.4.1996 / 15:56:07 / cg"
 !
 
+hammingDistanceTo:aString
+    "return the hamming distance between the receiver and aString,
+     i.e. the number of positions at which the corresponding characters differ.
+     Put another way, it is the minimum number of substitutions required to change
+     one string into the other (or the number of errors that transformed one into the other).
+     Both strings must have the same size; this is checked by the assertion below."
+
+    self assert:(aString size == self size).
+    ^ 1 to:self size count:[:idx | (self at:idx) ~= (aString at:idx)]
+
+    "
+     'roses' hammingDistanceTo:'toned'      -> 3 (positions 1, 3 and 5 differ)
+    "
+!
+
 hash
     "return an integer useful as a hash-key"
 
@@ -2299,6 +2314,67 @@
     "
 !
 
+asKoelnerPhoneticCode
+    "return the koelner phonetic code of the receiver (computed by PhoneticStringUtilities).
+     The koelnerPhonetic code is for the German language what the soundex code is for English;
+     it returns similar strings for similar sounding words. 
+     There are some differences to soundex, though: 
+        its length is not limited to 4, but depends on the length of the original string;
+        it does not start with the first character of the input."
+
+    ^ PhoneticStringUtilities koelnerPhoneticCodeOf:self 
+
+    "
+     #(
+        'Müller'
+        'Miller'
+        'Mueller'
+        'Mühler'
+        'Mühlherr'
+        'Mülherr'
+        'Myler'
+        'Millar'
+        'Myller'
+        'Müllar'
+        'Müler'
+        'Muehler'
+        'Mülller'
+        'Müllerr'
+        'Muehlherr'
+        'Muellar'
+        'Mueler'
+        'Mülleer'
+        'Mueller'
+        'Nüller'
+        'Nyller'
+        'Niler'
+        'Czerny'
+        'Tscherny'
+        'Czernie'
+        'Tschernie'
+        'Schernie'
+        'Scherny'
+        'Scherno'
+        'Czerne'
+        'Zerny'
+        'Tzernie'
+        'Breschnew'
+     ) do:[:w |
+         Transcript show:w; show:'->'; showCR:(w asKoelnerPhoneticCode)
+     ].
+    "
+
+    "
+     'Breschnew' asKoelnerPhoneticCode -> '17863'
+     'Breschnew' asKoelnerPhoneticCode -> '17863'
+     'Breschneff' asKoelnerPhoneticCode -> '17863'
+     'Braeschneff' asKoelnerPhoneticCode -> '17863'
+     'Braessneff' asKoelnerPhoneticCode -> '17863'
+     'Pressneff' asKoelnerPhoneticCode -> '17863'
+     'Presznäph' asKoelnerPhoneticCode -> '17863'
+    "
+!
+
 asLowercase
     "return a copy of myself in lowercase letters"
 
@@ -2465,63 +2541,13 @@
 !
 
 asSoundexCode
-    "return a soundex string or nil.
+    "return a soundex phonetic code or nil.
      Soundex returns similar codes for similar sounding words, making it a useful
      tool when searching for words where the correct spelling is unknown.
      (read Knuth or search the web if you dont know what a soundex code is).
      Caveat: 'similar sounding words' means: 'similar sounding in english'."
 
-    |inStream codeStream ch last lch codeLength codes sc|
-
-    inStream := self readStream.
-    inStream skipSeparators.
-    inStream atEnd ifTrue:[
-        ^ nil
-    ].
-    ch := inStream next.
-    ch isLetter ifFalse:[
-        ^ nil
-    ].
-    codeLength := 0.
-
-    codes := Dictionary new.
-    codes atAll:'bpfv'     put:$1.
-    codes atAll:'cskgjqxz' put:$2.
-    codes atAll:'dt'       put:$3.
-    codes atAll:'l'        put:$4.
-    codes atAll:'nm'       put:$5.
-    codes atAll:'r'        put:$6.
-
-    codeStream := WriteStream on:(String new:4).
-    codeStream nextPut:(ch asUppercase).
-
-    [inStream atEnd] whileFalse:[
-        ch := inStream next.
-        lch := ch asLowercase.
-        lch = last ifFalse:[
-            last := lch.
-
-            sc := codes at:ch ifAbsent:nil.
-            sc notNil ifTrue:[
-                codeLength < 3 ifTrue:[
-                    codeStream nextPut:sc.
-                    codeLength := codeLength + 1.
-                ]
-            ] ifFalse:[
-"/                ch isLetter ifFalse:[
-"/                    "/ something else - ignore it
-"/                ] ifTrue:[
-"/                    "/ else its a vowel and we ignore it
-"/                ]
-            ].
-        ]
-    ].
-    [ codeLength < 3 ] whileTrue:[
-        codeStream nextPut:$0.
-        codeLength := codeLength + 1.
-    ].
-
-    ^ codeStream contents
+    ^ PhoneticStringUtilities soundexCodeOf:self
 
     "
      'claus' asSoundexCode     
@@ -5705,7 +5731,7 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.399 2009-07-31 08:49:19 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.400 2009-08-10 13:34:02 cg Exp $'
 ! !
 
 CharacterArray initialize!