--- a/CharacterArray.st Mon Aug 10 15:30:08 2009 +0200
+++ b/CharacterArray.st Mon Aug 10 15:34:02 2009 +0200
@@ -1844,6 +1844,21 @@
"Modified: 22.4.1996 / 15:56:07 / cg"
!
+hammingDistanceTo:aString
+ "return the hamming distance to aString (the number of characters which are different).
+ In information theory, the Hamming distance between two strings of equal length
+ is the number of positions for which the corresponding symbols are different.
+ Put another way, it measures the minimum number of substitutions required to change
+ one into the other. aString must have the same size as the receiver (this is asserted)."
+
+ self assert:(aString size == self size).
+ ^ 1 to:self size count:[:idx | (self at:idx) ~= (aString at:idx)]
+
+ "
+ 'roses' hammingDistanceTo:'toned' -> 3
+ "
+!
+
hash
"return an integer useful as a hash-key"
@@ -2299,6 +2314,67 @@
"
!
+asKoelnerPhoneticCode
+ "return a koelner phonetic code (computed by PhoneticStringUtilities).
+ The koelnerPhonetic code is for the German language what the soundex code is for English;
+ it returns similar strings for similar sounding words.
+ There are some differences to soundex, though:
+ its length is not limited to 4, but depends on the length of the original string;
+ it does not start with the first character of the input."
+
+ ^ PhoneticStringUtilities koelnerPhoneticCodeOf:self
+
+ "
+ #(
+ 'Müller'
+ 'Miller'
+ 'Mueller'
+ 'Mühler'
+ 'Mühlherr'
+ 'Mülherr'
+ 'Myler'
+ 'Millar'
+ 'Myller'
+ 'Müllar'
+ 'Müler'
+ 'Muehler'
+ 'Mülller'
+ 'Müllerr'
+ 'Muehlherr'
+ 'Muellar'
+ 'Mueler'
+ 'Mülleer'
+ 'Mueller'
+ 'Nüller'
+ 'Nyller'
+ 'Niler'
+ 'Czerny'
+ 'Tscherny'
+ 'Czernie'
+ 'Tschernie'
+ 'Schernie'
+ 'Scherny'
+ 'Scherno'
+ 'Czerne'
+ 'Zerny'
+ 'Tzernie'
+ 'Breschnew'
+ ) do:[:w |
+ Transcript show:w; show:'->'; showCR:(w asKoelnerPhoneticCode)
+ ].
+ "
+
+ "
+ 'Breschnew' asKoelnerPhoneticCode -> '17863'
+ 'Breschnew' asKoelnerPhoneticCode -> '17863'
+ 'Breschneff' asKoelnerPhoneticCode -> '17863'
+ 'Braeschneff' asKoelnerPhoneticCode -> '17863'
+ 'Braessneff' asKoelnerPhoneticCode -> '17863'
+ 'Pressneff' asKoelnerPhoneticCode -> '17863'
+ 'Presznäph' asKoelnerPhoneticCode -> '17863'
+ "
+!
+
asLowercase
"return a copy of myself in lowercase letters"
@@ -2465,63 +2541,13 @@
!
asSoundexCode
- "return a soundex string or nil.
+ "return a soundex phonetic code or nil.
Soundex returns similar codes for similar sounding words, making it a useful
tool when searching for words where the correct spelling is unknown.
(read Knuth or search the web if you dont know what a soundex code is).
Caveat: 'similar sounding words' means: 'similar sounding in english'."
- |inStream codeStream ch last lch codeLength codes sc|
-
- inStream := self readStream.
- inStream skipSeparators.
- inStream atEnd ifTrue:[
- ^ nil
- ].
- ch := inStream next.
- ch isLetter ifFalse:[
- ^ nil
- ].
- codeLength := 0.
-
- codes := Dictionary new.
- codes atAll:'bpfv' put:$1.
- codes atAll:'cskgjqxz' put:$2.
- codes atAll:'dt' put:$3.
- codes atAll:'l' put:$4.
- codes atAll:'nm' put:$5.
- codes atAll:'r' put:$6.
-
- codeStream := WriteStream on:(String new:4).
- codeStream nextPut:(ch asUppercase).
-
- [inStream atEnd] whileFalse:[
- ch := inStream next.
- lch := ch asLowercase.
- lch = last ifFalse:[
- last := lch.
-
- sc := codes at:ch ifAbsent:nil.
- sc notNil ifTrue:[
- codeLength < 3 ifTrue:[
- codeStream nextPut:sc.
- codeLength := codeLength + 1.
- ]
- ] ifFalse:[
-"/ ch isLetter ifFalse:[
-"/ "/ something else - ignore it
-"/ ] ifTrue:[
-"/ "/ else its a vowel and we ignore it
-"/ ]
- ].
- ]
- ].
- [ codeLength < 3 ] whileTrue:[
- codeStream nextPut:$0.
- codeLength := codeLength + 1.
- ].
-
- ^ codeStream contents
+ ^ PhoneticStringUtilities soundexCodeOf:self
"
'claus' asSoundexCode
@@ -5705,7 +5731,7 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.399 2009-07-31 08:49:19 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.400 2009-08-10 13:34:02 cg Exp $'
! !
CharacterArray initialize!