PhoneticStringUtilities.st
author Claus Gittinger <cg@exept.de>
Wed, 15 Feb 2017 21:45:37 +0100
changeset 4323 7caede8761de
parent 4194 12b5e3e2219b
child 4467 c946d9eea9ec
child 4769 89914ccfcf7d
permissions -rw-r--r--
#DOCUMENTATION by cg class: RandomKISS2 comment/format in: #documentation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    15
"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    16
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
	instanceVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
	classVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	poolDictionaries:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    24
Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    25
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    26
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    27
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    28
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    29
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    30
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    31
PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    32
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    33
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    34
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    35
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    36
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    37
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    38
PhoneticStringUtilities::PhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    39
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    40
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    41
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    42
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    43
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    44
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    45
PhoneticStringUtilities::PhoneticStringComparator subclass:#SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    46
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    47
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    48
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    49
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    50
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    51
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    52
PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    53
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    54
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    55
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    56
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    57
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    58
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    59
Object subclass:#NYSIISStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    60
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    61
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    62
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    63
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    64
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    65
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    66
PhoneticStringUtilities::PhoneticStringComparator subclass:#PhonemStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    67
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    68
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    69
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    70
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    71
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    72
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    73
PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    74
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    75
		currentIndex skipCount'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    76
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    77
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    78
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    79
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    80
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    81
PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    82
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    83
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    84
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    85
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    86
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    87
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
copyright
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    91
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    94
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    97
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    98
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
documentation
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   106
"
2445
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   107
    Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   108
    These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   109
    to provide good results for non-English languages.
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   110
    
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   111
    soundexCode
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   112
        this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   113
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   114
    nysiis
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   115
        a modified soundex algorithm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   116
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   117
    miracode
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   118
        another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   119
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   120
    mySQLSoundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   121
        another modified soundex algorithm used in mySQL.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   122
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   123
    koelner phoneticCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   124
        provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   125
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   126
    Double metaphone 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   127
        works with most European languages.
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   128
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   129
    phonem
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   130
        described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   131
        from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   132
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   133
    More information for German readers can be found in:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   134
        http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   135
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   136
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   137
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   138
sampleData
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   139
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   140
    for the 50 most common German names, we get:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   141
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   142
                            ext. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   143
    name        soundex   soundex   metaphone   phonet  phonet2     phonix      daitsch phonem      koeln
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   144
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   145
    müller      M460    54600000    MLR         MÜLA    NILA        M4000000    689000  MYLR        657
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   146
    schmidt     S253    25300000    SKMTT       SHMIT   ZNIT        S5300000    463000  CMYD        8628
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   147
    schneider   S253    25360000    SKNTR       SHNEIDA ZNEITA      S5300000    463900  CNAYDR      8627
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   148
    fischer     F260    12600000    FSKR        FISHA   FIZA        F8000000    749000  VYCR        387
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   149
    weber       W160    16000000    WBR         WEBA    FEBA        $1000000    779000  VBR         317
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   150
    meyer       M600    56000000    MYR         MEIA    NEIA        M0000000    619000  MAYR        67
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   151
    wagner      W256    25600000    WKNR        WAKNA   FAKNA       $2500000    756900  VACNR       367
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   152
    schulz      S242    24200000    SKLS        SHULS   ZULZ        S4800000    484000  CULC        85
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   153
    becker      B260    12600000    BKR         BEKA    BEKA        B2000000    759000  BCR         147
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   154
    hoffmann    H155    15500000    HFMN        HOFMAN  UFNAN       $7550000    576600  OVMAN       036
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   155
    schäfer     S216    21600000    SKFR        SHEFA   ZEFA        S7000000    479000  CVR         837
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   156
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   157
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   158
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   159
!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   160
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   161
koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   162
    "return a koelner phonetic code.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   163
     The koelnerPhonetic code is for the German language what the soundex code is for English;
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   164
     it returns similar strings for similar sounding words. 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   165
     There are some differences to soundex, though: 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   166
        its length is not limited to 4, but depends on the length of the original string;
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   167
        it does not start with the first character of the input.
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   168
     This algorithm is described by Postel 1969"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   169
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   170
    ^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   173
     #(
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
        'Müller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
        'Miller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
        'Mueller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
        'Mühler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   178
        'Mühlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   179
        'Mülherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   180
        'Myler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   181
        'Millar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   182
        'Myller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   183
        'Müllar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   184
        'Müler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   185
        'Muehler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   186
        'Mülller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
        'Müllerr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
        'Muehlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   189
        'Muellar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   190
        'Mueler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   191
        'Mülleer'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   192
        'Mueller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   193
        'Nüller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   194
        'Nyller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   195
        'Niler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
        'Czerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
        'Tscherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
        'Czernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   199
        'Tschernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   200
        'Schernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
        'Scherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   202
        'Scherno'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   203
        'Czerne'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   204
        'Zerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   205
        'Tzernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   206
        'Breschnew'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   207
     ) do:[:w |
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   208
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   209
     ].
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   210
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   211
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   212
    "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   213
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   214
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   215
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   216
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   217
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   218
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   219
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   220
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   221
!
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   222
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   223
mySQLSoundexCodeOf:aString
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   224
    "return the mySQL soundex code. The mySQL soundex code is different from the miracode 'american' soundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   225
     (no 4char limitation; different order of duplicate vowel vs. duplicate code elimination)"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   226
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   227
    ^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   228
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   229
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   230
     #(
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   231
        'Müller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   232
        'Miller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   233
        'Mueller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   234
        'Mühler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   235
        'Mühlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   236
        'Mülherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   237
        'Myler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   238
        'Millar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   239
        'Myller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   240
        'Müllar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   241
        'Müler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   242
        'Muehler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   243
        'Mülller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   244
        'Müllerr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   245
        'Muehlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   246
        'Muellar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   247
        'Mueler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   248
        'Mülleer'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   249
        'Mueller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   250
        'Nüller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   251
        'Nyller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   252
        'Niler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   253
        'Czerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   254
        'Tscherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   255
        'Czernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   256
        'Tschernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   257
        'Schernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   258
        'Scherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   259
        'Scherno'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   260
        'Czerne'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   261
        'Zerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   262
        'Tzernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   263
        'Breschnew'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   264
     ) do:[:w |
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   265
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   266
     ].
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   267
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   268
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   269
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   270
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   271
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   272
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   273
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   274
     PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   275
     PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   276
     PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
soundexCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
    "return a soundex phonetic code or nil.
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   282
     Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   283
     tool when searching for words where the correct spelling is unknown.
4194
12b5e3e2219b #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4184
diff changeset
   284
     (read Knuth or search the web if you don't know what a soundex code is).
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   285
     Caveat: 'similar sounding words' means: 'similar sounding in English'."
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   286
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   287
    ^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   288
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   289
"/ old code - now use code in private class...
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   290
"/    |inStream codeStream ch last lch codeLength codes code lastCode|
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   291
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   292
"/    inStream := aString readStream.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   293
"/    inStream skipSeparators.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   294
"/    inStream atEnd ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   295
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   296
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   297
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   298
"/    ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   299
"/    ch isLetter ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   300
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   301
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   302
"/    codeLength := 0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   303
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   304
"/    codes := Dictionary new.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   305
"/    codes atAll:'bpfv'     put:$1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   306
"/    codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   307
"/    codes atAll:'dt'       put:$3.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   308
"/    codes atAll:'l'        put:$4.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   309
"/    codes atAll:'nm'       put:$5.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   310
"/    codes atAll:'r'        put:$6.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   311
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   312
"/    codeStream := WriteStream on:(String new:4).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   313
"/    codeStream nextPut:(ch asUppercase).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   314
"/    last := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   315
"/    lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   316
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   317
"/    [inStream atEnd] whileFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   318
"/        ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   319
"/        lch := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   320
"/        lch = last ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   321
"/            last := lch.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   322
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   323
"/            code := codes at:lch ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   324
"/            (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   325
"/                codeLength < 3 ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   326
"/                    codeStream nextPut:code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   327
"/                    codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   328
"/                    codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   329
"/                ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   330
"/            ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   331
"/            lastCode := code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   332
"/        ]
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   333
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   334
"/    [ codeLength < 3 ] whileTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   335
"/        codeStream nextPut:$0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   336
"/        codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   337
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   338
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   339
"/    ^ codeStream contents
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   340
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   341
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   342
     PhoneticStringUtilities soundexCodeOf:'claus'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   343
     PhoneticStringUtilities soundexCodeOf:'clause'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   344
     PhoneticStringUtilities soundexCodeOf:'close'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   345
     PhoneticStringUtilities soundexCodeOf:'smalltalk' 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   346
     PhoneticStringUtilities soundexCodeOf:'smaltalk'  
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   347
     PhoneticStringUtilities soundexCodeOf:'smaltak'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   348
     PhoneticStringUtilities soundexCodeOf:'smaltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   349
     PhoneticStringUtilities soundexCodeOf:'smoltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   350
     PhoneticStringUtilities soundexCodeOf:'aa'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   351
     PhoneticStringUtilities soundexCodeOf:'by'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   352
     PhoneticStringUtilities soundexCodeOf:'bab'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   353
     PhoneticStringUtilities soundexCodeOf:'bob'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   354
     PhoneticStringUtilities soundexCodeOf:'bop'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   355
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   356
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   357
3648
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   358
!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   359
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   360
isUtilityClass
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   361
    ^ self == PhoneticStringUtilities
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   362
! !
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   363
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   364
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   365
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   366
defaultClass
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   367
	^SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   368
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   369
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   370
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   371
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   372
documentation
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   373
"
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   374
    abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   375
    They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   376
    to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   377
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   378
    Note that some comparators are better suited to particular languages.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   379
"
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   380
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   381
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   382
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   383
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   384
new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   385
    ^ self basicNew initialize.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   386
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   387
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   388
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   389
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   390
isAbstract
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   391
    ^ self == PhoneticStringUtilities::PhoneticStringComparator
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   392
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   393
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   394
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   395
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   396
does:aString soundLike:anotherString 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   397
    |translations1 translations2|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   398
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   399
    translations1 := self phoneticStringsFor:aString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   400
    translations2 := self phoneticStringsFor:anotherString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   401
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   402
    ^ translations1 contains:[:t1 | 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   403
        translations2 contains:[:t2 | t1 = t2]]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   404
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   405
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   406
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   407
            does:'miller' soundLike:'miler'.   
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   408
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   409
            does:'miller' soundLike:'milner'.   
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   410
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   411
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   412
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   413
phoneticStringsFor: aString
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   414
    "Should answer an array of alternate phonetic strings for the given input string."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   415
    self subclassResponsibility
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   416
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   417
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   418
     (PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   419
            phoneticStringsFor:'miller') first      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   420
     'miller' asSoundexCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   421
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   422
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   423
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   424
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   425
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   426
initialize
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   427
    "Invoked when a new instance is created."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   428
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   429
    "/ please change as required (and remove this comment)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   430
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   431
    "/ super initialize.   -- commented since inherited method does nothing
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   432
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   433
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   434
!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   435
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   436
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   437
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   438
    There are many extended and enhanced soundex variants around;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   439
    here is one, called 'extended soundex'. It is described for example in
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   440
    http://www.epidata.dk/documentation.php.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   441
    Its author and origin are unknown.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   442
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   443
    The number of digits is increased to 5 or 8;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   444
    The first character is not used literally; instead it is encoded like the rest.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   445
    This might have a negative effect on names starting with a vowel, though.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   446
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   447
    Overall, it can be doubted if this is really an enhancement after all.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   448
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   449
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   450
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   451
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   452
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   453
phoneticStringsFor:aString
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   454
    "generates both an extended soundex of length 5 and one of length 8"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   455
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   456
    |first second u t prevCode|
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   457
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   458
    u := aString asUppercase.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   459
    first := second := ''.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   460
    u do:[:c | 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   461
        t := self translate:c.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   462
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   463
            first := first , t.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   464
            second := second , t.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   465
            second size == 8 ifTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   466
                ^ Array with:(first copyTo:5) with:second 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   467
            ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   468
        ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   469
        prevCode := t
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   470
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   471
    [ first size < 5 ] whileTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   472
        first := first , '0'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   473
        second := second , '0'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   474
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   475
    [ second size < 8 ] whileTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   476
        second := second , '0'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   477
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   478
    ^ Array with:first with:second
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   479
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   480
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   481
     self basicNew phoneticStringsFor:'müller'  #('87900' '87900000')  
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   482
     self basicNew phoneticStringsFor:'miller'  #('87900' '87900000')   
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   483
     self basicNew phoneticStringsFor:'muller'  #('87900' '87900000')    
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   484
     self basicNew phoneticStringsFor:'muler'   #('87900' '87900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   485
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   486
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   487
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   488
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   489
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   490
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   491
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   492
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   493
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   494
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   495
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   496
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   497
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   498
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   499
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   500
translate:aCharacter
    "Answer the extended-soundex code for aCharacter as a one-character String,
     or nil if the character has no code.

     Only the uppercase letters compared below are recognized; anything else
     (including H and W, which are not listed) answers nil.

     Mapping:
        A E I O U Y -> 0   (vowels serve as separators)
        B P         -> 1
        F V         -> 2
        C S K       -> 3
        G J         -> 4
        Q X Z       -> 5
        D T         -> 6
        L           -> 7
        M N         -> 8
        R           -> 9

     Simple if's are used on purpose, for more speed when compiled.
     The D/T group used to contain a second test for G, which could never be
     reached because G is already answered as 4 above; it has been removed."

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ].
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ].
    aCharacter == $V ifTrue:[^ '2' ].

    aCharacter == $C ifTrue:[^ '3' ].
    aCharacter == $S ifTrue:[^ '3' ].
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ].
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ].
    aCharacter == $X ifTrue:[^ '5' ].
    aCharacter == $Z ifTrue:[^ '5' ].

    aCharacter == $D ifTrue:[^ '6' ].
    aCharacter == $T ifTrue:[^ '6' ].

    aCharacter == $L ifTrue:[^ '7' ].

    aCharacter == $M ifTrue:[^ '8' ].
    aCharacter == $N ifTrue:[^ '8' ].

    aCharacter == $R ifTrue:[^ '9' ].

    "no code for this character"
    ^ nil
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   539
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   540
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   541
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   542
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   543
documentation
"
     The koelnerPhonetic (Cologne phonetic) code is for the German language
     what the soundex code is for English:
     it returns similar strings for similar sounding words.

     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input.

     The instance-side entry point is #phoneticStringsFor:, which answers an Array
     holding the single code string. Before coding, the input is lowercased,
     'ph' is replaced by 'f', the umlauts are replaced by their base vowels
     and the sharp s by 'ss'.

     This algorithm was described by Postel in 1969.
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   554
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   555
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   556
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   557
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   558
phoneticStringsFor: aString
    "Return an Array holding the koelner phonetic code of aString (a single String).

     The koelnerPhonetic code is for the German language what the soundex code is for English;
     it returns similar strings for similar sounding words.
     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input.
     This algorithm is described by Postel 1969.

     Processing steps:
        - normalize: withoutSeparators, lowercase, 'ph' -> 'f',
          umlauts -> base vowel, sharp s -> 'ss';
        - pad with one '#' on each side, so that every letter can be coded from a
          3-character window (predecessor, letter, successor);
          the first letter is coded by #convertFirst:, all others by #convertRest:;
        - drop the '-' markers (emitted for letters without a code, i.e. h);
        - collapse runs of identical adjacent digits into one;
        - remove every '0', except one at the very beginning of the code.

     The last two steps must happen in this order: removing the zeros first would
     also merge equal codes which are separated by a vowel
     (hoffmann: 0336066 -> 03606 -> 0366, and not 036)."

    |in raw collapsed rslt|

    in := aString withoutSeparators asLowercase.
    in := in copyReplaceString:'ph' withString:'f'.
    in := in copyReplaceAll:$ü withAll:'u'.
    in := in copyReplaceAll:$ä withAll:'a'.
    in := in copyReplaceAll:$ö withAll:'o'.
    in := in copyReplaceAll:$ß withAll:'ss'.
    in := '#',in,'#'.

    "code letter by letter; a stream avoids repeated string concatenation"
    raw := String streamContents:[:s |
        1 to:in size-2 do:[:i |
            |sub val|

            sub := in copyFrom:i to:i+2.
            val := (i==1)
                        ifTrue:[ self convertFirst:sub ]
                        ifFalse:[ self convertRest:sub ].
            s nextPutAll:val
        ].
    ].

    "letters without a code were marked with '-'"
    raw := raw select:[:ch | ch ~= $-].

    "collapse adjacent duplicates ..."
    collapsed := String streamContents:[:s |
        |prev|

        raw do:[:ch |
            ch ~= prev ifTrue:[
                s nextPut:ch
            ].
            prev := ch.
        ].
    ].

    "... and only then remove the zeros (a leading zero is kept)"
    rslt := String streamContents:[:s |
        1 to:collapsed size do:[:i |
            |ch|

            ch := collapsed at:i.
            (i == 1 or:[ch ~= $0]) ifTrue:[
                s nextPut:ch
            ].
        ].
    ].
    ^ Array with:rslt.

    "
     #(
        'Müller'
        'Miller'
        'Mueller'
        'Mühler'
        'Mühlherr'
        'Mülherr'
        'Myler'
        'Millar'
        'Myller'
        'Müllar'
        'Müler'
        'Muehler'
        'Mülller'
        'Müllerr'
        'Muehlherr'
        'Muellar'
        'Mueler'
        'Mülleer'
        'Mueller'
        'Nüller'
        'Nyller'
        'Niler'
        'Czerny'
        'Tscherny'
        'Czernie'
        'Tschernie'
        'Schernie'
        'Scherny'
        'Scherno'
        'Czerne'
        'Zerny'
        'Tzernie'
        'Breschnew'
     ) do:[:w |
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:w) first
     ].
    "

    "
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Breschnew' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Breschneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Braeschneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Braessneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Pressneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Presznäph' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Präschnäf' -> '17863'
    "
    "
     self basicNew phoneticStringsFor:'müller'      #('657')
     self basicNew phoneticStringsFor:'möller'      #('657')
     self basicNew phoneticStringsFor:'miller'      #('657')
     self basicNew phoneticStringsFor:'muller'      #('657')
     self basicNew phoneticStringsFor:'muler'       #('657')
     self basicNew phoneticStringsFor:'schmidt'     #('862')
     self basicNew phoneticStringsFor:'schneider'   #('8627')
     self basicNew phoneticStringsFor:'fischer'     #('387')
     self basicNew phoneticStringsFor:'weber'       #('317')
     self basicNew phoneticStringsFor:'meyer'       #('67')
     self basicNew phoneticStringsFor:'wagner'      #('3467')
     self basicNew phoneticStringsFor:'schulz'      #('858')
     self basicNew phoneticStringsFor:'becker'      #('147')
     self basicNew phoneticStringsFor:'hoffmann'    #('0366')
     self basicNew phoneticStringsFor:'schäfer'     #('837')
    "
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   674
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   675
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   676
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   677
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   678
convertFirst:chars
    "Answer the code (a String) for the first letter of a word.
     chars is the 3-character window handed in by #phoneticStringsFor: for
     index 1, i.e. the '#' padding, the first letter and its successor.

     The rules below are tried in order with #match: and the first matching
     one wins; they cover vowels at the beginning of a word (code 0) and the
     special handling of a leading c (code 4 before a, h, k, l, o, q, r, u, x;
     code 8 otherwise).
     NOTE(review): this relies on '#' acting as a single-character wildcard
     in #match: patterns - confirm against the String implementation.

     Everything not covered here is delegated to #convertRest:."

    |rules hit|

    rules := #(
        ('#a#' '0')
        ('#e#' '0')
        ('#i#' '0')
        ('#j#' '0')
        ('#y#' '0')
        ('#o#' '0')
        ('#u#' '0')

        ('#ca' '4')
        ('#ch' '4')
        ('#ck' '4')
        ('#cl' '4')
        ('#co' '4')
        ('#cq' '4')
        ('#cr' '4')
        ('#cu' '4')
        ('#cx' '4')

        ('#c#' '8')
    ).

    hit := rules detect:[:rule | (rule at:1) match:chars] ifNone:[nil].
    hit isNil ifTrue:[
        "no special rule for a leading letter - use the general table"
        ^ self convertRest:chars
    ].
    ^ hit at:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   706
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   707
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   708
convertRest:chars
    "Answer the code (a String) for one letter of a word.
     chars is the 3-character window handed in by #phoneticStringsFor::
     predecessor, the letter to be coded, successor ('#' padding at the word
     boundaries).

     The rules below are tried in order with #match: and the first matching
     one wins, so context-dependent rules must precede the general ones:
        d, t before s, c, z             -> 8, otherwise 2
        x after c, k, q                 -> 8, otherwise 48
        c after s, z                    -> 8
        c before a, o, u, h, k, x, q    -> 4, otherwise 8
        vowels, j, y                    -> 0
        h                               -> '-' (no code; removed by the caller)
        l -> 5, r -> 7, m n -> 6, s z -> 8, b p -> 1, f v w -> 3, g k q -> 4
        anything else                   -> '?'
     NOTE(review): this relies on '#' acting as a single-character wildcard
     in #match: patterns - confirm against the String implementation.

     The rule for 'c after z' was formerly spelled 'sz#', which only repeated
     the general z rule and left c after z to be coded as 4 before a, o, u, ...;
     it now reads 'zc#'."

    #(
        ('#ds' '8')
        ('#dc' '8')
        ('#dz' '8')
        ('#ts' '8')
        ('#tc' '8')
        ('#tz' '8')
        ('#d#' '2')
        ('#t#' '2')
        ('cx#' '8')
        ('kx#' '8')
        ('qx#' '8')
        ('#x#' '48')
        ('sc#' '8')
        ('zc#' '8')
        ('#ca' '4')
        ('#co' '4')
        ('#cu' '4')
        ('#ch' '4')
        ('#ck' '4')
        ('#cx' '4')
        ('#cq' '4')
        ('#c#' '8')
        ('#a#' '0')
        ('#e#' '0')
        ('#i#' '0')
        ('#j#' '0')
        ('#y#' '0')
        ('#o#' '0')
        ('#u#' '0')
        ('#h#' '-')
        ('#l#' '5')
        ('#r#' '7')
        ('#m#' '6')
        ('#n#' '6')
        ('#s#' '8')
        ('#z#' '8')
        ('#b#' '1')
        ('#p#' '1')
        ('#f#' '3')
        ('#v#' '3')
        ('#w#' '3')
        ('#g#' '4')
        ('#k#' '4')
        ('#q#' '4')
        ('###' '?')
    ) do:[:pair |
        (pair first match:chars) ifTrue:[
            ^ pair second
        ]
    ].

    "the catch-all rule above matches every 3-character window"
    self error:'cannot happen'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   762
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   763
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   764
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   765
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   766
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   767
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   768
    WARNING: this is the so called 'simplified soundex' algorithm;
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   769
      there are more variants like miracode (American soundex) or mysqlSoundex around.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   770
      Be sure to use the correct algorithm, if the generated strings must be compatible
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   771
      (otherwise, the differences are probably too small to be noticed as effect, but
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   772
      your search will be different)
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   773
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   774
    The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   775
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   776
    SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   777
    components of names, but by doing so reports more matches. 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   778
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   779
    There are some variations around in the literature; 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   780
    the following is called 'simplified soundex', and the rules for coding a name are:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   781
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   782
    1. The first letter of the name is used in its un-coded form to serve as the prefix
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   783
       character of the code. (The rest of the code is numerical).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   784
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   785
    2. Thereafter, W and H are ignored entirely.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   786
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   787
    3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   788
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   789
    4. Other letters of the name are converted to a numerical equivalent:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   790
                 B, P, F, V              1 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   791
                 C, G, J, K, Q, S, X, Z  2 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   792
                 D, T                    3 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   793
                 L                       4 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   794
                 M, N                    5 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   795
                 R                       6 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   796
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   797
    5. There are two exceptions: 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   798
        1. Letters that follow prefix letters which would, if coded, have the same
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   799
           numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   800
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   801
        2. The second letter of any pair of consonants having the same code number is likewise ignored, 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   802
           i.e. unless there is a ''separator'' between them in the name.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   803
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   804
    6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   805
       Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   806
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   807
    Notice, that in another variant, w and h are treated slightly differently.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   808
    This is only of relevance, if you need to reconstruct original soundex codes of other programs
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   809
    or for the original 1880 US census data.
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   810
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   811
    Also notice, that soundex deals better with English. 
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   812
    For German and other languages, other algorithms may provide better results.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   813
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   814
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   815
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   816
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   817
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   818
phoneticStringsFor:aString 
    "Answer a one-element Array holding the 4-character soundex code of aString.
     The code is the uppercased first character of aString followed by up to three
     digit codes as answered by translate:, padded on the right with '0'.
     A character contributes a digit only if its code is non-nil, is not the
     separator code '0', and differs from the code of the immediately preceding character.
     NOTE(review): u first is sent unconditionally, so an empty aString presumably
     raises an error - confirm that callers never pass one."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   819
    |u p t prevCode|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   820
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   821
    u := aString asUppercase.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   822
    "the first character is kept uncoded as the prefix of the result"
    p := u first asString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   823
    "remember the code of the prefix, so that a directly following character with the same code is dropped"
    prevCode := self translate:u first.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   824
    u from:2 to:u size do:[:c | 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   825
        t := self translate:c.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   826
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   827
            p := p , t.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   828
            "early exit as soon as prefix + three digits have been collected"
            p size == 4 ifTrue:[^ Array with:p ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   829
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   830
        "prevCode is updated for every character, including separators ('0') and
         characters for which translate: answers nil; both therefore allow the same
         digit to be emitted again afterwards"
        prevCode := t
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   831
    ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   832
    "pad short codes with zeros up to 4 characters"
    [ p size < 4 ] whileTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   833
        p := p , '0'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   834
    ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   835
    "the loop above returns as soon as p reaches 4 characters, so this copy looks purely defensive"
    ^ Array with:(p copyFrom:1 to:4)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   836
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   837
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   838
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   839
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   840
translate:aCharacter
    "Answer the soundex code of aCharacter as a one-character String,
     or nil if the character has no code.
     Only the uppercase letters tested below are recognized (comparison is by identity);
     '0' marks a separator (A E I O U Y), '1'..'6' are the consonant groups.
     Everything else - including W, H, lowercase letters and non-letters -
     falls through to the final nil."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   841
    "use simple if's for more speed when compiled"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   842
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   843
    "vowels serve as separators"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   844
    aCharacter == $A ifTrue:[^ '0' ].         
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   845
    aCharacter == $E ifTrue:[^ '0' ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   846
    aCharacter == $I ifTrue:[^ '0' ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   847
    aCharacter == $O ifTrue:[^ '0' ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   848
    aCharacter == $U ifTrue:[^ '0' ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   849
    aCharacter == $Y ifTrue:[^ '0' ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   850
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   851
    "group 1: B P F V"
    aCharacter == $B ifTrue:[^ '1' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   852
    aCharacter == $P ifTrue:[^ '1' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   853
    aCharacter == $F ifTrue:[^ '1' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   854
    aCharacter == $V ifTrue:[^ '1' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   855
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   856
    "group 2: C S K G J Q X Z"
    aCharacter == $C ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   857
    aCharacter == $S ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   858
    aCharacter == $K ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   859
    aCharacter == $G ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   860
    aCharacter == $J ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   861
    aCharacter == $Q ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   862
    aCharacter == $X ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   863
    aCharacter == $Z ifTrue:[^ '2' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   864
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   865
    "group 3: D T"
    aCharacter == $D ifTrue:[^ '3' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   866
    aCharacter == $T ifTrue:[^ '3' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   867
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   868
    "group 4: L"
    aCharacter == $L ifTrue:[^ '4' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   869
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   870
    "group 5: M N"
    aCharacter == $M ifTrue:[^ '5' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   871
    aCharacter == $N ifTrue:[^ '5' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   872
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   873
    "group 6: R"
    aCharacter == $R ifTrue:[^ '6' ]. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   874
    "no code for this character (W, H, digits, punctuation, lowercase, ...)"
    ^ nil
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   875
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   876
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   877
!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   878
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   879
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   880
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   881
    MySQL soundex is like American Soundex (i.e. miracode) without the 4 character limitation,
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   882
    and also removing vowels first, then removing duplicate codes
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   883
    (whereas the soundex code does this in reverse order).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   884
4133
eda6f1bfc8d2 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 3839
diff changeset
   885
    These variations are important, if you need the miracode soundex codes to be generated.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   886
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   887
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   888
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   889
!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   890
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   891
phoneticStringsFor:aString 
    "Answer a one-element Array holding the MySQL-style soundex code of aString.
     The code is the uppercased first character followed by the digit codes of the
     remaining characters; it is padded with '0' to at least 4 characters but,
     unlike the 4-character soundex, never truncated.
     A digit is appended only if it is non-nil, not '0' and differs from prevCode.
     prevCode is left untouched by separators (code '0') and by W and H, so equal
     codes on both sides of such characters collapse into one digit.
     NOTE(review): translate: is not defined in the visible part of this class;
     presumably it is inherited from the plain soundex comparator - verify.
     NOTE(review): u first is sent unconditionally, so an empty aString presumably
     raises an error - confirm that callers never pass one."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   892
    |u p t prevCode|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   893
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   894
    u := aString asUppercase.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   895
    "the first character is kept uncoded as the prefix of the result"
    p := u first asString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   896
    prevCode := self translate:u first.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   897
    u from:2 to:u size do:[:c |
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   898
        t := self translate:c.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   899
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   900
            p := p , t.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   901
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   902
        "only a coded (or otherwise untranslatable, non W/H) character replaces prevCode;
         separators, W and H are skipped so they do not split runs of equal codes"
        (t ~= '0' and:[ c ~= $W and:[c ~= $H]]) ifTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   903
            prevCode := t.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   904
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   905
    ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   906
    "pad short codes with zeros up to 4 characters; longer codes are kept as they are"
    [ p size < 4 ] whileTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   907
        p := p , '0'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   908
    ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   909
    ^ Array with:p
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   910
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   911
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   912
!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   913
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   914
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   915
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   916
    NYSIIS Algorithm:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   917
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   918
    1.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   919
        remove all ''S'' and ''Z'' chars from the end of the surname 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   920
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   921
    2.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   922
        transcode initial strings
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   923
            MAC => MC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   924
            PF => F
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   925
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   926
    3.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   927
        Transcode trailing strings as follows,
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   928
        
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   929
            IX => IC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   930
            EX => EC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   931
            YE,EE,IE => Y
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   932
            NT,ND => D 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   933
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   934
    4.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   935
        transcode ''EV'' to ''EF'' if not at start of name
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   936
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   937
    5.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   938
        use first character of name as first character of key 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   939
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   940
    6.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   941
        remove any ''W'' that follows a vowel 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   942
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   943
    7.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   944
        replace all vowels with ''A'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   945
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   946
    8.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   947
        transcode ''GHT'' to ''GT'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   948
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   949
    9.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   950
        transcode ''DG'' to ''G'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   951
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   952
    10.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   953
        transcode ''PH'' to ''F'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   954
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   955
    11.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   956
        if not first character, eliminate all ''H'' preceded or followed by a vowel 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   957
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   958
    12.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   959
        change ''KN'' to ''N'', else ''K'' to ''C'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   960
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   961
    13.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   962
        if not first character, change ''M'' to ''N'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   963
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   964
    14.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   965
        if not first character, change ''Q'' to ''G'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   966
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   967
    15.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   968
        transcode ''SH'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   969
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   970
    16.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   971
        transcode ''SCH'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   972
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   973
    17.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   974
        transcode ''YW'' to ''Y'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   975
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   976
    18.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   977
        if not first or last character, change ''Y'' to ''A'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   978
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   979
    19.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   980
        transcode ''WR'' to ''R'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   981
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   982
    20.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   983
        if not first character, change ''Z'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   984
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   985
    21.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   986
        transcode terminal ''AY'' to ''Y'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   987
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   988
    22.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   989
        remove trailing vowels 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   990
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   991
    23.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   992
        collapse all strings of repeated characters 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   993
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   994
    24.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   995
        if first char of original surname was a vowel, append it to the code
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   996
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   997
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   998
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   999
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1000
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1001
phoneticStringsFor:aString 
    "Answer a one-element Array holding the code obtained by uppercasing
     aString and passing it through rule1: .. rule23: in numeric order,
     followed by rule24:originalKey:. The last rule also receives the
     original, unmodified aString, because it looks at the first character
     of the name as it was passed in."

    |code|

    code := aString asUppercase.
    code := self rule1:code.
    code := self rule2:code.
    code := self rule3:code.
    code := self rule4:code.
    code := self rule5:code.
    code := self rule6:code.
    code := self rule7:code.
    code := self rule8:code.
    code := self rule9:code.
    code := self rule10:code.
    code := self rule11:code.
    code := self rule12:code.
    code := self rule13:code.
    code := self rule14:code.
    code := self rule15:code.
    code := self rule16:code.
    code := self rule17:code.
    code := self rule18:code.
    code := self rule19:code.
    code := self rule20:code.
    code := self rule21:code.
    code := self rule22:code.
    code := self rule23:code.
    code := self rule24:code originalKey:aString.
    ^ Array with:code

    "
     self new phoneticStringsFor:'hello'
     self new phoneticStringsFor:'bliss'
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1034
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1035
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1036
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1037
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1038
rule10:key 
    "Rule 10: transcode 'PH' to 'F'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1
     and answers whatever that helper answers."

    ^ self transcodeAll:'PH' of:key to:'F' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1046
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1047
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1048
rule11:key 
    "Rule 11: except in the first position, eliminate every 'H' that is
     directly preceded or directly followed by a vowel.
     Answers a new string; key itself is left untouched."

    |result doomed|

    result := key copy.

    "first collect the positions to drop, testing against the unmodified key.
     Walking from the end yields them in descending order, so the deletions
     below never shift a position that is still pending"
    doomed := OrderedCollection new.
    key size to:2 by:-1 do:[:idx |
        ((key at:idx) = $H
            and:[ (key at:idx - 1) isVowel
                    or:[ idx < key size and:[ (key at:idx + 1) isVowel ] ] ])
        ifTrue:[ doomed add:idx ]
    ].

    doomed do:[:idx |
        result := (result copyFrom:1 to:idx - 1) , (result copyFrom:idx + 1 to:result size)
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1064
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1065
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1066
rule12:key 
    "Rule 12: change 'KN' to 'N', else 'K' to 'C'.
     The 'KN' replacement must produce 'N' (not 'K'): the second pass turns
     every remaining 'K' into 'C', so an intermediate 'K' would end up as
     'C' and the rule would effectively map 'KN' to 'C'.
     Both passes delegate to #transcodeAll:of:to:startingAt: from index 1;
     the answer is whatever the second pass answers."

    |k|

    k := self transcodeAll:'KN' of:key to:'N' startingAt:1.
    ^ self transcodeAll:'K' of:k to:'C' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1081
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1082
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1083
rule13:key 
    "Rule 13: if not the first character, change 'M' to 'N'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 2,
     which is how the first character is excluded."

    ^ self transcodeAll:'M' of:key to:'N' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1091
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1092
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1093
rule14:key 
    "Rule 14: if not the first character, change 'Q' to 'G'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 2,
     which is how the first character is excluded."

    ^ self transcodeAll:'Q' of:key to:'G' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1101
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1102
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1103
rule15:key 
    "Rule 15: transcode 'SH' to 'S'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1."

    ^ self transcodeAll:'SH' of:key to:'S' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1111
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1112
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1113
rule16:key 
    "Rule 16: transcode 'SCH' to 'S'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1."

    ^ self transcodeAll:'SCH' of:key to:'S' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1121
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1122
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1123
rule17:key 
    "Rule 17: transcode 'YW' to 'Y'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1."

    ^ self transcodeAll:'YW' of:key to:'Y' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1131
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1132
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1133
rule18:key 
    "Rule 18: if not the first or last character, change 'Y' to 'A'.
     The helper #transcodeAll:of:to:startingAt: is asked to handle every 'Y'
     from index 2 on, which would also hit a trailing 'Y'; if the incoming
     key ended in 'Y', that last character is put back afterwards.
     An empty key is tolerated here: the trailing-'Y' check is skipped
     instead of raising from 'key last'."

    |k|

    k := self transcodeAll:'Y' of:key to:'A' startingAt:2.
    (key notEmpty and:[ key last = $Y ]) ifTrue:[
        "restore the trailing Y which the pass above replaced"
        k at:k size put:$Y
    ].
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1146
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1147
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1148
rule19:key 
    "Rule 19: transcode 'WR' to 'R'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1."

    ^ self transcodeAll:'WR' of:key to:'R' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1156
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1157
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1158
rule1:key 
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     Answers a new string; key itself is not modified.
     A key which is empty, or which consists of 'S' / 'Z' characters only,
     yields an empty string instead of raising from 'last' on an empty
     collection."

    |end|

    "scan backwards for the last character to keep, then copy once"
    end := key size.
    [ end > 0 and:[ 'SZ' includes:(key at:end) ] ] whileTrue:[ end := end - 1 ].
    ^ key copyFrom:1 to:end
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1167
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1168
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1169
rule20:key 
    "Rule 20: if not the first character, change 'Z' to 'S'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 2,
     which is how the first character is excluded."

    ^ self transcodeAll:'Z' of:key to:'S' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1177
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1178
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1179
rule21:key 
    "Rule 21: transcode a terminal 'AY' to 'Y'.
     Delegates to #transcodeAll:of:to:startingAt:, starting at the index of
     the second-to-last character, so that only the final two characters
     can match."

    |tailStart|

    tailStart := key size - 1.
    ^ self transcodeAll:'AY' of:key to:'Y' startingAt:tailStart
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1187
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1188
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1189
rule22:key 
    "Rule 22: remove trailing vowels.
     Answers a new string; key itself is not modified.
     A key which is empty, or which consists of vowels only, yields an
     empty string instead of raising from 'last' on an empty collection."

    |end|

    "scan backwards for the last non-vowel, then copy once"
    end := key size.
    [ end > 0 and:[ (key at:end) isVowel ] ] whileTrue:[ end := end - 1 ].
    ^ key copyFrom:1 to:end
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1198
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1199
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1200
rule23:key 
    "Rule 23: collapse all strings of repeated characters, i.e. drop every
     character which equals its immediate predecessor.
     Answers a new string; key itself is not modified.

     The scan has to run downwards (by:-1). A plain 'k size to:2 do:' is an
     ascending loop, which never executes for keys longer than two
     characters and accesses index 0 for keys shorter than two."

    |k|

    k := key copy.
    "right to left: removing position i leaves everything below i in place,
     so the comparisons still to come see the original neighbours"
    k size to:2 by:-1 do:[:i |
        (k at:i) = (k at:i - 1) ifTrue:[
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ]
    ].
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1215
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1216
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1217
rule24:key originalKey:originalKey 
    "Rule 24: if the first character of the original surname was a vowel,
     append it (uppercased) to the code.
     Answers a new string in either case; key itself is not modified."

    |initial|

    initial := originalKey first.
    initial isVowel ifFalse:[ ^ key copy ].
    ^ key , initial asString asUppercase
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1226
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1227
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1228
rule2:key 
    "Rule 2: transcode initial strings:  MAC => MC   PF => F.
     Answers a new string; key itself is not modified.
     The two prefixes exclude each other (a key rewritten to start with
     'MC' cannot start with 'PF' and vice versa), so at most one of the
     replacements applies."

    (key startsWith:'MAC') ifTrue:[ ^ 'MC' , (key copyFrom:4) ].
    (key startsWith:'PF') ifTrue:[ ^ 'F' , (key copyFrom:3) ].
    ^ key copy
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1240
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1241
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1242
rule3:key 
    "Rule 3: transcode trailing strings as follows:
        IX         => IC
        EX         => EC
        YE, EE, IE => Y
        NT, ND     => D
     The four replacements are applied one after the other, in the order
     listed, each through #transcodeTrailing:of:to: and each working on the
     result of the previous one. The first one is handed a copy of key."

    |result|

    result := key copy.
    #( #('IX') #('EX') #('YE' 'EE' 'IE') #('NT' 'ND') )
        with:#( 'IC' 'EC' 'Y' 'D' )
        do:[:suffixes :replacement |
            result := self transcodeTrailing:suffixes of:result to:replacement
        ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1268
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1269
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1270
rule4:key 
    "Rule 4: transcode 'EV' to 'EF' if not at the start of the name.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 2,
     which is how a leading 'EV' is excluded."

    ^ self transcodeAll:'EV' of:key to:'EF' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1278
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1279
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1280
rule5:key
    "Rule 5 (use the first character of the name as the first character
     of the key) needs no work here, because the conversion is done
     in place: key is answered unchanged."

    ^ key
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1284
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1285
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1286
rule6:key
    "Rule 6: remove any 'W' that follows a vowel.
     Answers a new string; key itself is not modified.
     The scan starts at index 2, so a 'W' in the first position is
     never removed."

    |k i|

    k := key copy.
    i := 2.
    [
        i <= k size and:[ (i := k indexOf:$W startingAt:i) > 0 ]
    ] whileTrue:[
        (k at:i - 1) isVowel ifTrue:[
            "drop the W; the character that followed it is now at index i
             and is examined by the next search"
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ] ifFalse:[
            "a W after a non-vowel is kept - step past it, otherwise the
             search would find the very same W again and never terminate"
            i := i + 1
        ]
    ].
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1301
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1302
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1303
rule7:key
    "Rule 7: answer a copy of key in which every vowel has been replaced
     by $A. All other characters are kept as they are; key itself is
     not modified."

    |result|

    result := key copy.
    key keysAndValuesDo:[:index :ch |
        ch isVowel ifTrue:[
            result at:index put:$A
        ]
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1314
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1315
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1316
rule8:key
    "Rule 8: answer a copy of key in which 'GHT' is transcoded to 'GT'.
     The whole key is searched, beginning with its first character."

    ^ self transcodeAll:'GHT' of:key to:'GT' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1324
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1325
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1326
rule9:key
    "Rule 9: answer a copy of key in which 'DG' is transcoded to 'G'.
     The whole key is searched, beginning with its first character."

    ^ self transcodeAll:'DG' of:key to:'G' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1334
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1335
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1336
transcodeAll:aString of:key to:replacementString startingAt:start
    "Answer a copy of key in which the occurrences of aString found at or
     after index start have been replaced by replacementString.
     After each replacement the search begins again at start, so a match
     that only comes into existence through an earlier replacement is
     rewritten as well. key itself is not modified."

    |result matchIndex head tail|

    result := key copy.
    [
        matchIndex := result indexOfSubCollection:aString startingAt:start.
        matchIndex > 0
    ] whileTrue:[
        head := result copyFrom:1 to:matchIndex - 1.
        tail := result copyFrom:matchIndex + aString size to:result size.
        result := head , replacementString , tail
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1347
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1348
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1349
transcodeTrailing:anArrayOfStrings of:key to:replacementString
    "Answer a copy of key in which a trailing occurrence of any of the
     strings in anArrayOfStrings has been replaced by replacementString.
     The strings are tried in the order given, each one against the
     result of the previous step. key itself is not modified."

    |answer|

    answer := key copy.
    anArrayOfStrings do:[:aString |
        "a key shorter than the suffix cannot end with it; skipping it
         also avoids handing a start index below 1 to the search"
        answer size >= aString size ifTrue:[
            answer := self
                        transcodeAll:aString
                        of:answer
                        to:replacementString
                        startingAt:(answer size - aString size) + 1
        ]
    ].
    ^ answer
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1361
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1362
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1363
!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1364
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1365
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1366
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1367
    Implementation of the PHONEM algorithm, as described in
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1368
    'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1369
    Ein Programm fuer kontextsensitive phonetische Textumwandlung
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1370
    ct Magazin fuer Computer & Technik 25/1998'
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  1371
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  1372
    This algorithm is better suited to the German language (it takes umlauts into account).
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1373
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1374
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1375
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1376
!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1377
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1378
phoneticStringsFor:aString 
    "Answer a one-element Array holding the phonetic code computed for aString.
     Processing steps:
       1. the input is converted to uppercase
       2. two-character groups are replaced according to a fixed table
       3. single characters are transliterated
       4. every character outside 'ABCDLMNORSUVWXY' is removed
       5. runs of the same character are squashed into one"

    |s idx t t2 digraphs match|

    "(two-character group, replacement) - searched in this order"
    digraphs := #(
        #('SC' 'C')
        #('SZ' 'C')
        #('CZ' 'C')
        #('TZ' 'C')
        #('TS' 'C')
        #('KS' 'X')
        #('PF' 'V')
        #('QU' 'KW')
        #('PH' 'V')
        #('UE' 'Y')
        #('AE' 'E')
        #('OE' 'Ö')
        #('EI' 'AY')
        #('EY' 'AY')
        #('EU' 'OY')
        #('AU' 'A§')
        #('OU' '§ ')
    ).

    s := aString asUppercase.

    idx := 1.
    "idx runs up to (s size - 1) so that the last two characters are
     examined as well; the former bound (idx < (s size-1)) stopped one
     group too early and left a trailing 'KS', 'TZ', ... unreplaced"
    [idx < s size] whileTrue:[
        t := s copyFrom:idx to:idx+1.
        match := digraphs detect:[:pair | pair first = t] ifNone:[nil].
        t2 := match isNil ifTrue:[nil] ifFalse:[match last].
        t2 notNil ifTrue:[
            "idx is not advanced: the replacement is looked at again,
             together with the character that now follows it"
            s := (s copyTo:idx-1),t2,(s copyFrom:idx+2 to:s size)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "/ NOTE(review): the 'to' string is one character longer than the 'from' string - confirm the intended mapping
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVDDUA'.
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ Array with:s

    "
     self basicNew phoneticStringsFor:'müller'  #('MYLR')    
     self basicNew phoneticStringsFor:'mueller' #('MYLR')    
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')     
     self basicNew phoneticStringsFor:'muller'  #('MULR') 
     self basicNew phoneticStringsFor:'muler'   #('MULR') 
     self basicNew phoneticStringsFor:'schmidt'     #('CMYD')
     self basicNew phoneticStringsFor:'schneider'   #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'     #('VYCR')
     self basicNew phoneticStringsFor:'weber'       #('VBR')
     self basicNew phoneticStringsFor:'meyer'       #('MAYR')
     self basicNew phoneticStringsFor:'wagner'      #('VACNR')
     self basicNew phoneticStringsFor:'schulz'      #('CULC')
     self basicNew phoneticStringsFor:'becker'      #('BCR')
     self basicNew phoneticStringsFor:'hoffmann'    #('OVMAN')
     self basicNew phoneticStringsFor:'schäfer'     #('CVR')
     self basicNew phoneticStringsFor:'scheffer'    #('CVR')
     self basicNew phoneticStringsFor:'schaeffer'   #('CVR')
     self basicNew phoneticStringsFor:'schaefer'    #('CVR')
     self basicNew phoneticStringsFor:'marks'       #('MARX')
     self basicNew phoneticStringsFor:'marx'        #('MARX')
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1438
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1439
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1440
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1441
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1442
copyright
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1443
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1444
Copyright (c) 2002-2004 Robert Jarvis
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1445
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1446
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1447
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1448
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1449
the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1450
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1451
The above copyright notice and this permission notice shall be included in all copies or substantial 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1452
portions of the Software.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1453
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1454
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1455
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1456
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1457
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1458
USE OR OTHER DEALINGS IN THE SOFTWARE.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1459
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1460
! !
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1461
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1462
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1463
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1464
isSlavoGermanic:aString
    "Answer true if aString contains one of the letter groups
     'w', 'k', 'cz' or 'witz'.
     The comparison ignores the case of aString: instances of this class
     keep their input key in uppercase (see #inputKey:), and compared
     as-is against the lowercase markers such a key would never match."

    |lowercased|

    lowercased := aString asLowercase.
    ^ #('w' 'k' 'cz' 'witz') contains:[:sub | lowercased includesString:sub]

    "
     self isSlavoGermanic:'walter'
     self isSlavoGermanic:'WALTER'
     self isSlavoGermanic:'smith'
    "
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1470
! !
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1471
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1472
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1473
3685
01ebbac96899 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3648
diff changeset
  1474
documentation
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1475
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1476
    The Double Metaphone algorithm:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1477
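    The algorithm was published by Lawrence Philips; descriptions and reference implementations are widely available online.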
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1478
"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1479
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1480
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1481
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1482
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1483
currentIndex
    "Answer the value of the currentIndex instance variable."

    ^ currentIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1485
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1486
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1487
currentIndex: anInteger
    "Store anInteger in the currentIndex instance variable."

    currentIndex := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1489
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1490
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1491
inputKey
    "Answer the value of the inputKey instance variable."

    ^ inputKey
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1493
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1494
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1495
inputKey: aString
    "Remember aString, converted to uppercase, as the input key."

    |uppercased|

    uppercased := aString asUppercase.
    inputKey := uppercased
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1497
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1498
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1499
primaryTranslation
    "Answer the value of the primaryTranslation instance variable."

    ^ primaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1501
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1502
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1503
primaryTranslation: anObject
    "Store anObject in the primaryTranslation instance variable."

    primaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1505
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1506
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1507
secondaryTranslation
    "Answer the value of the secondaryTranslation instance variable."

    ^ secondaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1509
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1510
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1511
secondaryTranslation: anObject
    "Store anObject in the secondaryTranslation instance variable."

    secondaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1513
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1514
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1515
skipCount
    "Answer the value of the skipCount instance variable."

    ^ skipCount
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1517
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1518
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1519
skipCount: anInteger
    "Store anInteger in the skipCount instance variable."

    skipCount := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1521
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1522
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1523
startIndex
    "Answer the value of the startIndex instance variable."

    ^ startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1525
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1526
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1527
startIndex: anObject
    "Store anObject in the startIndex instance variable."

    startIndex := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1529
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1530
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1531
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1532
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1533
phoneticStringsFor: aString
    "Private - Answer a two-element Array holding the primary and the
     secondary translation computed for aString.
     aString is stored as the input key, then the initial processing and
     the processing of the remaining characters are run, in that order."

    self
        inputKey: aString;
        performInitialProcessing;
        processRemainingCharacters.

    ^ Array with: primaryTranslation with: secondaryTranslation
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1541
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1542
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1543
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1544
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1545
initialize
	"Set up the initial state: start index and current index at 1,
	 both translations empty, and a skip count of 0."

	super initialize.

	self startIndex: 1.
	self primaryTranslation: ''.
	self secondaryTranslation: ''.
	self skipCount: 0.
	self currentIndex: 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1554
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1555
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1556
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1557
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1558
addPrimaryTranslation: aString
	"Append aString to the primary translation."

	| extended |

	extended := self primaryTranslation , aString.
	self primaryTranslation: extended
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1560
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1561
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1562
addSecondaryTranslation: aString
	"Append aString to the secondary translation."

	| extended |

	extended := self secondaryTranslation , aString.
	self secondaryTranslation: extended
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1564
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1565
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1566
decrementSkipCount
	"Lower the skip count by one."

	| remaining |

	remaining := self skipCount - 1.
	self skipCount: remaining
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1568
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1569
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1570
incrementSkipCount
	"Raise the skip count by one; delegates to #incrementSkipCount:."

	self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1572
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1573
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1574
incrementSkipCount: anInteger
	"Raise the skip count by anInteger."

	| increased |

	increased := self skipCount + anInteger.
	self skipCount: increased
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1576
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1577
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1578
incrementStartIndex
	"Advance the start index by one."

	| next |

	next := self startIndex + 1.
	self startIndex: next
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1580
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1581
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1582
isSlavoGermanic: aString
	"Answer true if aString matches any of three tests, tried in this order:
	 it includes any of the characters in 'WK' (per #includesAnyOf:),
	 it contains the substring 'CZ', or it contains the substring 'WITZ'.
	 Answer false otherwise."

	(aString includesAnyOf: 'WK') ifTrue: [ ^true ].
	(aString indexOfSubCollection: 'CZ' startingAt: 1) >= 1 ifTrue: [ ^true ].
	^(aString indexOfSubCollection: 'WITZ' startingAt: 1) >= 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1586
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1587
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1588
keyAt: anInteger
	"Answer the character of the input key at index anInteger.
	 For an index below 1 or beyond the size of the key, answer a space
	 instead of signalling an error."

	anInteger >= 1 ifFalse: [ ^Character space ].
	anInteger <= self inputKey size ifFalse: [ ^Character space ].
	^self inputKey at: anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1592
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1593
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1594
keyLeftString: lengthInteger
	"Answer the first lengthInteger characters of the input key,
	 as produced by #keyMidString:from: starting at index 1."

	| firstIndex |

	firstIndex := 1.
	^self keyMidString: lengthInteger from: firstIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1596
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1597
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1598
keyMidString: lengthInteger from: fromInteger
	"Answer a string of exactly lengthInteger characters representing the
	 window of the input key that starts at index fromInteger.
	 Window positions before index 1 or beyond the end of the key are
	 filled with spaces, so callers can compare the result against
	 fixed-length patterns without doing their own bounds checks.
	 A window lying completely outside the key answers lengthInteger spaces;
	 the three parts are clamped independently so that case neither copies
	 an inverted range nor over-pads the result.
	 A lengthInteger of zero or less answers the empty string."

	| keySize lastWanted leading trailing firstCopied lastCopied middle |

	lengthInteger <= 0 ifTrue: [ ^'' ].

	keySize := self inputKey size.
	lastWanted := fromInteger + lengthInteger - 1.

	"number of window positions before the first character of the key"
	leading := ((1 - fromInteger) max: 0) min: lengthInteger.

	"number of window positions after the last character of the key"
	trailing := ((lastWanted - keySize) max: 0) min: lengthInteger.

	"the part of the window which really overlaps the key (possibly empty)"
	firstCopied := fromInteger max: 1.
	lastCopied := lastWanted min: keySize.
	middle := firstCopied <= lastCopied
		ifTrue: [ self inputKey copyFrom: firstCopied to: lastCopied ]
		ifFalse: [ '' ].

	^(String new: leading withAll: Character space),
		middle,
		(String new: trailing withAll: Character space)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1625
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1626
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1627
keyRightString: lengthInteger
	"Answer the last lengthInteger characters of the input key,
	 as produced by #keyMidString:from: for the window ending at the
	 last index of the key."

	| firstIndex |

	firstIndex := self inputKey size - lengthInteger + 1.
	^self keyMidString: lengthInteger from: firstIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1629
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1630
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1631
performInitialProcessing
	"Handle the special cases at the very beginning of the input key:
	 - a key starting with GN, KN, PN, WR or PS advances the start index by one;
	 - a key starting with X adds 'S' to both translations and advances
	   the start index by one;
	 - a key starting with a vowel adds 'A' to both translations and advances
	   the start index by one.
	 The two-character prefix is fetched with #keyLeftString:, which pads with
	 spaces, so keys shorter than two characters (including the empty key)
	 are handled without an out-of-bounds copy."

	| firstCharacter |

	(#('GN' 'KN' 'PN' 'WR' 'PS') includes: (self keyLeftString: 2))
		ifTrue: [ self incrementStartIndex ].

	"keyAt: answers a space for an empty key, which matches neither test below"
	firstCharacter := self keyAt: 1.

	firstCharacter = $X
		ifTrue:
			[ self
				addPrimaryTranslation: 'S';
				addSecondaryTranslation: 'S'.
			self incrementStartIndex ].

	firstCharacter isVowel
		ifTrue:
			[ self
				addPrimaryTranslation: 'A';
				addSecondaryTranslation: 'A'.
			self incrementStartIndex ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1648
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1649
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1650
processB
        "Add 'P' to both translations for the B at the current index.
         If the character following the current index is also a B,
         raise the skip count by one."

        | followedByB |

        followedByB := (self keyAt: currentIndex + 1) = $B.

        self addPrimaryTranslation: 'P'.
        self addSecondaryTranslation: 'P'.

        followedByB ifTrue: [ self incrementSkipCount ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1656
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1657
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1658
processC
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1659
        "i"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1660
        ((((currentIndex >= 3
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1661
                and: [ (self keyAt: currentIndex-2) isVowel not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1662
                and: [ (self keyMidString: 3 from: currentIndex-1) = 'ACH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1663
                and: [ (self keyAt: currentIndex+2) ~= $I ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1664
                and: [ ((self keyAt: currentIndex+2) ~= $E)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1665
                                or: [ (self keyMidString: 6 from: currentIndex-2) ~= 'BACHER'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1666
                                                and: [ (self keyMidString: 6 from: currentIndex-2) ~= 'MACHER' ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1667
                        ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1668
                                [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1669
                                self addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1670
                                self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1671
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1672
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1673
        "ii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1674
        (self inputKey beginsWith: 'CAESAR')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1675
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1676
                        [ self addPrimaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1677
                        self addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1678
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1679
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1680
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1681
        "iii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1682
        (self keyMidString: 4 from: currentIndex) = 'CHIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1683
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1684
                        [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1685
                        self addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1686
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1687
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1688
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1689
        "iv"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1690
        (self keyMidString: 2 from: currentIndex) = 'CH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1691
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1692
                        [ (currentIndex > 1                "a"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1693
                                        and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1694
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1695
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1696
                                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1697
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1698
                                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1699
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1700
                        (currentIndex = 1          "b"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1701
                                        and: [ (self inputKey size > 5 and: [(self inputKey copyFrom: 1 to: 6) = 'CHARAC'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1702
                                                        or: [ (self inputKey copyFrom: 1 to: 6) = 'CHARIS' ]] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1703
                                                or: [self inputKey size > 4 and: [ ((((self inputKey copyFrom: 1 to: 4) = 'CHOR'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1704
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHYM' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1705
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHIA' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1706
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHEM' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1707
                                                        and: [ (self inputKey copyFrom: 1 to: 4) ~= 'CHORE' ] ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1708
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1709
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1710
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1711
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1712
                                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1713
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1714
                        (((((#('VAN ' 'VON ') includes: (self inputKey copyFrom: 1 to: 4))              "c"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1715
                                        or: [ (self inputKey copyFrom: 1 to: 3) = 'SCH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1716
                                        or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1717
                                                        includes: (self keyMidString: 6 from: currentIndex-2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1718
                                        or: [ #($T $S) includes: (self keyAt: currentIndex+2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1719
                                        or: [ ((currentIndex = 1)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1720
                                                        or: [ #($A $O $U $E) includes: (self keyAt: currentIndex-1) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1721
                                                and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex+2) ] ] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1722
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1723
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1724
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1725
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1726
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1727
                                                ^self ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1728
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1729
                                        [ currentIndex > 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1730
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1731
                                                        [ (self inputKey copyFrom: 1 to: 2) = 'MC'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1732
                                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1733
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1734
                                                                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1735
                                                                                                addSecondaryTranslation: 'K' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1736
                                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1737
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1738
                                                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1739
                                                                                                addSecondaryTranslation: 'K' ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1740
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1741
                                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1742
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1743
                                                                addSecondaryTranslation: 'X' ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1744
                                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1745
                                        ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1746
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1747
        "v"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1748
        (self keyAt: currentIndex+1) = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1749
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1750
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1751
                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1752
                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1753
                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1754
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1755
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1756
        "vi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1757
        (self keyMidString: 3 from: currentIndex+1) = 'CIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1758
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1759
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1760
                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1761
                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1762
                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1763
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1764
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1765
        "vii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1766
        ((self keyAt: currentIndex+1) = $C
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1767
                        and: [ ((currentIndex = 2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1768
                                and: [ (self keyAt: 1) = $M ]) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1769
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1770
                        [ ((#($I $E $H) includes: (self keyAt: currentIndex+2))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1771
                                        and: [ (self keyMidString: 2 from: currentIndex+2) ~= 'HU' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1772
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1773
                                        [ ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1774
                                                        or: [ #('UCCEE' 'UCCES') includes: (self keyMidString: 5 from: currentIndex-1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1775
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1776
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1777
                                                                addPrimaryTranslation: 'KS';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1778
                                                                addSecondaryTranslation: 'KS';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1779
                                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1780
                                                                ^self ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1781
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1782
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1783
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1784
                                                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1785
                                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1786
                                                                ^self ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1787
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1788
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1789
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1790
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1791
                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1792
                                                ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1793
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1794
        "viii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1795
        (#($K $G $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1796
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1797
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1798
                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1799
                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1800
                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1801
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1802
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1803
        "ix"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1804
        (#($I $E $Y) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1805
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1806
                        [ (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1807
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1808
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1809
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1810
                                                addSecondaryTranslation: 'X' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1811
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1812
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1813
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1814
                                                addSecondaryTranslation: 'S'].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1815
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1816
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1817
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1818
        "x"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1819
        self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1820
                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1821
                addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1822
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1823
        "xi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1824
        (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1825
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1826
                        [ self incrementSkipCount: 2 ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1827
                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1828
                        [ ((#($C $K $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1829
                                        and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex+1)) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1830
                                ifTrue: [ self incrementSkipCount: 1] ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1831
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1832
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1833
processCedille
    "Emit the code 'S' for the current character:
     the same code is appended to the primary and to the secondary translation.
     No look-ahead is performed and the skip count is left untouched."

    self addPrimaryTranslation: 'S'.
    self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1837
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1838
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1839
processD
    "Translate the D found at currentIndex.
     Primary and secondary translation always receive the same code:

       i)   D followed by G:
              - if the character after the G is I, E or Y -> 'J',  skip count + 2
              - otherwise                                  -> 'TK', skip count + 1
       ii)  D followed by T or D                           -> 'T',  skip count + 1
       iii) any other D                                    -> 'T',  skip count unchanged

     The rule is selected first; the translation is emitted once at the end."

    | code extraSkip |

    (self keyAt: currentIndex + 1) = $G
        ifTrue: [
            "i"
            (#($I $E $Y) includes: (self keyAt: currentIndex + 2))
                ifTrue: [
                    code := 'J'.
                    extraSkip := 2 ]
                ifFalse: [
                    code := 'TK'.
                    extraSkip := 1 ] ]
        ifFalse: [
            "ii and iii share the code; only the look-ahead skip differs"
            code := 'T'.
            extraSkip := (#($T $D) includes: (self keyAt: currentIndex + 1))
                            ifTrue: [ 1 ]
                            ifFalse: [ 0 ] ].

    self addPrimaryTranslation: code.
    self addSecondaryTranslation: code.

    "rule iii consumes nothing beyond the D itself, so nothing is sent in that case"
    extraSkip > 0 ifTrue: [ self incrementSkipCount: extraSkip ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1870
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1871
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1872
processF
    "Emit the code 'F' to both the primary and the secondary translation.
     If the character following the current one is also an F,
     the skip count is incremented by 1, so the doubled F yields a single code."

    | followingChar |

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    followingChar := self keyAt: self currentIndex + 1.
    followingChar = $F ifTrue: [ self incrementSkipCount: 1 ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1878
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1879
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1880
processG
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1881
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1882
        case 'G':
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1883
                if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1884
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1885
        | word |
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1886
        (self keyAt: currentIndex + 1) = $H
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1887
        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1888
                "if((current > 0) AND !!IsVowel(current - 1))"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1889
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1890
                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1891
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1892
              " {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1893
                   MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1894
                   current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1895
                   break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1896
                }"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1897
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1898
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1899
                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1900
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1901
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1902
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1903
                "if(current < 3)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1904
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1905
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1906
                currentIndex < 4 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1907
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1908
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1909
                        " //'ghislane', ghiradelli
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1910
               if(current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1911
               { "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1912
                        currentIndex = 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1913
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1914
                                "if(GetAt(current + 2) == 'I')"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1915
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1916
                                (self keyAt: currentIndex + 2) = $I
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1917
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1918
                                        "MetaphAdd(J);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1919
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1920
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1921
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1922
                                        "MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1923
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1924
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1925
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1926
                                "  current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1927
                                break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1928
                                ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1929
                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1930
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1931
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1932
                " //Parker's rule (with some further refinements) - e.g., 'hugh'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1933
                if(((current > 1) AND StringAt((current - 2), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1934
                //e.g., 'bough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1935
                OR ((current > 2) AND StringAt((current - 3), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1936
                //e.g., 'broughton'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1937
                OR ((current > 3) AND StringAt((current - 4), 1, B, H, ) ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1938
         "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1939
                (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)]) 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1940
                or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])  
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1941
                or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1942
                ifTrue: [                         
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1943
                        "current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1944
                        break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1945
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1946
                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1947
                        " //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1948
               if((current > 2) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1949
               AND (GetAt(current - 1) == 'U') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1950
               AND StringAt((current - 3), 1, C, G, L, R, T, ) )"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1951
                        (currentIndex > 3 and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1952
                                ((self keyAt: currentIndex - 1) = $U) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1953
                                        #($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1954
                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1955
                        ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1956
                                "MetaphAdd(F);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1957
                                self addPrimaryTranslation: 'F';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1958
                                addSecondaryTranslation: 'F'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1959
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1960
                                " if((current > 0) AND GetAt(current - 1) !!= 'I')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1961
                    MetaphAdd(K);"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1962
                                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1963
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1964
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1965
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1966
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1967
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1968
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1969
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1970
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1971
                "if(GetAt(current + 1) == 'N')"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1972
          (self keyAt: currentIndex + 1) = $N
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1973
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1974
                        "if((current == 1) AND IsVowel(0) AND !!SlavoGermanic())"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1975
                        (currentIndex = 2 and: [(self inputKey at: 1) isVowel and: [(self isSlavoGermanic: self inputKey) not]])
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1976
               ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1977
                                "MetaphAdd(KN, N);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1978
                                self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1979
                                addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1980
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1981
                                " //not e.g. 'cagney'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1982
                                if(!!StringAt((current + 2), 2, EY, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1983
                                AND (GetAt(current + 1) !!= 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1984
                                AND !!SlavoGermanic())"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1985
                                ((self inputKey size >= (currentIndex + 2)) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1986
                                        (self inputKey copyFrom: currentIndex + 2 to: (currentIndex + 4 min: self inputKey size)) ~= 'EY' and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1987
                                                (self keyAt: currentIndex + 1) ~= $Y and: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1988
                                                        (self isSlavoGermanic: self inputKey) not
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1989
                                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1990
                                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1991
                                ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1992
                                        self addPrimaryTranslation: 'N';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1993
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1994
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1995
                                        self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1996
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1997
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1998
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1999
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2000
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2001
                " //'tagliaro'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2002
                if(StringAt((current + 1), 2, LI, ) AND !!SlavoGermanic())"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2003
                ((self inputKey size >= (currentIndex + 3)) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2004
                        (self inputKey copyFrom: currentIndex + 1 to: currentIndex + 2) = 'LI' and: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2005
                                (self isSlavoGermanic: self inputKey) not]])
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2006
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2007
                        self addPrimaryTranslation: 'KL';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2008
                        addSecondaryTranslation: 'L'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2009
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2010
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2011
                " //-ges-,-gep-,-gel-, -gie- at beginning
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2012
                if((current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2013
                AND ((GetAt(current + 1) == 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2014
                OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER, )) )"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2015
                (currentIndex = 1 and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2016
                        ((self keyAt: currentIndex + 1) = $Y) or: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2017
                        (#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER') includes: 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2018
                                (self inputKey copyFrom: currentIndex + 1 to: currentIndex + 2))
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2019
                ]]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2020
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2021
                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2022
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2023
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2024
                " // -ger-,  -gy-
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2025
                if((StringAt((current + 1), 2, ER, ) OR (GetAt(current + 1) == 'Y'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2026
                AND !!StringAt(0, 6, DANGER, RANGER, MANGER, )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2027
                AND !!StringAt((current - 1), 1, E, I, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2028
                AND !!StringAt((current - 1), 3, RGY, OGY, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2029
                "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2030
          (((self inputKey copyFrom: currentIndex + 1 to: (currentIndex + 3 min: self inputKey size)) = 'ER' or: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2031
                                ((self keyAt: currentIndex + 1) = $Y)]) 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2032
                        and: [((#('DANGER' 'RANGER' 'MANGER') includes: (word := self inputKey copyFrom: 1 to: (6 min: self inputKey size))) not)
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2033
                                and: [(self keyAt: currentIndex - 1) ~= $E
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2034
                                        and: [(#('RGY' 'OGY') includes: (self inputKey copyFrom: currentIndex - 1 to: currentIndex + 1)) not]]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2035
                 ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2036
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2037
                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2038
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2039
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2040
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2041
          " // italian e.g, 'biaggi'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2042
           if(StringAt((current + 1), 1, E, I, Y, ) OR StringAt((current - 1), 4, AGGI, OGGI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2043
           "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2044
                ((#($E $I $Y) includes: (self keyAt: (currentIndex + 1))) or: [(#('AGGI' 'OGGI') includes: (self inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: self inputKey size)))])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2045
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2046
                        " //obvious germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2047
                                        if((StringAt(0, 4, VAN , VON , ) OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2048
                                                OR StringAt((current + 1), 2, ET, ))                                                MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2049
                        word := (self inputKey copyFrom: 1 to: 4).
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2050
                        ((#('VAN ' 'VON ') includes: word) or: [(word copyFrom: 1 to: 3) = 'SCH' or: [(word copyFrom: 1 to: 2) = 'ET']]) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2051
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2052
                                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2053
                                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2054
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2055
                            " //always soft if french ending
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2056
                                                if(StringAt((current + 1), 4, IER , ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2057
                                                        MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2058
                                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2059
                                                        MetaphAdd(J, K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2060
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2061
                                        break;"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2062
                                (((self inputKey copyFrom: currentIndex + 1 to: (currentIndex + 5 min: self inputKey size)), '    ') copyFrom: 1 to: 4) = 'IER '
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2063
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2064
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2065
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2066
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2067
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2068
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2069
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2070
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2071
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2072
                        ^self incrementSkipCount: 1.       
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2073
                ].                      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2074
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2075
        " if(GetAt(current + 1) == 'G')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2076
             current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2077
         else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2078
             current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2079
         MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2080
            break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2081
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2082
                (self keyAt: (currentIndex + 1)) = $G
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2083
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2084
                        self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2085
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2086
                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2087
                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2088
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2089
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2090
processH
    "Handle the letter H of the double metaphone encoding
     (ported from case 'H' of http://aspell.sourceforge.net/metaphone/dmetaph.cpp).

     The H is only kept if it is the first letter of the input or is preceded
     by a vowel, AND it is followed by a vowel. In that case 'H' is appended to
     both the primary and the secondary translation and the skip count is
     incremented by one (the C original advances by 2 here, by 1 otherwise).
     In every other case nothing is emitted, which also takes care of 'HH'."

    |isFirstLetter|

    isFirstLetter := currentIndex = 1.

    "guard 1: must be the first letter or come right after a vowel"
    (isFirstLetter or: [(self keyAt: currentIndex - 1) isVowel]) ifFalse: [^ self].

    "guard 2: must be followed by a vowel"
    (self keyAt: currentIndex + 1) isVowel ifFalse: [^ self].

    self
        addPrimaryTranslation: 'H';
        addSecondaryTranslation: 'H'.
    ^ self incrementSkipCount: 1
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2112
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2113
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2114
processJ
    "Handle the letter J of the double metaphone encoding
     (ported from case 'J' of http://aspell.sourceforge.net/metaphone/dmetaph.cpp).

     Rules, checked in this order (written as primary/secondary):
       1. obvious spanish: 'JOSE' at the current position, or input starting with 'SAN '
            - H/H if the J is the first letter and the input is exactly 'JOSE'
              or 'JOSE' is followed by a blank, or if the input starts with 'SAN '
            - J/H otherwise
          the method returns right after this rule.
       2. J is the first letter:  J/A   (Yankelovich/Jankelowicz)
       3. preceded by a vowel, not slavo-germanic, followed by A or O:  J/H
          (spanish pronunciation, e.g. 'bajador')
       4. J is the last letter:  J/' '
       5. otherwise J/J, unless the next letter is one of L T K S N M B Z
          or the previous letter is one of S K L (then nothing is emitted)
     Finally, a directly following second J is skipped.

     Fix: the blank test in rule 1 used to read
        self keyAt: currentIndex + 4 = (blank)
     Binary messages bind tighter than keyword messages, so this passed the
     boolean result of the comparison to keyAt: instead of an index.
     The keyword send is now parenthesised."

    | currentWord firstWord nextLetter |

    "up to 4 characters starting at the current position / at the beginning"
    currentWord := self inputKey copyFrom: currentIndex to: (currentIndex + 3 min: self inputKey size).
    firstWord := self inputKey copyFrom: 1 to: (4 min: self inputKey size).
    nextLetter := self keyAt: currentIndex + 1.

    "rule 1: obvious spanish, 'jose', 'san jacinto'"
    (currentWord = 'JOSE' or: [firstWord = 'SAN ']) ifTrue: [
        ((currentIndex = 1
            and: [self inputKey size = 4
                or: [self inputKey size >= 5
                    and: [(self keyAt: currentIndex + 4) = Character space]]])
          or: [firstWord = 'SAN '])
        ifTrue: [
            self addPrimaryTranslation: 'H';
                 addSecondaryTranslation: 'H'.
        ] ifFalse: [
            self addPrimaryTranslation: 'J';
                 addSecondaryTranslation: 'H'.
        ].
        ^ self.
    ].

    (currentIndex = 1 and: [firstWord ~= 'JOSE']) ifTrue: [
        "rule 2: leading J"
        self addPrimaryTranslation: 'J';
             addSecondaryTranslation: 'A'.
    ] ifFalse: [
        ((currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel])
          and: [(self isSlavoGermanic: self inputKey) not
                and: [nextLetter == $A or: [nextLetter == $O]]])
        ifTrue: [
            "rule 3: spanish pronunciation of e.g. 'bajador'"
            self addPrimaryTranslation: 'J';
                 addSecondaryTranslation: 'H'.
        ] ifFalse: [
            currentIndex = self inputKey size ifTrue: [
                "rule 4: trailing J"
                self addPrimaryTranslation: 'J';
                     addSecondaryTranslation: ' '.
            ] ifFalse: [
                "rule 5: plain J, unless silenced by its neighbours"
                ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not
                  and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
                ifTrue: [
                    self addPrimaryTranslation: 'J';
                         addSecondaryTranslation: 'J'.
                ].
            ].
        ].
    ].

    "a doubled J counts only once"
    nextLetter == $J ifTrue: [
        self incrementSkipCount: 1.
    ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2197
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2198
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2199
processK
    "Encode the letter K found at currentIndex.
     Both the primary and the secondary code receive 'K'.
     When the following letter is also a K, one extra skip is requested
     via incrementSkipCount:, so the doubled letter contributes only once.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'K':
            if (GetAt(current + 1) == 'K')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $K ifTrue: [ self incrementSkipCount: 1 ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2216
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2217
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2218
processL
    "Encode the letter L found at currentIndex.

     A single L adds 'L' to both codes.
     A doubled LL requests one extra skip and normally also adds 'L' to both
     codes. In the spanish cases (e.g. 'cabrillo', 'gallegos') the primary
     code gets 'L' and the secondary code gets a blank instead.

     The spanish case applies when either
        - the LL starts three letters before the end of the key and the four
          letters beginning one before the current L are ILLO, ILLA or ALLE, or
        - the key ends in AS, OS, A or O and those four letters are ALLE.

     Reference (dmetaph.cpp):
        case 'L':
            if (GetAt(current + 1) == 'L')
            {
                if (((current == (length - 3))
                        AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
                     OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, ))
                        AND StringAt((current - 1), 4, ALLE, )))
                {
                    MetaphAdd(L,  );
                    current += 2;
                    break;
                }
                current += 2;
            } else
                current += 1;
            MetaphAdd(L);
            break;
    "

    | keySize surroundingLetters |

    (self keyAt: currentIndex + 1) = $L ifTrue: [
        keySize := self inputKey size.

        "The four letters starting one before the current L.
         This must be computed up front: the second alternative below needs
         it even when the first alternative was never evaluated."
        surroundingLetters := currentIndex > 1
            ifTrue: [ self inputKey
                          copyFrom: currentIndex - 1
                          to: (currentIndex + 2 min: keySize) ]
            ifFalse: [ '' ].

        ((currentIndex = (keySize - 2)
            and: [ #('ILLO' 'ILLA' 'ALLE') includes: surroundingLetters ])
         or: [
            ((#('AS' 'OS') includes: (self inputKey copyFrom: keySize - 1 to: keySize))
                or: [ #($A $O) includes: (self keyAt: keySize) ])
            and: [ surroundingLetters = 'ALLE' ] ])
        ifTrue: [
            self addPrimaryTranslation: 'L';
                 addSecondaryTranslation: ' '.
            ^ self incrementSkipCount: 1.
        ].
        self incrementSkipCount: 1.
    ].
    self addPrimaryTranslation: 'L';
         addSecondaryTranslation: 'L'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2255
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2256
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2257
processM
    "Encode the letter M found at currentIndex.
     Both codes always receive 'M'. One extra skip is requested when
        - the M is part of UMB and that B is either the last letter of the
          key or is followed by ER (e.g. 'dumb', 'thumb', 'number'), or
        - the following letter is also an M.

     Reference (dmetaph.cpp):
        case 'M':
            if ((StringAt((current - 1), 3, UMB, )
                    AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
                 OR (GetAt(current + 1) == 'M'))
                current += 2;
            else
                current += 1;
            MetaphAdd(M);
            break;
    "

    | keySize insideUMB |

    keySize := self inputKey size.
    insideUMB := currentIndex > 1
        and: [ (self inputKey
                    copyFrom: currentIndex - 1
                    to: (currentIndex + 1 min: keySize)) = 'UMB' ].

    ((insideUMB
        and: [ currentIndex + 1 = keySize
            or: [
                "exactly the two letters after the B; a three letter slice
                 could only equal 'ER' when clamped at the end of the key"
                (self inputKey
                    copyFrom: (currentIndex + 2 min: keySize)
                    to: (currentIndex + 3 min: keySize)) = 'ER' ] ])
     or: [ (self keyAt: currentIndex + 1) = $M ])
    ifTrue: [
        self incrementSkipCount: 1.
    ].
    self addPrimaryTranslation: 'M';
         addSecondaryTranslation: 'M'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2278
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2279
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2280
processN
    "Encode the letter N found at currentIndex.
     Both the primary and the secondary code receive 'N'.
     When the following letter is also an N, one extra skip is requested
     via incrementSkipCount:.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'N':
            if (GetAt(current + 1) == 'N')
                current += 2;
            else
                current += 1;
            MetaphAdd(N);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $N ifTrue: [ self incrementSkipCount: 1 ].
    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2298
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2299
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2300
processNtilde
    "Encode an N with tilde: it is treated like a plain N, so both the
     primary and the secondary code receive 'N'. No skip is requested.

     Reference (dmetaph.cpp), case for N with tilde:
            current += 1;
            MetaphAdd(N);
            break;
    "

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2308
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2309
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2310
processP
    "Encode the letter P found at currentIndex.

     PH sounds like F: both codes receive 'F' and one extra skip is requested.
     Otherwise both codes receive 'P'. When the following letter is a P or
     a B (e.g. 'campbell', 'raspberry'), one extra skip is requested as well,
     so the pair contributes a single 'P'.

     Reference (dmetaph.cpp). Note that MetaphAdd(P) is NOT part of the
     else branch, although the original indentation suggests so:
        case 'P':
            if (GetAt(current + 1) == 'H')
            {
                MetaphAdd(F);
                current += 2;
                break;
            }

            //also account for campbell, raspberry
            if (StringAt((current + 1), 1, P, B, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(P);
            break;
    "

    | nextLetter |

    (nextLetter := self keyAt: currentIndex + 1) = $H
    ifTrue: [
        self addPrimaryTranslation: 'F';
             addSecondaryTranslation: 'F'.
        ^ self incrementSkipCount: 1.
    ].
    (#($P $B) includes: nextLetter)
    ifTrue: [
        self incrementSkipCount: 1.
    ].
    "the P itself is always encoded, also for PP and PB"
    self addPrimaryTranslation: 'P';
         addSecondaryTranslation: 'P'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2341
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2342
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2343
processQ
    "Encode the letter Q found at currentIndex.
     Q sounds like K: both the primary and the secondary code receive 'K'.
     When the following letter is also a Q, one extra skip is requested
     via incrementSkipCount:.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Q':
            if (GetAt(current + 1) == 'Q')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $Q ifTrue: [ self incrementSkipCount: 1 ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2361
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2362
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2363
processR
    "Encode the letter R found at currentIndex.

     The secondary code always receives 'R'. The primary code receives 'R'
     too, except for a french style final R (e.g. 'rogier', but not
     'hochmeier'), where it receives the empty string. That exception
     requires all of:
        - the R is the last letter of the key,
        - the key is not slavo-germanic,
        - the two letters before the R are IE,
        - the two letters before that IE are neither ME nor MA.
     Afterwards, one extra skip is requested when the following letter is
     also an R.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'R':
            if ((current == last)
                    AND not SlavoGermanic()
                    AND StringAt((current - 2), 2, IE, )
                    AND not StringAt((current - 4), 2, ME, MA, ))
                MetaphAdd(, R);
            else
                MetaphAdd(R);

            if (GetAt(current + 1) == 'R')
                current += 2;
            else
                current += 1;
            break;
    "

    | frenchFinalR twoBefore fourBefore |

    frenchFinalR := false.
    currentIndex = self inputKey size ifTrue: [
        (self isSlavoGermanic: self inputKey) ifFalse: [
            twoBefore := self inputKey
                            copyFrom: ((currentIndex - 2) max: 1)
                            to: ((currentIndex - 1) max: 1).
            twoBefore = 'IE' ifTrue: [
                fourBefore := self inputKey
                                copyFrom: ((currentIndex - 4) max: 1)
                                to: ((currentIndex - 3) max: 1).
                frenchFinalR := (#('ME' 'MA') includes: fourBefore) not
            ]
        ]
    ].

    self addPrimaryTranslation: (frenchFinalR ifTrue: [ '' ] ifFalse: [ 'R' ]).
    self addSecondaryTranslation: 'R'.

    (self keyAt: currentIndex + 1) = $R ifTrue: [ self incrementSkipCount: 1 ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2399
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2400
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2401
processRemainingCharacters
    "Walk over the key from startIndex to its end and dispatch every letter
     which is neither a vowel nor Y to its processXXX method.

     While skipCount is non-zero, the position is not processed; instead
     the skip count is decremented. Processing stops early as soon as both
     the primary and the secondary translation are longer than 4 characters.

     The handler selector is built as 'process' followed by the letter;
     C with cedille and N with tilde are mapped to #processCedille and
     #processNtilde explicitly."

    self startIndex to: self inputKey size do: [ :position |
        | letter handler |

        self skipCount = 0
            ifFalse: [
                self decrementSkipCount
            ]
            ifTrue: [
                (self primaryTranslation size > 4
                    and: [ self secondaryTranslation size > 4 ])
                        ifTrue: [ ^ self ].

                self currentIndex: position.
                letter := self keyAt: position.

                (letter isVowel or: [ letter = $Y ]) ifFalse: [
                    handler := letter == $Ç
                        ifTrue: [ #processCedille ]
                        ifFalse: [
                            letter == $Ñ
                                ifTrue: [ #processNtilde ]
                                ifFalse: [ ('process', letter asString) asSymbol ] ].
                    self perform: handler
                ]
            ]
    ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2426
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2427
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2428
processS
        "Double Metaphone rule for the letter S at currentIndex.
         Adds the primary/secondary codes for the S (or for the letter group it starts)
         and, where the rule consumes more than the S itself, asks for the following
         characters to be skipped via incrementSkipCount:.
         The Smalltalk code mirrors the C reference below; the C code uses 0-based
         indices, whereas currentIndex here is 1-based (C GetAt(3) is (key at: 4)).
         The skip counts passed here are one less than the C increments of current
         (presumably because the caller's loop advances past the S itself - confirm
         against the dispatching loop)."

        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'S':
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                                if(StringAt((current - 1), 3, ISL, YSL, ))
                                {
                                        current += 1;
                                        break;
                                }

                                //special case 'sugar-'
                                if((current == 0) AND StringAt(current, 5, SUGAR, ))
                                {
                                        MetaphAdd(X, S);
                                        current += 1;
                                        break;
                                }

                                if(StringAt(current, 2, SH, ))
                                {
                                        //germanic
                                        if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
                                                MetaphAdd(S);
                                        else
                                                MetaphAdd(X);
                                        current += 2;
                                        break;
                                }

                                //italian & armenian
                                if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
                                {
                                        if(!!SlavoGermanic())
                                                MetaphAdd(S, X);
                                        else
                                                MetaphAdd(S);
                                        current += 3;
                                        break;
                                }

                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                                if(((current == 0) 
                                                AND StringAt((current + 1), 1, M, N, L, W, ))
                                                        OR StringAt((current + 1), 1, Z, ))
                                {
                                        MetaphAdd(S, X);
                                        if(StringAt((current + 1), 1, Z, ))
                                                current += 2;
                                        else
                                                current += 1;
                                        break;
                                }

                                if(StringAt(current, 2, SC, ))
                                {
                                        //Schlesinger's rule
                                        if(GetAt(current + 2) == 'H')
                                                //dutch origin, e.g. 'school', 'schooner'
                                                if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
                                                {
                                                        //'schermerhorn', 'schenker'
                                                        if(StringAt((current + 3), 2, ER, EN, ))
                                                        {
                                                                MetaphAdd(X, SK);
                                                        }else
                                                                MetaphAdd(SK);
                                                        current += 3;
                                                        break;
                                                }else{
                                                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
                                                                MetaphAdd(X, S);
                                                        else
                                                                MetaphAdd(X);
                                                        current += 3;
                                                        break;
                                                }

                                        if(StringAt((current + 2), 1, I, E, Y, ))
                                        {
                                                MetaphAdd(S);
                                                current += 3;
                                                break;
                                        }
                                        //else
                                        MetaphAdd(SK);
                                        current += 3;
                                        break;
                                }

                                //french e.g. 'resnais', 'artois'
                                if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
                                        MetaphAdd(, S);
                                else
                                        MetaphAdd(S);

                                if(StringAt((current + 1), 1, S, Z, ))
                                        current += 2;
                                else
                                        current += 1;
                                break;
"

        | key keySize nextChar char2 chars char |

        key := self inputKey.
        keySize := key size.

        "special cases 'island', 'isle', 'carlisle', 'carlysle': no code is added for the S"
        (#('ISL' 'YSL') includes: (key copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: keySize)))
        ifTrue: [
                ^self
        ].

        "special case 'sugar-' at the start of the word"
        (currentIndex = 1 and: [(key copyFrom: 1 to: (5 min: keySize)) = 'SUGAR'])
        ifTrue: [
                self addPrimaryTranslation: 'X';
                addSecondaryTranslation: 'S'.
                ^self.
        ].

        (key copyFrom: currentIndex to: ((currentIndex + 1) min: keySize)) = 'SH'
        ifTrue: [
                "germanic: the reference compares the FOUR characters starting at the H
                 (currentIndex + 1 .. currentIndex + 4); copying up to currentIndex + 5
                 would yield five characters and never match unless the word ends there"
                (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (key copyFrom: (currentIndex + 1 min: keySize) to: ((currentIndex + 4) min: keySize)))
                ifTrue: [
                        self addPrimaryTranslation: 'S';
                        addSecondaryTranslation: 'S'.
                ] ifFalse: [
                        self addPrimaryTranslation: 'X';
                        addSecondaryTranslation: 'X'.
                ].
                ^self incrementSkipCount: 1
        ].

        "italian & armenian"
        ((#('SIO' 'SIA') includes: (key copyFrom: currentIndex to: (currentIndex + 2 min: keySize)))
                or: [(key copyFrom: currentIndex to: (currentIndex + 3 min: keySize)) = 'SIAN'])
        ifTrue: [
                (self isSlavoGermanic: key) not
                ifTrue: [
                        self addPrimaryTranslation: 'S';
                        addSecondaryTranslation: 'X'.
                ] ifFalse: [
                        self addPrimaryTranslation: 'S';
                        addSecondaryTranslation: 'S'.
                ].
                ^self incrementSkipCount: 2
        ].

        "german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider';
         also -sz- in slavic languages.
         Note: nextChar is only assigned when the or:-block is evaluated; it stays nil
         when the first condition already matched, in which case the Z test below is false"
        ((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
                or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
        ifTrue: [
                self addPrimaryTranslation: 'S';
                addSecondaryTranslation: 'X'.
                nextChar == $Z
                ifTrue: [
                        ^self incrementSkipCount: 1.
                ].
                ^self.
        ].

        ((key copyFrom: currentIndex to: ((currentIndex + 1) min: keySize)) = 'SC')
        ifTrue: [
                "Schlesinger's rule"
                (char2 := self keyAt: currentIndex + 2) = $H
                ifTrue: [
                        "dutch origin, e.g. 'school', 'schooner'"
                        (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := key copyFrom: ((currentIndex + 3) min: keySize) to: ((currentIndex + 4) min: keySize)))
                        ifTrue: [
                                "'schermerhorn', 'schenker'"
                                (#('ER' 'EN') includes: chars)
                                ifTrue: [
                                        self addPrimaryTranslation: 'X';
                                        addSecondaryTranslation: 'SK'.
                                ] ifFalse: [
                                        self addPrimaryTranslation: 'SK';
                                        addSecondaryTranslation: 'SK'.
                                ].
                                ^self incrementSkipCount: 2.
                        ] ifFalse: [
                                "word-initial SCH followed by a non-vowel other than W;
                                 a missing 4th character is treated like a consonant ($b)"
                                ((currentIndex = 1 and: [(char := key at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
                                ifTrue: [
                                        self addPrimaryTranslation: 'X';
                                        addSecondaryTranslation: 'S'.
                                ] ifFalse: [
                                        self addPrimaryTranslation: 'X';
                                        addSecondaryTranslation: 'X'.
                                ].
                                ^self incrementSkipCount: 2.
                        ].
                ] ifFalse: [
                        (#($I $E $Y) includes: char2)
                        ifTrue: [
                                self addPrimaryTranslation: 'S';
                                addSecondaryTranslation: 'S'.
                                ^self incrementSkipCount: 2.
                        ] ifFalse: [
                                self addPrimaryTranslation: 'SK';
                                addSecondaryTranslation: 'SK'.
                                ^self incrementSkipCount: 2.
                        ]
                ].
        ].

        "french e.g. 'resnais', 'artois': a final S after AI/OI gets no primary code"
        (currentIndex = keySize and: [(#('AI' 'OI') includes: (key copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
        ifTrue: [
                self addPrimaryTranslation: '';
                addSecondaryTranslation: 'S'.
        ] ifFalse: [
                self addPrimaryTranslation: 'S';
                addSecondaryTranslation: 'S'.
        ].

        "a directly following S or Z is covered by the code just added"
        (#($S $Z) includes: (self keyAt: currentIndex + 1))
        ifTrue: [
                ^self incrementSkipCount: 1.
        ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2629
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2630
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2631
processT
        "Encode the letter T found at currentIndex.
         Port of the 'T' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         Rules, tried in this order (primary / secondary translation):
           TION       -> X / X, skip count incremented by 2
           TIA, TCH   -> X / X, skip count incremented by 2
           TH, TTH    -> T / T if OM or AM follows two letters further on
                         (special case 'thomas', 'thames'), or if the key starts
                         with 'VAN ', 'VON ' or SCH (germanic);
                         0 / T otherwise;
                         skip count incremented by 1 in both cases
           otherwise  -> T / T; if the next letter is T or D the skip count
                         is incremented by 1 first"

        |key keySize peek plainT|

        key := self inputKey.
        keySize := key size.

        "peek value: n answers up to n letters starting at the current one,
         clipped at the end of the key"
        peek := [:n | key copyFrom: currentIndex to: ((currentIndex + n - 1) min: keySize)].

        (peek value: 4) = 'TION' ifTrue: [
                self addPrimaryTranslation: 'X'.
                self addSecondaryTranslation: 'X'.
                ^ self incrementSkipCount: 2
        ].

        (#('TIA' 'TCH') includes: (peek value: 3)) ifTrue: [
                self addPrimaryTranslation: 'X'.
                self addSecondaryTranslation: 'X'.
                ^ self incrementSkipCount: 2
        ].

        ((peek value: 2) = 'TH' or: [(peek value: 3) = 'TTH']) ifTrue: [
                "the tests are evaluated lazily, left to right"
                plainT := (#('OM' 'AM') includes: (key copyFrom: currentIndex + 2 to: ((currentIndex + 3) min: keySize)))
                            or: [(#('VAN ' 'VON ') includes: (key copyFrom: 1 to: (4 min: keySize)))
                            or: [(key copyFrom: 1 to: (3 min: keySize)) = 'SCH']].
                self addPrimaryTranslation: (plainT ifTrue: ['T'] ifFalse: ['0']).
                self addSecondaryTranslation: 'T'.
                ^ self incrementSkipCount: 1
        ].

        (#($T $D) includes: (self keyAt: currentIndex + 1)) ifTrue: [
                self incrementSkipCount: 1
        ].
        self addPrimaryTranslation: 'T'.
        self addSecondaryTranslation: 'T'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2706
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2707
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2708
processV
        "Encode the letter V found at currentIndex.
         Port of the 'V' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         V always adds F to both the primary and the secondary translation.
         When the next letter is a V as well, the skip count is incremented
         by 1 beforehand (the C reference advances by 2 instead of 1 there)."

        |isDoubled|

        isDoubled := (self keyAt: currentIndex + 1) = $V.
        isDoubled ifTrue: [self incrementSkipCount: 1].

        self addPrimaryTranslation: 'F'.
        self addSecondaryTranslation: 'F'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2727
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2728
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2729
processW
        "Encode the letter W found at currentIndex.
         Port of the 'W' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         Rules, in order (primary / secondary translation):
           1. WR -> R / R, skip count incremented by 1.
           2. Only at the very start of the key, when a vowel or an H follows:
                vowel follows -> A / F   (Wasserman should match Vasserman)
                otherwise     -> A / A   (need Uomo to match Womo)
              Processing continues with rule 3 afterwards; the C reference
              has no break at this point either.
           3. W is the last letter and follows a vowel (Arnow should match Arnoff),
              or W is the second letter of EWSKI, EWSKY, OWSKI, OWSKY,
              or the key starts with SCH
                -> '' / F.
           4. WICZ, WITZ (polish, e.g. 'filipowicz') -> TS / FX,
              skip count incremented by 3.
           5. Anything else: nothing is added.

         Corrections relative to the previous version:
           - rule 2 was grouped as (first and vowel) or WH; a WH in the middle
             of a key entered the branch with the next letter never fetched (nil).
             It is now first and (vowel or WH), as in the reference.
           - rule 4 compared five letters against the four-letter patterns,
             so WICZ / WITZ were only recognized when clipped by the end of the key.
           - neighbour letters are checked for nil before isVowel is sent
             (NOTE(review): assumes keyAt: answers nil outside the key - confirm)."

        |key keySize word vowelAt vowelFollows|

        key := self inputKey.
        keySize := key size.

        "answers true if there is a letter at index and it is a vowel"
        vowelAt := [:index | |letter|
                        letter := self keyAt: index.
                        letter notNil and: [letter isVowel]].

        word := key copyFrom: currentIndex to: ((currentIndex + 1) min: keySize).
        word = 'WR' ifTrue: [
                self addPrimaryTranslation: 'R';
                     addSecondaryTranslation: 'R'.
                ^ self incrementSkipCount: 1
        ].

        currentIndex = 1 ifTrue: [
                vowelFollows := vowelAt value: currentIndex + 1.
                (vowelFollows or: [word = 'WH']) ifTrue: [
                        self addPrimaryTranslation: 'A';
                             addSecondaryTranslation: (vowelFollows ifTrue: ['F'] ifFalse: ['A'])
                ]
        ].

        ((currentIndex = keySize and: [vowelAt value: currentIndex - 1])
            or: [(#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY') includes:
                        (key copyFrom: ((currentIndex - 1) max: 1) to: ((currentIndex + 3) min: keySize)))
            or: [key startsWith: 'SCH']])
        ifTrue: [
                self addPrimaryTranslation: '';
                     addSecondaryTranslation: 'F'.
                ^ self
        ].

        "four letters starting at the W: currentIndex .. currentIndex + 3"
        (#('WICZ' 'WITZ') includes: (key copyFrom: currentIndex to: ((currentIndex + 3) min: keySize)))
        ifTrue: [
                self addPrimaryTranslation: 'TS';
                     addSecondaryTranslation: 'FX'.
                ^ self incrementSkipCount: 3
        ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2807
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2808
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2809
processX
        "Encode the letter X found at currentIndex.
         Port of the 'X' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         X adds KS to both the primary and the secondary translation, unless it
         is a french final X: the last letter of the key, directly preceded by
         IAU, EAU, AU or OU (e.g. breaux). In that case nothing is added.
         When the next letter is C or X, the skip count is incremented by 1.

         Correction relative to the previous version:
           the letters in front of the X were fetched with
               copyFrom: ((currentIndex - n) min: 1) to: currentIndex
           which always starts at 1 (or below) and includes the X itself, so
           the comparison against IAU / EAU / AU / OU could never succeed and
           KS was added even for breaux. The window is now the n letters
           before the X, with the start clamped to 1 by max:."

        |key precededBy silentFinalX|

        key := self inputKey.

        "up to n letters directly in front of the current one (empty at the start of the key)"
        precededBy := [:n | key copyFrom: ((currentIndex - n) max: 1) to: currentIndex - 1].

        silentFinalX := (currentIndex = key size)
                            and: [(#('IAU' 'EAU') includes: (precededBy value: 3))
                                    or: [#('AU' 'OU') includes: (precededBy value: 2)]].

        silentFinalX ifFalse: [
                self addPrimaryTranslation: 'KS';
                     addSecondaryTranslation: 'KS'
        ].

        (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue: [
                ^ self incrementSkipCount: 1
        ]

    "Modified: / 24-07-2011 / 06:54:25 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2839
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2840
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2841
processZ
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2842
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2843
        case 'Z':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2844
                                //chinese pinyin e.g. 'zhao'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2845
                                if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2846
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2847
                                        MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2848
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2849
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2850
                                }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2851
                                        if(StringAt((current + 1), 2, ZO, ZI, ZA, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2852
                                                OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2853
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2854
                                                MetaphAdd(S, TS);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2855
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2856
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2857
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2858
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2859
                                if(GetAt(current + 1) == 'Z')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2860
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2861
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2862
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2863
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2864
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2865
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2866
        (self keyAt: currentIndex + 1) = $H
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2867
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2868
                self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2869
                addSecondaryTranslation: 'J'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2870
                ^self incrementSkipCount: 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2871
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2872
                ((#('ZO' 'ZI' 'ZA') includes: (self inputKey copyFrom: ((currentIndex + 1) min: self inputKey size) to: ((currentIndex + 2) min: self inputKey size))) or: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2873
                        (self isSlavoGermanic: self inputKey) and: [(currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= 'T'])]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2874
                ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2875
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2876
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2877
                        addSecondaryTranslation: 'TS'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2878
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2879
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2880
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2881
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2882
                (self keyAt: currentIndex + 1) = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2883
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2884
                        ^self incrementSkipCount: 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2885
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2886
        ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2887
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2888
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2889
!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2890
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2891
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2892
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2893
    Miracode (also called American Soundex) is like Soundex with the addition that h and w are 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2894
    discarded if they separate consonants (consonants with the same code that are separated 
    only by h or w are encoded once).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2895
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2896
    These variants may be particularly important because they were used by the U.S. National Archives. 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2897
    Most archive data were encoded with Miracode, but there are some entries encoded with 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2898
    Simplified Soundex. 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2899
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2900
    The HW-rule was documented as a standard in 1910, but in practice data from the 1880, 1900 and 1910 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2901
    censuses were encoded with mixed methods.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2902
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2903
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2904
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2905
!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2906
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2907
phoneticStringsFor:aString 
    "Answer an Array holding a single 4-character Miracode key for aString.
     The key starts with the uppercased first character of aString and is
     extended by the codes answered by #translate: for the remaining characters.
     A code is skipped when it is nil, '0', or equal to the remembered previous
     code. W and H never change the remembered code, so equally coded characters
     which are separated only by W or H contribute a single code.
     Keys with fewer than 4 characters are padded on the right with '0'.
     Note: the first character is accessed unconditionally, so aString is
     expected to be non-empty."

    |upper key code lastCode ch|

    upper := aString asUppercase.
    key := upper first asString.
    lastCode := self translate:upper first.
    2 to:upper size do:[:idx |
        ch := upper at:idx.
        code := self translate:ch.
        ((code isNil or:[ code = '0' ]) or:[ code = lastCode ]) ifFalse:[
            key := key , code.
            "the key is complete as soon as it holds 4 characters"
            key size == 4 ifTrue:[^ Array with:key ].
        ].
        "W and H are transparent: they leave the remembered code untouched"
        (ch = $W or:[ ch = $H ]) ifFalse:[
            lastCode := code.
        ].
    ].
    "fewer than three codes were collected - pad with zeros"
    [ key size < 4 ] whileTrue:[
        key := key , '0'
    ].
    ^ Array with:(key copyFrom:1 to:4)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2929
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2930
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2931
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2932
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2933
version
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  2934
    "Answer the version header string of this class.
     NOTE(review): '$Header$' looks like a keyword placeholder to be expanded by the
     source code management system - confirm before editing the literal."
    ^ '$Header$'
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2935
!
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2936
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2937
version_CVS
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  2938
    "Answer the CVS version header string of this class.
     NOTE(review): '$Header$' looks like a keyword placeholder to be expanded by the
     source code management system - confirm before editing the literal."
    ^ '$Header$'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2939
! !
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  2940