PhoneticStringUtilities.st
author Claus Gittinger <cg@exept.de>
Thu, 01 Oct 2009 10:46:37 +0200
changeset 2285 0527d18cfec9
parent 2215 c1f072a78366
child 2445 d55a3b1e8791
permissions -rw-r--r--
changed: #documentation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
	instanceVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
	classVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
	poolDictionaries:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    22
Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    23
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    24
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    25
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    26
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    27
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    28
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    29
PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    30
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    31
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    32
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    33
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    34
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    35
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    36
PhoneticStringUtilities::PhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    37
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    38
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    39
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    40
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    41
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    42
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    43
PhoneticStringUtilities::PhoneticStringComparator subclass:#SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    44
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    45
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    46
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    47
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    48
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    49
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    50
PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    51
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    52
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    53
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    54
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    55
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    56
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    57
Object subclass:#NYSIISStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    58
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    59
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    60
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    61
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    62
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    63
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    64
PhoneticStringUtilities::PhoneticStringComparator subclass:#PhonemStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    65
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    66
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    67
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    68
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    69
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    70
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    71
PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    72
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    73
		currentIndex skipCount'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    74
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    75
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    76
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    77
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    78
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    79
PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    80
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    81
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    82
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    83
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    84
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    85
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    86
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    87
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
copyright
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    91
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    94
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    97
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    98
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
documentation
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
"
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   105
    Utilities which are helpful to perform phonetic string searches or comparisons
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   106
    
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   107
    soundexCode
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   108
        this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   109
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   110
    nysiis
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   111
        a modified soundex algorithm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   112
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   113
    miracode
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   114
        another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   115
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   116
    mySQLSoundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   117
        another modified soundex algorithm used in mySQL.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   118
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   119
    koelner phoneticCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   120
        provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   121
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   122
    Double metaphone 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   123
        works with most european languages.
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   124
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   125
    phonem
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   126
        described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   127
        from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   128
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   129
    More info for german readers is found in:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   130
        http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   131
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   132
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   133
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   134
sampleData
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   135
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   136
    for the 50 most common german names, we get:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   137
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   138
                            ext. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   139
    name        soundex   soundex   metaphone   phonet  phonet2     phonix      daitsch phonem      koeln
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   140
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   141
    müller      M460    54600000    MLR         MÜLA    NILA        M4000000    689000  MYLR        657
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   142
    schmidt     S253    25300000    SKMTT       SHMIT   ZNIT        S5300000    463000  CMYD        8628
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   143
    schneider   S253    25360000    SKNTR       SHNEIDA ZNEITA      S5300000    463900  CNAYDR      8627
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   144
    fischer     F260    12600000    FSKR        FISHA   FIZA        F8000000    749000  VYCR        387
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   145
    weber       W160    16000000    WBR         WEBA    FEBA        $1000000    779000  VBR         317
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   146
    meyer       M600    56000000    MYR         MEIA    NEIA        M0000000    619000  MAYR        67
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   147
    wagner      W256    25600000    WKNR        WAKNA   FAKNA       $2500000    756900  VACNR       367
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   148
    schulz      S242    24200000    SKLS        SHULS   ZULZ        S4800000    484000  CULC        85
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   149
    becker      B260    12600000    BKR         BEKA    BEKA        B2000000    759000  BCR         147
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   150
    hoffmann    H155    15500000    HFMN        HOFMAN  UFNAN       $7550000    576600  OVMAN       036
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   151
    schäfer     S216    21600000    SKFR        SHEFA   ZEFA        S7000000    479000  CVR         837
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   152
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   153
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   154
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   155
!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   156
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   157
koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   158
    "return a koelner phonetic code.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   159
     The koelnerPhonetic code is for the german language what the soundex code is for english;
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   160
     it returns similar strings for similar sounding words. 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   161
     There are some differences to soundex, though: 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   162
        its length is not limited to 4, but depends on the length of the original string;
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   163
        it does not start with the first character of the input.
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   164
     This algorithm is described by Postel 1969"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   165
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   166
    ^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   167
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   168
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   169
     #(
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   170
        'Müller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
        'Miller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
        'Mueller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   173
        'Mühler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
        'Mühlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
        'Mülherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
        'Myler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
        'Millar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   178
        'Myller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   179
        'Müllar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   180
        'Müler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   181
        'Muehler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   182
        'Mülller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   183
        'Müllerr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   184
        'Muehlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   185
        'Muellar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   186
        'Mueler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
        'Mülleer'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
        'Mueller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   189
        'Nüller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   190
        'Nyller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   191
        'Niler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   192
        'Czerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   193
        'Tscherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   194
        'Czernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   195
        'Tschernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
        'Schernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
        'Scherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
        'Scherno'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   199
        'Czerne'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   200
        'Zerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
        'Tzernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   202
        'Breschnew'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   203
     ) do:[:w |
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   204
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   205
     ].
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   206
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   207
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   208
    "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   209
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   210
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   211
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   212
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   213
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   214
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   215
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   216
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   217
!
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   218
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   219
mySQLSoundexCodeOf:aString
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   220
    "return the mySQL soundex code. The mysql soundex code is different from the miracode 'american' soundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   221
     (no 4char limitation; different order of duplicate vowel vs. duplicate code elimination)"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   222
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   223
    ^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   224
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   225
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   226
     #(
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   227
        'Müller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   228
        'Miller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   229
        'Mueller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   230
        'Mühler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   231
        'Mühlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   232
        'Mülherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   233
        'Myler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   234
        'Millar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   235
        'Myller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   236
        'Müllar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   237
        'Müler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   238
        'Muehler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   239
        'Mülller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   240
        'Müllerr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   241
        'Muehlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   242
        'Muellar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   243
        'Mueler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   244
        'Mülleer'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   245
        'Mueller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   246
        'Nüller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   247
        'Nyller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   248
        'Niler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   249
        'Czerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   250
        'Tscherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   251
        'Czernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   252
        'Tschernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   253
        'Schernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   254
        'Scherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   255
        'Scherno'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   256
        'Czerne'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   257
        'Zerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   258
        'Tzernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   259
        'Breschnew'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   260
     ) do:[:w |
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   261
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   262
     ].
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   263
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   264
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   265
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   266
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   267
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   268
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   269
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   270
     PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   271
     PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   272
     PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
soundexCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
    "return a soundex phonetic code or nil.
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   278
     Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
     tool when searching for words where the correct spelling is unknown.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
     (read Knuth or search the web if you don't know what a soundex code is).
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
     Caveat: 'similar sounding words' means: 'similar sounding in English'.
     The answer is the first of the strings returned by
     SoundexStringComparator>>phoneticStringsFor: for aString."
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   282
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   283
    ^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   284
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   285
"/ old code - now use code in private class...
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   286
"/    |inStream codeStream ch last lch codeLength codes code lastCode|
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   287
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   288
"/    inStream := aString readStream.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   289
"/    inStream skipSeparators.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   290
"/    inStream atEnd ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   291
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   292
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   293
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   294
"/    ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   295
"/    ch isLetter ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   296
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   297
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   298
"/    codeLength := 0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   299
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   300
"/    codes := Dictionary new.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   301
"/    codes atAll:'bpfv'     put:$1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   302
"/    codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   303
"/    codes atAll:'dt'       put:$3.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   304
"/    codes atAll:'l'        put:$4.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   305
"/    codes atAll:'nm'       put:$5.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   306
"/    codes atAll:'r'        put:$6.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   307
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   308
"/    codeStream := WriteStream on:(String new:4).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   309
"/    codeStream nextPut:(ch asUppercase).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   310
"/    last := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   311
"/    lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   312
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   313
"/    [inStream atEnd] whileFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   314
"/        ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   315
"/        lch := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   316
"/        lch = last ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   317
"/            last := lch.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   318
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   319
"/            code := codes at:lch ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   320
"/            (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   321
"/                codeLength < 3 ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   322
"/                    codeStream nextPut:code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   323
"/                    codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   324
"/                    codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   325
"/                ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   326
"/            ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   327
"/            lastCode := code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   328
"/        ]
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   329
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   330
"/    [ codeLength < 3 ] whileTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   331
"/        codeStream nextPut:$0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   332
"/        codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   333
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   334
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   335
"/    ^ codeStream contents
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   336
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   337
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   338
     PhoneticStringUtilities soundexCodeOf:'claus'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   339
     PhoneticStringUtilities soundexCodeOf:'clause'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   340
     PhoneticStringUtilities soundexCodeOf:'close'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   341
     PhoneticStringUtilities soundexCodeOf:'smalltalk' 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   342
     PhoneticStringUtilities soundexCodeOf:'smaltalk'  
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   343
     PhoneticStringUtilities soundexCodeOf:'smaltak'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   344
     PhoneticStringUtilities soundexCodeOf:'smaltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   345
     PhoneticStringUtilities soundexCodeOf:'smoltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   346
     PhoneticStringUtilities soundexCodeOf:'aa'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   347
     PhoneticStringUtilities soundexCodeOf:'by'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   348
     PhoneticStringUtilities soundexCodeOf:'bab'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   349
     PhoneticStringUtilities soundexCodeOf:'bob'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   350
     PhoneticStringUtilities soundexCodeOf:'bop'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   351
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   352
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   353
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   354
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   355
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   356
defaultClass
    "Answer the comparator class to use by default: SoundexStringComparator."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   357
	^SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   358
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   359
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   360
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   361
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   362
new
    "Answer a new comparator: allocate it with #basicNew, then send it #initialize.
     Note that the answer is whatever #initialize returns, so an overriding
     #initialize must answer the receiver."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   363
    ^ self basicNew initialize.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   364
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   365
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   366
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   367
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   368
does:aString soundLike:anotherString 
    "Answer true if at least one phonetic string generated for aString is equal
     to at least one phonetic string generated for anotherString.
     Both sets of strings come from #phoneticStringsFor:, which the concrete
     comparator subclass provides."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   369
    |translations1 translations2|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   370
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   371
    translations1 := self phoneticStringsFor:aString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   372
    translations2 := self phoneticStringsFor:anotherString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   373
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   374
    "pairwise comparison; #contains: is used here as an any-element-satisfies test"
    ^ translations1 contains:[:t1 | 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   375
        translations2 contains:[:t2 | t1 = t2]]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   376
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   377
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   378
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   379
            does:'miller' soundLike:'miler'.   
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   380
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   381
            does:'miller' soundLike:'milner'.   
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   382
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   383
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   384
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   385
phoneticStringsFor: aString
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   386
    "Should answer an array of alternate phonetic strings for the given input string.
     Abstract here: concrete comparator subclasses must implement it.
     #does:soundLike: compares the elements of the answered collections."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   387
    "return the result of the error, so that a proceeded error does not
     silently answer the receiver in place of a collection of strings"
    ^ self subclassResponsibility
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   388
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   389
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   390
     (PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   391
            phoneticStringsFor:'miller') first      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   392
     'miller' asSoundexCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   393
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   394
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   395
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   396
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   397
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   398
initialize
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   399
    "Invoked when a new instance is created (sent by the class-side #new)."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   400
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   401
    "/ nothing is set up here: the method has no statements and answers the receiver
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   402
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   403
    "/ super initialize.   -- commented since inherited method does nothing
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   404
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   405
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   406
!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   407
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   408
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   409
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   410
    There are many extended and enhanced soundex variants around;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   411
    here is one, called 'extended soundex'. It is described for example in
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   412
    http://www.epidata.dk/documentation.php.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   413
    Its author and origin are unknown.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   414
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   415
    The number of digits is increased to 5 or 8;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   416
    The first character is not used literally; instead it is encoded like the rest.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   417
    This might have a negative effect on names starting with a vowel, though.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   418
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   419
    Overall, it can be doubted if this is really an enhancement after all.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   420
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   421
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   422
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   423
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   424
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   425
phoneticStringsFor:aString
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   426
    "generates both an extended soundex of length 5 and one of length 8.
     Answers a two-element Array: the 5-digit code and the 8-digit code.
     Every character of the uppercased input (including the first one) is
     mapped through #translate:; both codes are right-padded with '0'."
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   427
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   428
    |first second u t prevCode|
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   429
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   430
    u := aString asUppercase.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   431
    "first and second collect the same digits; they differ only in final length"
    first := second := ''.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   432
    u do:[:c | 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   433
        t := self translate:c.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   434
        "skip characters without a code (nil), separators ('0'),
         and a repetition of the previous character's code"
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   435
            first := first , t.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   436
            second := second , t.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   437
            "early exit as soon as the long code is complete"
            second size == 8 ifTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   438
                ^ Array with:(first copyTo:5) with:second 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   439
            ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   440
        ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   441
        "remember the code of every character, even nil or '0', so that
         two equal codes separated by another character are both kept"
        prevCode := t
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   442
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   443
    "pad both codes until the short one has 5 digits ..."
    [ first size < 5 ] whileTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   444
        first := first , '0'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   445
        second := second , '0'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   446
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   447
    "... then pad the long one up to 8 digits"
    [ second size < 8 ] whileTrue:[
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   448
        second := second , '0'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   449
    ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   450
    "first has at least 5 digits here, but may have 6 or 7 when the input
     produced that many codes without reaching 8; cut it to 5 as on the
     early-exit path above"
    ^ Array with:(first copyTo:5) with:second
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   451
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   452
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   453
     self basicNew phoneticStringsFor:'müller'  #('87900' '87900000')  
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   454
     self basicNew phoneticStringsFor:'miller'  #('87900' '87900000')   
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   455
     self basicNew phoneticStringsFor:'muller'  #('87900' '87900000')    
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   456
     self basicNew phoneticStringsFor:'muler'   #('87900' '87900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   457
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   458
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   459
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   460
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   461
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   462
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   463
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   464
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   465
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   466
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   467
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   468
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   469
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   470
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   471
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   472
translate:aCharacter
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   473
    "use simple if's for more speed when compiled"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   474
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   475
    "vowels serve as separators"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   476
    aCharacter == $A ifTrue:[^ '0' ].         
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   477
    aCharacter == $E ifTrue:[^ '0' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   478
    aCharacter == $I ifTrue:[^ '0' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   479
    aCharacter == $O ifTrue:[^ '0' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   480
    aCharacter == $U ifTrue:[^ '0' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   481
    aCharacter == $Y ifTrue:[^ '0' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   482
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   483
    aCharacter == $B ifTrue:[^ '1' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   484
    aCharacter == $P ifTrue:[^ '1' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   485
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   486
    aCharacter == $F ifTrue:[^ '2' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   487
    aCharacter == $V ifTrue:[^ '2' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   488
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   489
    aCharacter == $C ifTrue:[^ '3' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   490
    aCharacter == $S ifTrue:[^ '3' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   491
    aCharacter == $K ifTrue:[^ '3' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   492
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   493
    aCharacter == $G ifTrue:[^ '4' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   494
    aCharacter == $J ifTrue:[^ '4' ].
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   495
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   496
    aCharacter == $Q ifTrue:[^ '5' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   497
    aCharacter == $X ifTrue:[^ '5' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   498
    aCharacter == $Z ifTrue:[^ '5' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   499
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   500
    aCharacter == $D ifTrue:[^ '6' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   501
    aCharacter == $G ifTrue:[^ '6' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   502
    aCharacter == $T ifTrue:[^ '6' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   503
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   504
    aCharacter == $L ifTrue:[^ '7' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   505
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   506
    aCharacter == $M ifTrue:[^ '8' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   507
    aCharacter == $N ifTrue:[^ '8' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   508
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   509
    aCharacter == $R ifTrue:[^ '9' ]. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   510
    ^ nil
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   511
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   512
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   513
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   514
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   515
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   516
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   517
     The koelnerPhonetic code is for the german language what the soundex code is for english.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   518
     It returns similar strings for similar sounding words. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   519
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   520
     There are some differences to soundex, though: 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   521
        its length is not limited to 4, but depends on the length of the original string;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   522
        it does not start with the first character of the input.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   523
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   524
     This algorithm was described by Postel 1969
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   525
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   526
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   527
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   528
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   529
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   530
phoneticStringsFor: aString
    "Return a one-element Array containing the koelner phonetic code of aString.

     The koelnerPhonetic code is for the german language what the soundex code is for english;
     it returns similar strings for similar sounding words.
     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input.
     This algorithm is described by Postel 1969.

     Processing steps:
        1. strip separators, lowercase, map 'ph' to 'f' and the umlauts / sharp-s to plain letters
        2. translate every letter (looking at its left and right neighbour) into its code
        3. drop the placeholder generated for 'h' (it neither has a code nor separates its neighbours)
        4. collapse runs of identical codes into a single one
        5. remove the vowel code '0' everywhere, except at the very beginning

     Steps 4 and 5 must be done in this order: vowels separate equal consonant codes,
     so 'hoffmann' is 0336066 -> 03606 -> 0366.
     (Removing the zeros first would wrongly merge the 'm' with the 'nn' and give 036.)

     Characters for which no rule exists (digits, punctuation) are translated to '?'."

    |in codes collapsed rslt|

    in := aString withoutSeparators asLowercase.
    in := in copyReplaceString:'ph' withString:'f'.
    in := in copyReplaceAll:$ü withAll:'u'.
    in := in copyReplaceAll:$ä withAll:'a'.
    in := in copyReplaceAll:$ö withAll:'o'.
    in := in copyReplaceAll:$ß withAll:'ss'.
    "pad both ends, so that every letter can be passed on together with its two neighbours"
    in := '#',in,'#'.

    codes := String streamContents:[:s |
        1 to:in size-2 do:[:i |
            |sub|

            sub := in copyFrom:i to:i+2.
            s nextPutAll:((i==1)
                            ifTrue:[ self convertFirst:sub ]
                            ifFalse:[ self convertRest:sub ])
        ]
    ].

    "the 'h' placeholder vanishes completely"
    codes := codes select:[:ch | ch ~= $-].

    "collapse adjacent duplicates while the vowel codes are still in place as separators"
    collapsed := String streamContents:[:s |
        |prev|

        codes do:[:ch |
            ch ~= prev ifTrue:[
                s nextPut:ch
            ].
            prev := ch.
        ].
    ].

    "finally drop the vowel codes; only a leading one is kept"
    (collapsed startsWith:'0') ifTrue:[
        rslt := '0',(collapsed select:[:ch | ch ~= $0]).
    ] ifFalse:[
        rslt := collapsed select:[:ch | ch ~= $0].
    ].
    ^ Array with:rslt.

    "
     #(
        'Müller'
        'Miller'
        'Mueller'
        'Mühler'
        'Mühlherr'
        'Mülherr'
        'Myler'
        'Millar'
        'Myller'
        'Müllar'
        'Müler'
        'Muehler'
        'Mülller'
        'Müllerr'
        'Muehlherr'
        'Muellar'
        'Mueler'
        'Mülleer'
        'Mueller'
        'Nüller'
        'Nyller'
        'Niler'
        'Czerny'
        'Tscherny'
        'Czernie'
        'Tschernie'
        'Schernie'
        'Scherny'
        'Scherno'
        'Czerne'
        'Zerny'
        'Tzernie'
        'Breschnew'
     ) do:[:w |
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:w) first
     ].
    "

    "
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Breschnew'      #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Breschneff'     #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Braeschneff'    #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Braessneff'     #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Pressneff'      #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Presznäph'      #('17863')
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new phoneticStringsFor:'Präschnäf'      #('17863')
    "
    "
     self basicNew phoneticStringsFor:'müller'      #('657')
     self basicNew phoneticStringsFor:'möller'      #('657')
     self basicNew phoneticStringsFor:'miller'      #('657')
     self basicNew phoneticStringsFor:'muller'      #('657')
     self basicNew phoneticStringsFor:'muler'       #('657')
     self basicNew phoneticStringsFor:'schmidt'     #('862')
     self basicNew phoneticStringsFor:'schneider'   #('8627')
     self basicNew phoneticStringsFor:'fischer'     #('387')
     self basicNew phoneticStringsFor:'weber'       #('317')
     self basicNew phoneticStringsFor:'meyer'       #('67')
     self basicNew phoneticStringsFor:'wagner'      #('3467')
     self basicNew phoneticStringsFor:'schulz'      #('858')
     self basicNew phoneticStringsFor:'becker'      #('147')
     self basicNew phoneticStringsFor:'hoffmann'    #('0366')
     self basicNew phoneticStringsFor:'schäfer'     #('837')
    "
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   646
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   647
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   648
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   649
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   650
convertFirst:chars
    "Answer the code string for the first letter of a word.
     chars is the 3-character window around that letter, as cut out by
     phoneticStringsFor: (left neighbour, the letter itself, right neighbour).
     The table is searched top-down and the first glob pattern which matches
     chars wins ('#' in a pattern stands for any single character).
     Only vowels and the word-initial 'c' get a special treatment here;
     every other window is handed over to convertRest:"

    |hit|

    hit := #(
                ('#a#' '0')
                ('#e#' '0')
                ('#i#' '0')
                ('#j#' '0')
                ('#y#' '0')
                ('#o#' '0')
                ('#u#' '0')

                ('#ca' '4')
                ('#ch' '4')
                ('#ck' '4')
                ('#cl' '4')
                ('#co' '4')
                ('#cq' '4')
                ('#cr' '4')
                ('#cu' '4')
                ('#cx' '4')

                ('#c#' '8')
            ) detect:[:rule | (rule at:1) match:chars] ifNone:[nil].

    hit isNil ifTrue:[
        ^ self convertRest:chars
    ].
    ^ hit at:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   678
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   679
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   680
convertRest:chars
    "Answer the code string for a letter, given the 3-character window chars
     (left neighbour, the letter itself, right neighbour).
     The rule table is searched top-down and the code of the first glob pattern
     which matches chars is answered ('#' in a pattern stands for any single
     character), so the context dependent rules must stay in front of the
     general ones.
     'h' is answered as '-' and anything without a rule of its own as '?'
     (via the final catch-all pattern)."

    |hit|

    hit := #(
                ('#ds' '8')
                ('#dc' '8')
                ('#dz' '8')
                ('#ts' '8')
                ('#tc' '8')
                ('#tz' '8')
                ('#d#' '2')
                ('#t#' '2')
                ('cx#' '8')
                ('kx#' '8')
                ('qx#' '8')
                ('#x#' '48')
                ('sc#' '8')
                ('sz#' '8')
                ('#ca' '4')
                ('#co' '4')
                ('#cu' '4')
                ('#ch' '4')
                ('#ck' '4')
                ('#cx' '4')
                ('#cq' '4')
                ('#c#' '8')
                ('#a#' '0')
                ('#e#' '0')
                ('#i#' '0')
                ('#j#' '0')
                ('#y#' '0')
                ('#o#' '0')
                ('#u#' '0')
                ('#h#' '-')
                ('#l#' '5')
                ('#r#' '7')
                ('#m#' '6')
                ('#n#' '6')
                ('#s#' '8')
                ('#z#' '8')
                ('#b#' '1')
                ('#p#' '1')
                ('#f#' '3')
                ('#v#' '3')
                ('#w#' '3')
                ('#g#' '4')
                ('#k#' '4')
                ('#q#' '4')
                ('###' '?')
            ) detect:[:rule | (rule at:1) match:chars] ifNone:[nil].

    hit notNil ifTrue:[
        ^ hit at:2
    ].

    "only reached if chars is not a 3-character string"
    self error:'cannot happen'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   734
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   735
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   736
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   737
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   738
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   739
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   740
WARNING: this is the so called 'simplified soundex' algorithm;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   741
there are more variants like miracode (american soundex) or mysqlSoundex around.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   742
Be sure to use the correct algorithm, if the generated strings must be compatible
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   743
(otherwise, the differences are probably too small to be noticed as effect)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   744
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   745
The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   746
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   747
SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   748
components of names, but by doing so reports more matches. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   749
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   750
There are some variations around in the literature; 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   751
the following is called 'simplified soundex', and the rules for coding a name are:
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   752
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   753
1. The first letter of the name is used in its un-coded form to serve as the prefix
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   754
   character of the code. (The rest of the code is numerical).
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   755
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   756
2. Thereafter, W and H are ignored entirely.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   757
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   758
3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   759
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   760
4. Other letters of the name are converted to a numerical equivalent:
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   761
             B, P, F, V              1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   762
             C, G, J, K, Q, S, X, Z  2 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   763
             D, T                    3 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   764
             L                       4 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   765
             M, N                    5 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   766
             R                       6 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   767
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   768
5. There are two exceptions: 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   769
    1. Letters that follow prefix letters which would, if coded, have the same
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   770
       numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   771
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   772
    2. The second letter of any pair of consonants having the same code number is likewise ignored, 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   773
       i.e. unless there is a ''separator'' between them in the name.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   774
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   775
6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   776
   Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   777
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   778
Notice, that in another variant, w and h are treated slightly differently.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   779
This is only of relevance, if you need to reconstruct original soundex codes of other programs
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   780
or for the original 1880 US census data.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   781
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   782
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   783
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   784
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   785
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   786
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   787
phoneticStringsFor:aString 
    "Answer an Array holding a single 4-character soundex key for aString.
     The key is the uppercased first character of aString, followed by up to
     three code digits (as answered by translate:) and padded with '0'.
     A digit is appended only if it is neither the vowel marker '0' nor equal
     to the code of the immediately preceding character.
     Every character updates the remembered code, including those for which
     translate: answers nil, so such characters also separate equal codes."

    |upper code lastCode digit|

    upper := aString asUppercase.
    code := upper first asString.
    lastCode := self translate:upper first.
    2 to:upper size do:[:idx |
        digit := self translate:(upper at:idx).
        (digit isNil or:[ digit = '0' or:[ digit = lastCode ]]) ifFalse:[
            code := code , digit.
            "three digits collected - the key is complete"
            code size == 4 ifTrue:[^ Array with:code ].
        ].
        lastCode := digit
    ].

    "fewer than three digits were found - fill up with zeros"
    code size + 1 to:4 do:[:ignored |
        code := code , '0'
    ].
    ^ Array with:(code copyFrom:1 to:4)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   805
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   806
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   807
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   808
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   809
translate:aCharacter
    "Answer the soundex code of aCharacter as a one-character string,
     or nil if the character has no code.
     Only uppercase letters are recognized:
        A E I O U Y         -> '0'  (vowels; they serve as separators)
        B P F V             -> '1'
        C S K G J Q X Z     -> '2'
        D T                 -> '3'
        L                   -> '4'
        M N                 -> '5'
        R                   -> '6'
     Everything else (including W and H) answers nil.
     Only simple identity tests are used, for more speed when compiled."

    "vowels serve as separators"
    (aCharacter == $A or:[ aCharacter == $E or:[ aCharacter == $I
        or:[ aCharacter == $O or:[ aCharacter == $U or:[ aCharacter == $Y ]]]]])
            ifTrue:[^ '0' ].

    (aCharacter == $B or:[ aCharacter == $P
        or:[ aCharacter == $F or:[ aCharacter == $V ]]])
            ifTrue:[^ '1' ].

    (aCharacter == $C or:[ aCharacter == $S or:[ aCharacter == $K
        or:[ aCharacter == $G or:[ aCharacter == $J or:[ aCharacter == $Q
        or:[ aCharacter == $X or:[ aCharacter == $Z ]]]]]]])
            ifTrue:[^ '2' ].

    (aCharacter == $D or:[ aCharacter == $T ]) ifTrue:[^ '3' ].

    aCharacter == $L ifTrue:[^ '4' ].

    (aCharacter == $M or:[ aCharacter == $N ]) ifTrue:[^ '5' ].

    aCharacter == $R ifTrue:[^ '6' ].

    "no code for this character"
    ^ nil
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   844
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   845
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   846
!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   847
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   848
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   849
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   850
MySQL soundex is like american Soundex (i.e. miracode) without the 4 character limitation,
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   851
and also removing vowels first, then removing duplicate codes
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   852
(whereas the soundex code does this in reverse order).
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   853
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   854
These variations are important, if you need the same soundex codes to be generated.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   855
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   856
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   857
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   858
!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   859
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   860
phoneticStringsFor:aString 
    "Answer an Array holding a single MySQL-style soundex key for aString.
     The key is the uppercased first character of aString followed by the
     code digits (as answered by translate:) of the remaining characters;
     it is padded with '0' to at least 4 characters, but never truncated.
     A digit is appended only if it is neither the vowel marker '0' nor equal
     to the remembered previous code.
     Vowels, W and H leave the remembered code untouched, so equal codes
     separated only by those letters are still collapsed into one."

    |upper code lastCode digit|

    upper := aString asUppercase.
    code := upper first asString.
    lastCode := self translate:upper first.
    2 to:upper size do:[:idx |
        |ch|

        ch := upper at:idx.
        digit := self translate:ch.
        (digit isNil or:[ digit = '0' or:[ digit = lastCode ]]) ifFalse:[
            code := code , digit.
        ].
        "vowels, W and H do not change the remembered code"
        (digit = '0' or:[ ch = $W or:[ ch = $H ]]) ifFalse:[
            lastCode := digit.
        ].
    ].

    "short keys are filled up with zeros; longer ones are kept as they are"
    code size + 1 to:4 do:[:ignored |
        code := code , '0'
    ].
    ^ Array with:code
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   879
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   880
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   881
!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   882
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   883
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   884
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   885
NYSIIS Algorithm:
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   886
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   887
1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   888
    remove all ''S'' and ''Z'' chars from the end of the surname 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   889
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   890
2.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   891
    transcode initial strings
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   892
        MAC => MC
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   893
        PF => F
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   894
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   895
3.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   896
    Transcode trailing strings as follows,
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   897
    
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   898
        IX => IC
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   899
        EX => EC
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   900
        YE,EE,IE => Y
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   901
        NT,ND => D 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   902
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   903
4.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   904
    transcode ''EV'' to ''EF'' if not at start of name
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   905
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   906
5.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   907
    use first character of name as first character of key 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   908
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   909
6.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   910
    remove any ''W'' that follows a vowel 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   911
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   912
7.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   913
    replace all vowels with ''A'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   914
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   915
8.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   916
    transcode ''GHT'' to ''GT'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   917
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   918
9.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   919
    transcode ''DG'' to ''G'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   920
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   921
10.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   922
    transcode ''PH'' to ''F'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   923
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   924
11.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   925
    if not first character, eliminate all ''H'' preceded or followed by a vowel 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   926
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   927
12.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   928
    change ''KN'' to ''N'', else ''K'' to ''C'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   929
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   930
13.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   931
    if not first character, change ''M'' to ''N'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   932
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   933
14.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   934
    if not first character, change ''Q'' to ''G'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   935
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   936
15.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   937
    transcode ''SH'' to ''S'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   938
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   939
16.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   940
    transcode ''SCH'' to ''S'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   941
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   942
17.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   943
    transcode ''YW'' to ''Y'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   944
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   945
18.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   946
    if not first or last character, change ''Y'' to ''A'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   947
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   948
19.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   949
    transcode ''WR'' to ''R'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   950
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   951
20.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   952
    if not first character, change ''Z'' to ''S'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   953
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   954
21.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   955
    transcode terminal ''AY'' to ''Y'' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   956
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   957
22.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   958
    remove trailing vowels
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   959
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   960
23.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   961
    collapse all strings of repeated characters 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   962
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   963
24.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   964
    if first char of original surname was a vowel, append it to the code
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   965
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   966
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   967
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   968
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   969
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   970
phoneticStringsFor:aString 
    "Answer an Array holding the single NYSIIS key computed for aString.
     The uppercased input is passed through rule1: .. rule23: in that order,
     each rule receiving the result of the previous one.
     Finally rule24:originalKey: is applied; it is also given the original,
     not uppercased, aString."

    |key|

    key := aString asUppercase.
    #(
        #rule1:  #rule2:  #rule3:  #rule4:  #rule5:  #rule6:
        #rule7:  #rule8:  #rule9:  #rule10: #rule11: #rule12:
        #rule13: #rule14: #rule15: #rule16: #rule17: #rule18:
        #rule19: #rule20: #rule21: #rule22: #rule23:
    ) do:[:ruleSelector |
        key := self perform:ruleSelector with:key
    ].
    key := self rule24:key originalKey:aString.
    ^ Array with:key

    "
     self new phoneticStringsFor:'hello'
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1002
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1003
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1004
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1005
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1006
rule10:key 
    "10. transcode 'PH' to 'F'.
     Delegates to transcodeAll:of:to:startingAt: (defined elsewhere in this
     class), starting at the first character of key, and answers its result."

    |transcoded|

    transcoded := self transcodeAll:'PH' of:key to:'F' startingAt:1.
    ^ transcoded
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1014
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1015
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1016
rule11:key 
    "11. if not first character, eliminate all 'H' preceded or followed by a vowel.
     Answers a copy of key without those characters; key itself is not modified.
     The neighbours of each 'H' are always looked up in the unmodified key,
     so removing one 'H' does not affect the decision for another one."

    |result doomed lastIndex|

    lastIndex := key size.

    "pass 1: remember the positions to be removed (in ascending order)"
    doomed := OrderedCollection new.
    2 to:lastIndex do:[:idx |
        ((key at:idx) = $H
            and:[ (key at:idx - 1) isVowel
                  or:[ idx < lastIndex and:[ (key at:idx + 1) isVowel ] ] ])
        ifTrue:[ doomed add:idx ]
    ].

    "pass 2: cut them out back to front, so that the remaining
     (smaller) positions stay valid"
    result := key copy.
    doomed reverseDo:[:idx |
        result := (result copyFrom:1 to:idx - 1) , (result copyFrom:idx + 1 to:result size)
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1032
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1033
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1034
rule12:key 
    "Rule 12: change 'KN' to 'N', else 'K' to 'C'.
     Runs two passes over the whole key: the first collapses every 'KN'
     to 'N', the second turns every remaining 'K' into 'C'.
     (The first pass used to produce 'K', which the second pass then
     rewrote to 'C', so 'KN' ended up as 'C' instead of 'N'.)
     Answers a new string."

    |withoutKN|

    withoutKN := self 
                transcodeAll:'KN'
                of:key
                to:'N'
                startingAt:1.
    ^ self 
        transcodeAll:'K'
        of:withoutKN
        to:'C'
        startingAt:1
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1050
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1051
rule13:key 
    "Rule 13: change 'M' to 'N', except in the first position.
     The search begins at index 2, so a leading 'M' is left alone.
     Answers the transcoded string."

    |transcoded|

    transcoded := self transcodeAll:'M' of:key to:'N' startingAt:2.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1060
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1061
rule14:key 
    "Rule 14: change 'Q' to 'G', except in the first position.
     The search begins at index 2, so a leading 'Q' is left alone.
     Answers the transcoded string."

    |transcoded|

    transcoded := self transcodeAll:'Q' of:key to:'G' startingAt:2.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1070
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1071
rule15:key 
    "Rule 15: transcode every 'SH' in key to 'S'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'SH' of:key to:'S' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1080
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1081
rule16:key 
    "Rule 16: transcode every 'SCH' in key to 'S'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'SCH' of:key to:'S' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1090
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1091
rule17:key 
    "Rule 17: transcode every 'YW' in key to 'Y'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'YW' of:key to:'Y' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1100
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1101
rule18:key 
    "Rule 18: change 'Y' to 'A', unless it is the first or the last
     character.
     All 'Y's from index 2 on are transcoded first; if the key ended
     in 'Y', that final character is then restored.
     An empty key is answered as an (empty) copy instead of raising
     an error from sending #last to an empty string.
     Answers a new string."

    |k|

    key isEmpty ifTrue:[ ^ key copy ].

    k := self 
                transcodeAll:'Y'
                of:key
                to:'A'
                startingAt:2.
    "a one-for-one replacement keeps the length, so the last index is unchanged"
    key last = $Y ifTrue:[
        k at:k size put:$Y
    ].
    ^ k
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1115
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1116
rule19:key 
    "Rule 19: transcode every 'WR' in key to 'R'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'WR' of:key to:'R' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1125
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1126
rule1:key 
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     The previous version tested the last Character against an Array of
     Strings, which can never match, so nothing was ever removed; it
     would also fail on an empty string.  Here the end index is walked
     backwards over trailing $S / $Z characters and the remaining prefix
     is copied once.
     Answers a new string (possibly empty)."

    |end|

    end := key size.
    [ end > 0 and:[ 'SZ' includes:(key at:end) ] ] whileTrue:[
        end := end - 1
    ].
    ^ key copyFrom:1 to:end
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1136
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1137
rule20:key 
    "Rule 20: change 'Z' to 'S', except in the first position.
     The search begins at index 2, so a leading 'Z' is left alone.
     Answers the transcoded string."

    |transcoded|

    transcoded := self transcodeAll:'Z' of:key to:'S' startingAt:2.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1146
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1147
rule21:key 
    "Rule 21: transcode a terminal 'AY' to 'Y'.
     Only the last two characters are looked at.  Keys shorter than two
     characters cannot end in 'AY' and are answered unchanged; the
     previous version computed a search start of 0 or -1 for them.
     Answers a new string."

    |size|

    size := key size.
    (size >= 2 and:[ (key copyFrom:size - 1 to:size) = 'AY' ]) ifTrue:[
        "keep everything before the 'A', then the 'Y'"
        ^ (key copyFrom:1 to:size - 2) , 'Y'
    ].
    ^ key copy
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1156
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1157
rule22:key 
    "Rule 22: remove trailing vowels.
     The end index is walked backwards over trailing vowels and the
     remaining prefix is copied once.  The index check stops the scan
     when the whole key consists of vowels (or is empty); the previous
     version sent #last to the emptied string in that case.
     Answers a new string (possibly empty)."

    |end|

    end := key size.
    [ end > 0 and:[ (key at:end) isVowel ] ] whileTrue:[
        end := end - 1
    ].
    ^ key copyFrom:1 to:end
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1167
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1168
rule23:key 
    "Rule 23: collapse all strings of repeated characters, i.e. keep only
     the first character of every run of equal characters.
     The previous version iterated with 'k size to:2 do:', which counts
     upwards and therefore never executed for keys longer than two
     characters.  The loop now counts down explicitly.
     Answers a new string."

    |k|

    k := key copy.
    "walk from the back: deleting position i leaves all positions < i
     untouched, so the comparisons can be made against the original key"
    key size to:2 by:-1 do:[:i | 
        (key at:i) = (key at:i - 1) ifTrue:[
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ]
    ].
    ^ k
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1184
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1185
rule24:key originalKey:originalKey 
    "Rule 24: if the first character of the original surname was a vowel,
     append it (as an uppercase one-character string) to the code.
     Answers a new string in either case."

    |initial|

    initial := originalKey first.
    initial isVowel ifFalse:[ ^ key copy ].
    ^ key copy , initial asString asUppercase
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1195
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1196
rule2:key 
    "Rule 2: transcode initial strings:  MAC => MC   PF => F.
     The prefix tests are guarded by a size check, so keys shorter than
     the prefix are answered unchanged instead of raising an index error
     from copyFrom:to:.
     Answers a new string."

    |k|

    k := key copy.
    (k size >= 3 and:[ (k copyFrom:1 to:3) = 'MAC' ]) ifTrue:[
        k := 'MC' , (k copyFrom:4 to:k size)
    ].
    (k size >= 2 and:[ (k copyFrom:1 to:2) = 'PF' ]) ifTrue:[
        k := 'F' , (k copyFrom:3 to:k size)
    ].
    ^ k
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1209
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1210
rule3:key 
    "Rule 3: transcode trailing strings as follows:
        IX => IC
        EX => EC
        YE, EE, IE => Y
        NT, ND => D
     The substitutions are applied one after the other, in the order
     listed, each one working on the result of the previous one.
     Answers the transcoded string."

    |result|

    result := key copy.
    "each entry: (endings-to-look-for  replacement)"
    #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
    ) do:[:entry | 
        result := self 
                    transcodeTrailing:(entry at:1)
                    of:result
                    to:(entry at:2)
    ].
    ^ result
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1237
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1238
rule4:key 
    "Rule 4: transcode 'EV' to 'EF', unless it is at the start of the name.
     The search begins at index 2, so a leading 'EV' is left alone.
     Answers the transcoded string."

    |transcoded|

    transcoded := self transcodeAll:'EV' of:key to:'EF' startingAt:2.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1247
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1248
rule5:key 
    "Rule 5: use the first character of the name as the first character
     of the key.
     Nothing to do here, because the conversion is done in place;
     the argument is answered as is (not a copy)."

    ^ key
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1253
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1254
rule6:key 
    "Rule 6: remove any 'W' that follows a vowel.
     Scans for $W from index 2 on (a leading 'W' has no predecessor).
     When the character before a 'W' is a vowel, the 'W' is deleted and
     the scan continues at the same index, which now holds the character
     that followed the 'W'.  Otherwise the scan moves on to the next
     index; the previous version did not advance in that case and
     looped forever on keys such as 'SWAN'.
     Answers a new string."

    |k i|

    k := key copy.
    i := 2.
    [
        "the size check also stops the scan once i has moved past the end"
        i <= k size and:[ (i := k indexOf:$W startingAt:i) > 0 ]
    ] whileTrue:[
        (k at:i - 1) isVowel 
            ifTrue:[
                k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
            ]
            ifFalse:[
                i := i + 1
            ]
    ].
    ^ k
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1270
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1271
rule7:key 
    "Rule 7: replace all vowels with 'A'.
     Works on a copy: every position that holds a vowel in key is
     overwritten with $A in the copy.
     Answers the copy."

    |result|

    result := key copy.
    key keysAndValuesDo:[:index :char | 
        char isVowel ifTrue:[ result at:index put:$A ]
    ].
    ^ result
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1283
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1284
rule8:key 
    "Rule 8: transcode every 'GHT' in key to 'GT'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'GHT' of:key to:'GT' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1293
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1294
rule9:key 
    "Rule 9: transcode every 'DG' in key to 'G'.
     Answers the transcoded string; the search covers the whole key."

    |transcoded|

    transcoded := self transcodeAll:'DG' of:key to:'G' startingAt:1.
    ^ transcoded
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1303
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1304
transcodeAll:aString of:key to:replacementString startingAt:start 
    "Answer a copy of key in which every occurrence of aString found at
     or after index start has been replaced by replacementString.
     After each replacement the search is repeated from the same start
     index, so occurrences which only come into existence through an
     earlier replacement are transcoded as well.
     NOTE: because of this rescan, a replacementString that itself
     contains aString would never terminate; none of the rules visible
     in this protocol passes such a pair.

     A start index below 1 is treated as 1, an empty aString is ignored,
     and the search is not attempted once start lies beyond the end of
     the (possibly shortened) string."

    |k i from|

    k := key copy.
    aString isEmpty ifTrue:[ ^ k ].

    from := start max:1.
    [
        from <= k size 
            and:[ (i := k indexOfSubCollection:aString startingAt:from) > 0 ]
    ] whileTrue:[
        k := (k copyFrom:1 to:i - 1) , replacementString 
                    , (k copyFrom:i + aString size to:k size)
    ].
    ^ k
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1316
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1317
transcodeTrailing:anArrayOfStrings of:key to:replacementString 
    "Answer a copy of key in which a trailing occurrence of any of the
     strings in anArrayOfStrings has been replaced by replacementString.
     The candidates are tried in the given order, each one against the
     result left by the previous ones.

     A candidate which is longer than the current result cannot be a
     suffix of it and is skipped; without that check, the search start
     index computed below would be zero or negative."

    |answer|

    answer := key copy.
    anArrayOfStrings do:[:aString | 
        answer size >= aString size ifTrue:[
            "search only where aString can still fit, i.e. at the very end"
            answer := self 
                        transcodeAll:aString
                        of:answer
                        to:replacementString
                        startingAt:(answer size - aString size) + 1
        ]
    ].
    ^ answer
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1329
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1330
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1331
!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1332
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1333
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1334
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1335
Implementation of the PHONEM algorithm, as described in
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1336
'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1337
Ein Programm fuer kontextsensitive phonetische Textumwandlung
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1338
ct Magazin fuer Computer & Technik 25/1998'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1339
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1340
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1341
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1342
!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1343
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1344
phoneticStringsFor:aString 
    "Answer a one-element array containing the PHONEM code of aString.

     Processing steps:
       1. convert to uppercase
       2. replace two-character groups according to a rule table
          (after a replacement, the same position is examined again)
       3. single character substitutions
       4. remove every character outside of the code alphabet
       5. squash runs of the same character

     All adjacent character pairs are examined in step 2, including the
     one formed by the last two characters of the string."

    |s idx pair replacement rules|

    "pairs of: two-character group, replacement"
    rules := #(
                'SC' 'C'
                'SZ' 'C'
                'CZ' 'C'
                'TZ' 'C'
                'TS' 'C'
                'KS' 'X'
                'PF' 'V'
                'QU' 'KW'
                'PH' 'V'
                'UE' 'Y'
                'AE' 'E'
                'OE' 'Ö'
                'EI' 'AY'
                'EY' 'AY'
                'EU' 'OY'
                'AU' 'A§'
                'OU' '§ '
              ).

    s := aString asUppercase.

    idx := 1.
    "idx may go up to (s size - 1), so that the last pair is covered, too"
    [idx < s size] whileTrue:[
        pair := s copyFrom:idx to:idx+1.
        replacement := nil.
        1 to:rules size by:2 do:[:ruleIdx |
            (replacement isNil and:[(rules at:ruleIdx) = pair]) ifTrue:[
                replacement := rules at:ruleIdx+1
            ]
        ].
        replacement notNil ifTrue:[
            "do not advance - the replacement may start another group"
            s := (s copyTo:idx-1),replacement,(s copyFrom:idx+2)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "NOTE(review): the source set has 14 characters, the target set 15
     (the trailing A has no partner) - to be checked against the reference."
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVDDUA'.
    "/ remove everything which is not part of the code alphabet
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    "/ squash runs of the same character
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ Array with:s

    "
     self basicNew phoneticStringsFor:'müller'  #('MYLR')    
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')     
     self basicNew phoneticStringsFor:'muller'  #('MULR') 
     self basicNew phoneticStringsFor:'muler'   #('MULR') 
     self basicNew phoneticStringsFor:'schmidt'     #('CMYD')
     self basicNew phoneticStringsFor:'schneider'   #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'     #('VYCR')
     self basicNew phoneticStringsFor:'weber'       #('VBR')
     self basicNew phoneticStringsFor:'meyer'       #('MAYR')
     self basicNew phoneticStringsFor:'wagner'      #('VACNR')
     self basicNew phoneticStringsFor:'schulz'      #('CULC')
     self basicNew phoneticStringsFor:'becker'      #('BCR')
     self basicNew phoneticStringsFor:'hoffmann'    #('OVMAN')
     self basicNew phoneticStringsFor:'schäfer'     #('CVR')
     self basicNew phoneticStringsFor:'matz'        #('MAC')
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1400
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1401
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1402
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1403
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1404
copyright
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1405
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1406
Copyright (c) 2002-2004 Robert Jarvis
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1407
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1408
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1409
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1410
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1411
the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1412
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1413
The above copyright notice and this permission notice shall be included in all copies or substantial 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1414
portions of the Software.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1415
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1416
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1417
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1418
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1419
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1420
USE OR OTHER DEALINGS IN THE SOFTWARE.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1421
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1422
! !
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1423
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1424
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1425
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1426
isSlavoGermanic:aString
    "Answer true, if aString contains one of the letter groups
     w, k, cz or witz; false otherwise.
     The comparison ignores case, so that both raw input and an
     already uppercased key (as used on the instance side) are recognized."

    |lowercased|

    lowercased := aString asLowercase.
    ^ #('w' 'k' 'cz' 'witz') contains:[:sub | lowercased includesString:sub]

    "
     self isSlavoGermanic:'walter'
     self isSlavoGermanic:'WALTER'
     self isSlavoGermanic:'anna'
    "
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1432
! !
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1433
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1434
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1435
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1436
documentaion
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1437
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1438
The Double Metaphone algorithm:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1439
see: Lawrence Philips, 'The Double Metaphone Search Algorithm', C/C++ Users Journal, June 2000
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1440
"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1441
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1442
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1443
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1444
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1445
currentIndex
	"Answer the current index."

	^ currentIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1447
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1448
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1449
currentIndex: anInteger
	"Set the current index to anInteger."

	currentIndex := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1451
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1452
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1453
inputKey
	"Answer the input key (stored in uppercase by the inputKey: setter)."

	^ inputKey
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1455
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1456
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1457
inputKey: aString
	"Store aString, converted to uppercase, as the input key."

	| uppercased |

	uppercased := aString asUppercase.
	inputKey := uppercased
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1459
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1460
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1461
primaryTranslation
	"Answer the primary translation collected so far."

	^ primaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1463
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1464
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1465
primaryTranslation: anObject
	"Set the primary translation to anObject."

	primaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1467
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1468
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1469
secondaryTranslation
	"Answer the secondary translation collected so far."

	^ secondaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1471
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1472
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1473
secondaryTranslation: anObject
	"Set the secondary translation to anObject."

	secondaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1475
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1476
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1477
skipCount
	"Answer the skip count."

	^ skipCount
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1479
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1480
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1481
skipCount: anInteger
	"Set the skip count to anInteger."

	skipCount := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1483
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1484
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1485
startIndex
	"Answer the start index."

	^ startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1487
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1488
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1489
startIndex: anObject
	"Set the start index to anObject."

	startIndex := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1491
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1492
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1493
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1494
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1495
phoneticStringsFor: aString
        "Private - Answers an array of alternate phonetic strings for the given input string.
         The first element is the primary, the second the secondary translation.

         The per-run state is reset to the values assigned in #initialize before
         processing. The translations are built by appending
         (see addPrimaryTranslation: / addSecondaryTranslation:), so without the
         reset a comparator which is used a second time would append to the
         result of its previous run."

        self
            startIndex: 1;
            primaryTranslation: '';
            secondaryTranslation: '';
            skipCount: 0;
            currentIndex: 1.

        self inputKey: aString.
        self performInitialProcessing.
        self processRemainingCharacters.

        ^ Array with: primaryTranslation with: secondaryTranslation
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1503
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1504
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1505
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1506
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1507
initialize
	"Set up the processing state: both indices are set to 1, the skip count
	 to 0 and both translations to the empty string."

	super initialize.

	self startIndex: 1.
	self currentIndex: 1.
	self skipCount: 0.
	self primaryTranslation: ''.
	self secondaryTranslation: ''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1516
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1517
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1518
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1519
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1520
addPrimaryTranslation: aString
	"Append aString to the primary translation."

	| extended |

	extended := self primaryTranslation , aString.
	self primaryTranslation: extended
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1522
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1523
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1524
addSecondaryTranslation: aString
	"Append aString to the secondary translation."

	| extended |

	extended := self secondaryTranslation , aString.
	self secondaryTranslation: extended
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1526
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1527
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1528
decrementSkipCount
	"Decrease the skip count by one."

	| remaining |

	remaining := self skipCount - 1.
	self skipCount: remaining
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1530
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1531
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1532
incrementSkipCount
	"Increase the skip count by one (delegates to incrementSkipCount:)."

	self incrementSkipCount: 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1534
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1535
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1536
incrementSkipCount: anInteger
	"Increase the skip count by anInteger."

	| increased |

	increased := self skipCount + anInteger.
	self skipCount: increased
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1538
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1539
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1540
incrementStartIndex
	"Advance the start index by one."

	| next |

	next := self startIndex + 1.
	self startIndex: next
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1542
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1543
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1544
isSlavoGermanic: aString
	"Answer true, if aString contains a W or a K, or one of the
	 letter groups CZ or WITZ (uppercase only); false otherwise."

	(aString includesAnyOf: 'WK') ifTrue: [ ^true ].
	(aString indexOfSubCollection: 'CZ' startingAt: 1) >= 1 ifTrue: [ ^true ].
	^(aString indexOfSubCollection: 'WITZ' startingAt: 1) >= 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1548
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1549
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1550
keyAt: anInteger
	"Answer the character of the input key at index anInteger.
	 For an index outside of the key, a space character is answered instead."

	| key |

	key := self inputKey.
	(anInteger < 1 or: [ anInteger > key size ]) ifTrue: [ ^$  ].
	^key at: anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1554
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1555
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1556
keyLeftString: lengthInteger
	"Answer what keyMidString:from: answers for lengthInteger characters
	 starting at index 1."

	| leftPart |

	leftPart := self keyMidString: lengthInteger from: 1.
	^leftPart
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1558
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1559
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1560
keyMidString: lengthInteger from: fromInteger
	"Answer the lengthInteger characters of the input key that start at
	 position fromInteger. Positions that lie before the first or after the
	 last character of the key are answered as spaces, so the result always
	 has exactly lengthInteger characters (the empty string if lengthInteger
	 is not positive).

	 The previous implementation answered too many spaces (and sent
	 copyFrom:to: with an inverted range) when the requested window did not
	 touch the key at all, e.g. when fromInteger was more than one past the
	 end of the key."

	| key lastWanted first last |

	lengthInteger <= 0 ifTrue: [ ^'' ].

	key := self inputKey.
	lastWanted := fromInteger + lengthInteger - 1.

	"clip the requested window to the part that really lies inside the key"
	first := fromInteger max: 1.
	last := lastWanted min: key size.

	"window entirely outside the key: nothing to copy, answer padding only"
	first > last
		ifTrue: [ ^String new: lengthInteger withAll: Character space ].

	"leading padding for positions < 1, trailing padding for positions > key size"
	^(String new: first - fromInteger withAll: Character space),
		(key copyFrom: first to: last),
		(String new: lastWanted - last withAll: Character space)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1587
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1588
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1589
keyRightString: lengthInteger
	"Answer the trailing lengthInteger characters of the input key.
	 The start position is computed from the key size and handed to
	 keyMidString:from:."

	| firstPosition |

	firstPosition := self inputKey size - lengthInteger + 1.
	^self keyMidString: lengthInteger from: firstPosition
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1591
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1592
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1593
performInitialProcessing
	"Handle the special cases at the very beginning of the input key:
	 a GN/KN/PN/WR/PS prefix, an initial X and an initial vowel.
	 Each case that applies sends incrementStartIndex."

	"Two-letter prefixes GN, KN, PN, WR, PS.
	 keyLeftString: pads with spaces, so a key shorter than two characters
	 simply does not match; the former 'copyFrom: 1 to: 2' read past the
	 end of such keys."
	(#('GN' 'KN' 'PN' 'WR' 'PS') includes: (self keyLeftString: 2))
		ifTrue: [ self incrementStartIndex ].

	"an initial X is translated as S"
	(self keyAt: 1) = $X
		ifTrue:
			[ self
				addPrimaryTranslation: 'S';
				addSecondaryTranslation: 'S'.
			self incrementStartIndex ].

	"an initial vowel is translated as A"
	(self keyAt: 1) isVowel
		ifTrue:
			[ self
				addPrimaryTranslation: 'A';
				addSecondaryTranslation: 'A'.
			self incrementStartIndex ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1610
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1611
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1612
processB
        "Translate a B at currentIndex as P in both the primary and the
         secondary translation. If the following character is also a B,
         send incrementSkipCount."

        | followedByB |

        self addPrimaryTranslation: 'P'.
        self addSecondaryTranslation: 'P'.

        followedByB := (self keyAt: currentIndex + 1) = $B.
        followedByB ifTrue: [ self incrementSkipCount ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1618
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1619
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1620
processC
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1621
        "i"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1622
        ((((currentIndex >= 3
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1623
                and: [ (self keyAt: currentIndex-2) isVowel not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1624
                and: [ (self keyMidString: 3 from: currentIndex-1) = 'ACH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1625
                and: [ (self keyAt: currentIndex+2) ~= $I ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1626
                and: [ ((self keyAt: currentIndex+2) ~= $E)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1627
                                or: [ (self keyMidString: 6 from: currentIndex-2) ~= 'BACHER'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1628
                                                and: [ (self keyMidString: 6 from: currentIndex-2) ~= 'MACHER' ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1629
                        ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1630
                                [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1631
                                self addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1632
                                self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1633
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1634
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1635
        "ii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1636
        (self inputKey beginsWith: 'CAESAR')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1637
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1638
                        [ self addPrimaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1639
                        self addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1640
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1641
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1642
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1643
        "iii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1644
        (self keyMidString: 4 from: currentIndex) = 'CHIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1645
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1646
                        [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1647
                        self addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1648
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1649
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1650
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1651
        "iv"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1652
        (self keyMidString: 2 from: currentIndex) = 'CH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1653
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1654
                        [ (currentIndex > 1                "a"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1655
                                        and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1656
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1657
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1658
                                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1659
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1660
                                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1661
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1662
                        (currentIndex = 1          "b"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1663
                                        and: [ (self inputKey size > 5 and: [(self inputKey copyFrom: 1 to: 6) = 'CHARAC'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1664
                                                        or: [ (self inputKey copyFrom: 1 to: 6) = 'CHARIS' ]] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1665
                                                or: [self inputKey size > 4 and: [ ((((self inputKey copyFrom: 1 to: 4) = 'CHOR'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1666
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHYM' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1667
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHIA' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1668
                                                        or: [ (self inputKey copyFrom: 1 to: 4) = 'CHEM' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1669
                                                        "compare the first FIVE characters: a 4-character prefix can never equal 'CHORE', which made this exclusion a no-op (size > 4 is already guaranteed above)"
                                                        and: [ (self inputKey copyFrom: 1 to: 5) ~= 'CHORE' ] ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1670
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1671
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1672
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1673
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1674
                                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1675
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1676
                        "keyLeftString: pads short keys with spaces; the former unguarded copyFrom:to: read past the end of keys shorter than 4 (resp. 3) characters, e.g. 'ACH' or 'CH'"
                        (((((#('VAN ' 'VON ') includes: (self keyLeftString: 4))              "c"
                                        or: [ (self keyLeftString: 3) = 'SCH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1678
                                        or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1679
                                                        includes: (self keyMidString: 6 from: currentIndex-2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1680
                                        or: [ #($T $S) includes: (self keyAt: currentIndex+2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1681
                                        or: [ ((currentIndex = 1)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1682
                                                        or: [ #($A $O $U $E) includes: (self keyAt: currentIndex-1) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1683
                                                and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex+2) ] ] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1684
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1685
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1686
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1687
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1688
                                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1689
                                                ^self ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1690
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1691
                                        [ currentIndex > 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1692
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1693
                                                        [ (self inputKey copyFrom: 1 to: 2) = 'MC'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1694
                                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1695
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1696
                                                                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1697
                                                                                                addSecondaryTranslation: 'K' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1698
                                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1699
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1700
                                                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1701
                                                                                                addSecondaryTranslation: 'K' ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1702
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1703
                                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1704
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1705
                                                                addSecondaryTranslation: 'X' ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1706
                                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1707
                                        ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1708
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1709
        "v"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1710
        (self keyAt: currentIndex+1) = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1711
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1712
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1713
                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1714
                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1715
                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1716
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1717
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1718
        "vi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1719
        (self keyMidString: 3 from: currentIndex+1) = 'CIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1720
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1721
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1722
                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1723
                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1724
                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1725
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1726
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1727
        "vii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1728
        ((self keyAt: currentIndex+1) = $C
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1729
                        and: [ ((currentIndex = 2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1730
                                and: [ (self keyAt: 1) = $M ]) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1731
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1732
                        [ ((#($I $E $H) includes: (self keyAt: currentIndex+2))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1733
                                        and: [ (self keyMidString: 2 from: currentIndex+2) ~= 'HU' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1734
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1735
                                        [ ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1736
                                                        or: [ #('UCCEE' 'UCCES') includes: (self keyMidString: 5 from: currentIndex-1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1737
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1738
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1739
                                                                addPrimaryTranslation: 'KS';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1740
                                                                addSecondaryTranslation: 'KS';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1741
                                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1742
                                                                ^self ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1743
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1744
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1745
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1746
                                                                addSecondaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1747
                                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1748
                                                                ^self ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1749
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1750
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1751
                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1752
                                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1753
                                                incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1754
                                                ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1755
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1756
        "viii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1757
        (#($K $G $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1758
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1759
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1760
                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1761
                                addSecondaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1762
                                incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1763
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1764
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1765
        "ix"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1766
        (#($I $E $Y) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1767
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1768
                        [ (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1769
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1770
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1771
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1772
                                                addSecondaryTranslation: 'X' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1773
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1774
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1775
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1776
                                                addSecondaryTranslation: 'S'].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1777
                        self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1778
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1779
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1780
        "x"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1781
        self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1782
                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1783
                addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1784
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1785
        "xi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1786
        (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1787
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1788
                        [ self incrementSkipCount: 2 ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1789
                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1790
                        [ ((#($C $K $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1791
                                        and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex+1)) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1792
                                ifTrue: [ self incrementSkipCount: 1] ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1793
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1794
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1795
processCedille 
	"Emit the code 'S' into both the primary and the secondary translation.
	 No skip count is set, so only the current character is consumed.
	 NOTE(review): judging by the selector this is the handler for a
	 c-cedilla; the dispatching code is not visible here - confirm."

	self addPrimaryTranslation: 'S'.
	self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1799
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1800
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1801
processD
        "Translate the D at currentIndex. The character(s) following it
         select the emitted code and how many extra characters are skipped:
           D G + one of I/E/Y  -> 'J',  skip 2 (the G and the vowel)
           D G + anything else -> 'TK', skip 1 (the G)
           D T  or  D D        -> 'T',  skip 1 (the doubled consonant)
           any other D         -> 'T',  nothing skipped
         The same code is always added to both the primary and the
         secondary translation."

        | code skip |

        (self keyAt: currentIndex+1) = $G
                ifTrue: [
                        "DG: soft before I, E or Y, hard otherwise"
                        (#($I $E $Y) includes: (self keyAt: currentIndex+2))
                                ifTrue:  [ code := 'J'.  skip := 2 ]
                                ifFalse: [ code := 'TK'. skip := 1 ] ]
                ifFalse: [
                        "plain D; a directly following T or D is swallowed"
                        code := 'T'.
                        skip := (#($T $D) includes: (self keyAt: currentIndex+1))
                                        ifTrue: [ 1 ]
                                        ifFalse: [ 0 ] ].

        self
                addPrimaryTranslation: code;
                addSecondaryTranslation: code.

        "only touch the skip count when something is actually skipped"
        skip > 0 ifTrue: [ self incrementSkipCount: skip ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1832
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1833
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1834
processF
	"Emit 'F' into both translations. When the character right after
	 the current one is also an F, request one extra character to be
	 skipped, so that a doubled F yields a single code."

	self
		addPrimaryTranslation: 'F';
		addSecondaryTranslation: 'F'.

	"nothing more to do unless this is the first F of a pair"
	(self keyAt: self currentIndex+1) = $F ifFalse: [ ^self ].
	self incrementSkipCount: 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1840
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1841
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1842
processG
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1843
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1844
        case 'G':
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1845
                if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1846
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1847
        | word |
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1848
        (self keyAt: currentIndex + 1) = $H
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1849
        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1850
                "if((current > 0) AND !!IsVowel(current - 1))"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1851
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1852
                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1853
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1854
              " {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1855
                   MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1856
                   current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1857
                   break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1858
                }"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1859
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1860
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1861
                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1862
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1863
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1864
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1865
                "if(current < 3)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1866
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1867
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1868
                currentIndex < 4 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1869
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1870
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1871
                        " //'ghislane', ghiradelli
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1872
               if(current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1873
               { "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1874
                        currentIndex = 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1875
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1876
                                "if(GetAt(current + 2) == 'I')"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1877
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1878
                                (self keyAt: currentIndex + 2) = $I
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1879
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1880
                                        "MetaphAdd(J);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1881
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1882
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1883
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1884
                                        "MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1885
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1886
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1887
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1888
                                "  current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1889
                                break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1890
                                ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1891
                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1892
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1893
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1894
                " //Parker's rule (with some further refinements) - e.g., 'hugh'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1895
                if(((current > 1) AND StringAt((current - 2), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1896
                //e.g., 'bough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1897
                OR ((current > 2) AND StringAt((current - 3), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1898
                //e.g., 'broughton'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1899
                OR ((current > 3) AND StringAt((current - 4), 1, B, H, ) ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1900
         "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1901
                (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)]) 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1902
                or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])  
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1903
                or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1904
                ifTrue: [                         
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1905
                        "current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1906
                        break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1907
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1908
                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1909
                        " //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1910
               if((current > 2) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1911
               AND (GetAt(current - 1) == 'U') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1912
               AND StringAt((current - 3), 1, C, G, L, R, T, ) )"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1913
                        (currentIndex > 3 and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1914
                                ((self keyAt: currentIndex - 1) = $U) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1915
                                        #($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1916
                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1917
                        ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1918
                                "MetaphAdd(F);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1919
                                self addPrimaryTranslation: 'F';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1920
                                addSecondaryTranslation: 'F'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1921
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1922
                                " if((current > 0) AND GetAt(current - 1) !!= 'I')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1923
                    MetaphAdd(K);"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1924
                                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1925
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1926
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1927
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1928
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1929
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1930
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1931
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1932
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1933
                "if(GetAt(current + 1) == 'N')"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1934
          (self keyAt: currentIndex + 1) = $N
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1935
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1936
                        "if((current == 1) AND IsVowel(0) AND !!SlavoGermanic())"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1937
                        (currentIndex = 2 and: [(self inputKey at: 1) isVowel and: [(self isSlavoGermanic: self inputKey) not]])
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1938
               ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1939
                                "MetaphAdd(KN, N);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1940
                                self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1941
                                addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1942
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1943
                                " //not e.g. 'cagney'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1944
                                if(!!StringAt((current + 2), 2, EY, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1945
                                AND (GetAt(current + 1) !!= 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1946
                                AND !!SlavoGermanic())"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1947
                                ((self inputKey size >= (currentIndex + 2)) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1948
                                        (self inputKey copyFrom: currentIndex + 2 to: (currentIndex + 4 min: self inputKey size)) ~= 'EY' and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1949
                                                (self keyAt: currentIndex + 1) ~= $Y and: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1950
                                                        (self isSlavoGermanic: self inputKey) not
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1951
                                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1952
                                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1953
                                ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1954
                                        self addPrimaryTranslation: 'N';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1955
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1956
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1957
                                        self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1958
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1959
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1960
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1961
                        ^self incrementSkipCount: 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1962
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1963
                " //'tagliaro'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1964
                if(StringAt((current + 1), 2, LI, ) AND !!SlavoGermanic())"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1965
                ((self inputKey size >= (currentIndex + 3)) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1966
                        (self inputKey copyFrom: currentIndex + 1 to: currentIndex + 2) = 'LI' and: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1967
                                (self isSlavoGermanic: self inputKey) not]])
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1968
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1969
                        self addPrimaryTranslation: 'KL';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1970
                        addSecondaryTranslation: 'L'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1971
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1972
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1973
                " //-ges-,-gep-,-gel-, -gie- at beginning
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1974
                if((current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1975
                AND ((GetAt(current + 1) == 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1976
                OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER, )) )"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1977
                (currentIndex = 1 and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1978
                        ((self keyAt: currentIndex + 1) = $Y) or: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1979
                        (#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER') includes: 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1980
                                (self inputKey copyFrom: currentIndex + 1 to: currentIndex + 2))
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1981
                ]]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1982
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1983
                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1984
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1985
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1986
                " // -ger-,  -gy-
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1987
                if((StringAt((current + 1), 2, ER, ) OR (GetAt(current + 1) == 'Y'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1988
                AND !!StringAt(0, 6, DANGER, RANGER, MANGER, )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1989
                AND !!StringAt((current - 1), 1, E, I, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1990
                AND !!StringAt((current - 1), 3, RGY, OGY, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1991
                "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1992
          (((self inputKey copyFrom: currentIndex + 1 to: (currentIndex + 3 min: self inputKey size)) = 'ER' or: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1993
                                ((self keyAt: currentIndex + 1) = $Y)]) 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1994
                        and: [((#('DANGER' 'RANGER' 'MANGER') includes: (word := self inputKey copyFrom: 1 to: (6 min: self inputKey size))) not)
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1995
                                and: [(self keyAt: currentIndex - 1) ~= $E
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1996
                                        and: [(#('RGY' 'OGY') includes: (self inputKey copyFrom: currentIndex - 1 to: currentIndex + 1)) not]]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1997
                 ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1998
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1999
                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2000
                        ^self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2001
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2002
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2003
          " // italian e.g, 'biaggi'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2004
           if(StringAt((current + 1), 1, E, I, Y, ) OR StringAt((current - 1), 4, AGGI, OGGI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2005
           "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2006
                ((#($E $I $Y) includes: (self keyAt: (currentIndex + 1))) or: [(#('AGGI' 'OGGI') includes: (self inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: self inputKey size)))])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2007
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2008
                        " //obvious germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2009
                                        if((StringAt(0, 4, VAN , VON , ) OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2010
                                                OR StringAt((current + 1), 2, ET, ))                                                MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2011
                        word := (self inputKey copyFrom: 1 to: 4).
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2012
                        ((#('VAN ' 'VON ') includes: word) or: [(word copyFrom: 1 to: 3) = 'SCH' or: [(word copyFrom: 1 to: 2) = 'ET']]) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2013
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2014
                                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2015
                                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2016
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2017
                            " //always soft if french ending
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2018
                                                if(StringAt((current + 1), 4, IER , ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2019
                                                        MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2020
                                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2021
                                                        MetaphAdd(J, K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2022
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2023
                                        break;"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2024
                                (((self inputKey copyFrom: currentIndex + 1 to: (currentIndex + 5 min: self inputKey size)), '    ') copyFrom: 1 to: 4) = 'IER '
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2025
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2026
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2027
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2028
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2029
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2030
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2031
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2032
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2033
                        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2034
                        ^self incrementSkipCount: 1.       
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2035
                ].                      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2036
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2037
        " if(GetAt(current + 1) == 'G')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2038
             current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2039
         else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2040
             current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2041
         MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2042
            break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2043
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2044
                (self keyAt: (currentIndex + 1)) = $G
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2045
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2046
                        self incrementSkipCount: 1.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2047
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2048
                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2049
                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2050
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2051
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2052
processH
        "Translate the letter H found at currentIndex (double metaphone, case 'H').
         Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if(((current == 0) OR IsVowel(current - 1))
                        AND IsVowel(current + 1))
                {
                        MetaphAdd(H);
                        current += 2;
                }else//also takes care of 'HH'
                        current += 1;
                break;

         The H is kept (as 'H' in both translations) only when it is the first
         letter or follows a vowel, and is itself followed by a vowel.
         In every other case nothing is added."

        "neither word-initial nor preceded by a vowel: nothing to add.
         The previous letter is only fetched when currentIndex is not 1."
        ((currentIndex = 1) or: [(self keyAt: currentIndex - 1) isVowel]) ifFalse: [^self].

        "not followed by a vowel: nothing to add"
        (self keyAt: currentIndex + 1) isVowel ifFalse: [^self].

        self addPrimaryTranslation: 'H'.
        self addSecondaryTranslation: 'H'.

        "counterpart of 'current += 2' in the reference code"
        ^self incrementSkipCount: 1
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2074
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2075
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2076
processJ
        "Translate the letter J found at currentIndex (double metaphone, case 'J').
         Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if(StringAt(current, 4, JOSE, ) OR StringAt(0, 4, SAN , ) )
                {
                        if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, SAN , ) )
                                MetaphAdd(H);
                        else
                        {
                                MetaphAdd(J, H);
                        }
                        current +=1;
                        break;
                }

                if((current == 0) AND !!StringAt(current, 4, JOSE, ))
                        MetaphAdd(J, A);//Yankelovich/Jankelowicz
                else
                        //spanish pron. of e.g. 'bajador'
                        if(IsVowel(current - 1)
                                AND !!SlavoGermanic()
                                        AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
                                MetaphAdd(J, H);
                        else
                                if(current == last)
                                        MetaphAdd(J,  );
                                else
                                        if(!!StringAt((current + 1), 1, L, T, K, S, N, M, B, Z, )
                                                        AND !!StringAt((current - 1), 1, S, K, L, ))
                                                MetaphAdd(J);

                if(GetAt(current + 1) == 'J')//it could happen!!
                        current += 2;
                else
                        current += 1;
                break;

         Summary of the cases handled below:
           - key contains JOSE at currentIndex, or key starts with 'SAN ':
               primary 'H' / secondary 'H' if the key starts with 'SAN ', or if
               JOSE is at the start and is either the whole key or followed by
               a space; otherwise primary 'J' / secondary 'H'.
           - J is the first letter:                     'J' / 'A'
           - after a vowel, not slavo-germanic, followed by A or O:  'J' / 'H'
           - J is the last letter:                      'J' / ' '
           - not followed by one of L T K S N M B Z and not preceded by
             one of S K L:                              'J' / 'J'
           - anything else: nothing is added.
         A following second J is skipped."

        | key keySize currentWord firstWord nextLetter spanishH |

        key := self inputKey.
        keySize := key size.
        "up to four letters starting at the J, and the first (up to) four letters of the key"
        currentWord := key copyFrom: currentIndex to: (currentIndex + 3 min: keySize).
        firstWord := key copyFrom: 1 to: (4 min: keySize).
        nextLetter := self keyAt: currentIndex + 1.

        (currentWord = 'JOSE' or: [firstWord = 'SAN ']) ifTrue: [
                "The parentheses around the keyAt: send are essential: a binary
                 message binds tighter than a keyword message, so without them
                 the comparison result would be passed to keyAt: as its argument."
                spanishH := (currentIndex = 1
                                and: [keySize = 4
                                        or: [keySize >= 5
                                                and: [(self keyAt: currentIndex + 4) = Character space]]])
                            or: [firstWord = 'SAN '].
                spanishH
                        ifTrue: [
                                self addPrimaryTranslation: 'H';
                                     addSecondaryTranslation: 'H'.
                        ]
                        ifFalse: [
                                self addPrimaryTranslation: 'J';
                                     addSecondaryTranslation: 'H'.
                        ].
                ^self.
        ].

        (currentIndex = 1 and: [firstWord ~= 'JOSE'])
                ifTrue: [
                        "Yankelovich/Jankelowicz"
                        self addPrimaryTranslation: 'J';
                             addSecondaryTranslation: 'A'.
                ]
                ifFalse: [
                        "spanish pronunciation of e.g. 'bajador'"
                        ((currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel])
                         and: [(self isSlavoGermanic: self inputKey) not
                                and: [nextLetter = $A or: [nextLetter = $O]]])
                                ifTrue: [
                                        self addPrimaryTranslation: 'J';
                                             addSecondaryTranslation: 'H'.
                                ]
                                ifFalse: [
                                        currentIndex = keySize
                                                ifTrue: [
                                                        "J is the last letter; mirrors MetaphAdd(J,  ) in the reference"
                                                        self addPrimaryTranslation: 'J';
                                                             addSecondaryTranslation: ' '.
                                                ]
                                                ifFalse: [
                                                        ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not
                                                         and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
                                                                ifTrue: [
                                                                        self addPrimaryTranslation: 'J';
                                                                             addSecondaryTranslation: 'J'.
                                                                ].
                                                ].
                                ].
                ].

        "double J - it could happen!!  counterpart of 'current += 2' in the reference"
        nextLetter = $J ifTrue: [
                self incrementSkipCount: 1.
        ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2159
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2160
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2161
processK
        "Translate the letter K found at currentIndex (double metaphone, case 'K').
         Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         case 'K':
                if(GetAt(current + 1) == 'K')
                        current += 2;
                else
                        current += 1;
                MetaphAdd(K);
                break;

         Always adds 'K' to both translations; a directly following second K
         is skipped."

        | followedByK |

        followedByK := (self keyAt: currentIndex + 1) = $K.
        followedByK ifTrue: [
                "counterpart of 'current += 2' in the reference code"
                self incrementSkipCount: 1
        ].
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2178
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2179
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2180
processL
        "Translate the letter L found at currentIndex (double metaphone, case 'L').

         case 'L':
                if(GetAt(current + 1) == 'L')
                {
                        //spanish e.g. 'cabrillo', 'gallegos'
                        if(((current == (length - 3))
                                AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
                                         OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, ))
                                                AND StringAt((current - 1), 4, ALLE, )) )
                        {
                                MetaphAdd(L,  );
                                current += 2;
                                break;
                        }
                        current += 2;
                }else
                        current += 1;
                MetaphAdd(L);
                break;

         A single L adds 'L' to both translations.
         A double L is consumed as one letter; it adds 'L' / ' ' in the spanish
         cases of the reference code above, and 'L' / 'L' otherwise."

        | key keySize currentWord isSpanish |

        (self keyAt: currentIndex + 1) = $L ifTrue: [
                key := self inputKey.
                keySize := key size.

                "The four letters starting one before the L (reference: StringAt(current - 1, 4, ...)).
                 This is computed unconditionally because BOTH alternatives below need it;
                 an L at the very start has no preceding letter and can match neither."
                currentWord := currentIndex > 1
                                ifTrue: [key copyFrom: currentIndex - 1 to: (currentIndex + 2 min: keySize)]
                                ifFalse: [''].

                isSpanish :=
                        "...ILLO / ...ILLA / ...ALLE at the very end of the key"
                        (currentIndex = (keySize - 2)
                                and: [#('ILLO' 'ILLA' 'ALLE') includes: currentWord])
                        or: [
                                "ALLE anywhere, in a key ending in AS, OS, A or O (e.g. 'gallegos')"
                                currentWord = 'ALLE'
                                and: [(#('AS' 'OS') includes: (key copyFrom: keySize - 1 to: keySize))
                                        or: [#($A $O) includes: (self keyAt: keySize)]]
                        ].

                isSpanish ifTrue: [
                        "mirrors MetaphAdd(L,  ) in the reference"
                        self addPrimaryTranslation: 'L';
                             addSecondaryTranslation: ' '.
                        ^self incrementSkipCount: 1.
                ].
                "skip the second L; counterpart of 'current += 2' in the reference"
                self incrementSkipCount: 1.
        ].
        self addPrimaryTranslation: 'L';
             addSecondaryTranslation: 'L'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2217
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2218
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2219
processM
    "Encode the letter M at currentIndex. Both translations always receive 'M'.
     The following letter is skipped as well in two cases:
       - M is part of 'UMB' and that B is either the last letter of the key
         or is followed by 'ER' ('dumb', 'thumb', 'plumber');
       - the next letter is another M.

     Reference implementation being ported (0-based indices):
     case 'M':
         if((StringAt((current - 1), 3, UMB, )
                 AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
                         //'dumb','thumb'
                         OR  (GetAt(current + 1) == 'M') )
                 current += 2;
         else
                 current += 1;
         MetaphAdd(M);
         break;
    "
    | key keySize skipFollowingB |
    key := self inputKey.
    keySize := key size.
    skipFollowingB := (currentIndex > 1
            and: [(key copyFrom: currentIndex - 1 to: (currentIndex + 1 min: keySize)) = 'UMB'])
        and: [currentIndex + 1 = keySize
            or: [
                "StringAt(current + 2, 2, ER) is a two character window, i.e.
                 currentIndex + 2 .. currentIndex + 3 here. A wider window can
                 only equal 'ER' when the key happens to end right there."
                (key copyFrom: (currentIndex + 2 min: keySize) to: (currentIndex + 3 min: keySize)) = 'ER']].
    (skipFollowingB or: [(self keyAt: currentIndex + 1) = $M])
        ifTrue: [self incrementSkipCount: 1].
    self addPrimaryTranslation: 'M';
        addSecondaryTranslation: 'M'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2240
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2241
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2242
processN
    "Encode the letter N at currentIndex: a doubled N is consumed as one
     (the second N is skipped) and 'N' is appended to both translations.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     case 'N':
         if(GetAt(current + 1) == 'N')
                 current += 2;
         else
                 current += 1;
         MetaphAdd(N);
         break;
    "
    | isDoubled |
    isDoubled := (self keyAt: currentIndex + 1) = $N.
    isDoubled ifTrue: [self incrementSkipCount: 1].
    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2260
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2261
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2262
processNtilde
    "Encode the letter Ñ: it is appended as a plain 'N' to both translations
     and nothing is skipped.

     case 'Ñ':
         current += 1;
         MetaphAdd(N);
         break;
    "
    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2270
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2271
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2272
processP
    "Encode the letter P at currentIndex.
       - 'PH' is appended as 'F' to both translations and the H is skipped;
         in that case the result of incrementSkipCount: is returned.
       - otherwise 'P' is appended to both translations; a directly following
         P or B ('campbell', 'raspberry') is skipped.

     Reference implementation being ported. Note that the final MetaphAdd(P)
     is NOT part of the brace-less else branch despite its indentation; it is
     executed in both cases:
     case 'P':
         if(GetAt(current + 1) == 'H')
         {
                 MetaphAdd(F);
                 current += 2;
                 break;
         }

         //also account for campbell, raspberry
         if(StringAt((current + 1), 1, P, B, ))
                 current += 2;
         else
                 current += 1;
                 MetaphAdd(P);
         break;
    "
    | nextLetter |
    (nextLetter := self keyAt: currentIndex + 1) = $H
        ifTrue: [
            self addPrimaryTranslation: 'F';
                addSecondaryTranslation: 'F'.
            ^self incrementSkipCount: 1].
    (#($P $B) includes: nextLetter)
        ifTrue: [self incrementSkipCount: 1].
    "the P itself is always encoded, also in front of a skipped P or B"
    self addPrimaryTranslation: 'P';
        addSecondaryTranslation: 'P'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2303
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2304
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2305
processQ
    "Encode the letter Q at currentIndex: 'K' is appended to both
     translations; a directly following second Q is skipped.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     case 'Q':
         if(GetAt(current + 1) == 'Q')
                 current += 2;
         else
                 current += 1;
         MetaphAdd(K);
         break;
    "
    | following |
    following := self keyAt: currentIndex + 1.
    following = $Q ifTrue: [self incrementSkipCount: 1].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2323
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2324
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2325
processR
    "Encode the letter R at currentIndex.
     The secondary translation always receives 'R'. The primary translation
     receives an empty string instead of 'R' when all of these hold: the R is
     the last letter of the key, the key is not slavo-germanic, the two
     letters in front of it are 'IE', and the two letters in front of those
     are neither 'ME' nor 'MA'. A directly following second R is skipped.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     case 'R':
         //french e.g. 'rogier', but exclude 'hochmeier'
         if((current == last)
                 AND !!SlavoGermanic()
                         AND StringAt((current - 2), 2, IE, )
                                 AND !!StringAt((current - 4), 2, ME, MA, ))
                 MetaphAdd(, R);
         else
                 MetaphAdd(R);

         if(GetAt(current + 1) == 'R')
                 current += 2;
         else
                 current += 1;
         break;
    "
    | key dropsPrimaryR |
    key := self inputKey.
    "the tests are nested so that each one is only evaluated if the previous one held"
    dropsPrimaryR := currentIndex = key size
        and: [(self isSlavoGermanic: key) not
        and: [(key copyFrom: (currentIndex - 2 max: 1) to: (currentIndex - 1 max: 1)) = 'IE'
        and: [(#('ME' 'MA') includes: (key copyFrom: (currentIndex - 4 max: 1) to: (currentIndex - 3 max: 1))) not]]].
    self addPrimaryTranslation: (dropsPrimaryR ifTrue: [''] ifFalse: ['R']).
    self addSecondaryTranslation: 'R'.
    (self keyAt: currentIndex + 1) = $R
        ifTrue: [self incrementSkipCount: 1].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2361
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2362
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2363
processRemainingCharacters
    "Walk the input key from startIndex to its last position.
     While skipCount is non-zero, a position is consumed by decrementing
     skipCount only. Otherwise the walk ends as soon as both translations are
     longer than 4 characters; if not, currentIndex is set to the position and
     any character that is neither a vowel nor Y is dispatched via perform: to
     processCedille (Ç), processNtilde (Ñ) or process<letter>."

    self startIndex to: self inputKey size do: [:position |
        | letter handler |

        self skipCount = 0
            ifFalse: [self decrementSkipCount]
            ifTrue: [
                (self primaryTranslation size > 4 and: [self secondaryTranslation size > 4])
                    ifTrue: [^self].

                self currentIndex: position.
                letter := self keyAt: position.

                (letter isVowel or: [letter = $Y]) ifFalse: [
                    handler := letter = $Ç
                        ifTrue: [#processCedille]
                        ifFalse: [
                            letter = $Ñ
                                ifTrue: [#processNtilde]
                                ifFalse: [('process', letter asString) asSymbol]].
                    self perform: handler]]]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2388
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2389
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2390
processS
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2391
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2392
        case 'S':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2393
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2394
                                if(StringAt((current - 1), 3, ISL, YSL, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2395
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2396
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2397
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2398
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2399
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2400
                                //special case 'sugar-'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2401
                                if((current == 0) AND StringAt(current, 5, SUGAR, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2402
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2403
                                        MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2404
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2405
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2406
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2407
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2408
                                if(StringAt(current, 2, SH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2409
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2410
                                        //germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2411
                                        if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2412
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2413
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2414
                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2415
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2416
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2417
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2418
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2419
                                //italian & armenian
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2420
                                if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2421
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2422
                                        if(!!SlavoGermanic())
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2423
                                                MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2424
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2425
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2426
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2427
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2428
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2429
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2430
                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2431
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2432
                                if(((current == 0) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2433
                                                AND StringAt((current + 1), 1, M, N, L, W, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2434
                                                        OR StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2435
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2436
                                        MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2437
                                        if(StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2438
                                                current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2439
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2440
                                                current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2441
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2442
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2443
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2444
                                if(StringAt(current, 2, SC, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2445
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2446
                                        //Schlesinger's rule
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2447
                                        if(GetAt(current + 2) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2448
                                                //dutch origin, e.g. 'school', 'schooner'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2449
                                                if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2450
                                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2451
                                                        //'schermerhorn', 'schenker'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2452
                                                        if(StringAt((current + 3), 2, ER, EN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2453
                                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2454
                                                                MetaphAdd(X, SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2455
                                                        }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2456
                                                                MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2457
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2458
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2459
                                                }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2460
                                                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2461
                                                                MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2462
                                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2463
                                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2464
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2465
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2466
                                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2467
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2468
                                        if(StringAt((current + 2), 1, I, E, Y, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2469
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2470
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2471
                                                current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2472
                                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2473
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2474
                                        //else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2475
                                        MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2476
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2477
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2478
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2479
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2480
                                //french e.g. 'resnais', 'artois'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2481
                                if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2482
                                        MetaphAdd(, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2483
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2484
                                        MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2485
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2486
                                if(StringAt((current + 1), 1, S, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2487
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2488
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2489
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2490
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2491
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2492
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2493
        | nextChar char2 chars char |
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2494
        (#('ISL' 'YSL') includes: (self inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: self inputKey size))) 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2495
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2496
                ^self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2497
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2498
        (currentIndex = 1 and: [(self inputKey copyFrom: 1 to: (5 min: self inputKey size)) = 'SUGAR'])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2499
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2500
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2501
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2502
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2503
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2504
        (self inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: self inputKey size)) = 'SH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2505
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2506
                (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (self inputKey copyFrom: (currentIndex + 1 min: self inputKey size) to: ((currentIndex + 5) min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2507
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2508
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2509
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2510
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2511
                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2512
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2513
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2514
                ^self incrementSkipCount: 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2515
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2516
        ((#('SIO' 'SIA') includes: (self inputKey copyFrom: currentIndex to: (currentIndex + 2 min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2517
                or: [(self inputKey copyFrom: currentIndex to: (currentIndex + 3 min: self inputKey size)) = 'SIAN'])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2518
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2519
                (self isSlavoGermanic: self inputKey) not
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2520
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2521
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2522
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2523
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2524
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2525
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2526
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2527
                ^self incrementSkipCount: 2
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2528
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2529
        ((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2530
                or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2531
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2532
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2533
                addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2534
                nextChar = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2535
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2536
                        ^self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2537
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2538
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2539
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2540
        ((self inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: self inputKey size)) = 'SC')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2541
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2542
                (char2 := self keyAt: currentIndex + 2) = $H
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2543
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2544
                        (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := self inputKey copyFrom: ((currentIndex + 3) min: self inputKey size) to: ((currentIndex + 4) min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2545
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2546
                                (#('ER' 'EN') includes: chars)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2547
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2548
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2549
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2550
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2551
                                        self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2552
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2553
                                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2554
                                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2555
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2556
                                ((currentIndex = 1 and: [(char := self inputKey at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2557
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2558
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2559
                                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2560
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2561
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2562
                                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2563
                                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2564
                                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2565
                        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2566
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2567
                        (#($I $E $Y) includes: char2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2568
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2569
                                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2570
                                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2571
                                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2572
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2573
                                self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2574
                                addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2575
                                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2576
                        ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2577
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2578
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2579
        (currentIndex = self inputKey size and: [(#('AI' 'OI') includes: (self inputKey copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2580
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2581
                self addPrimaryTranslation: '';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2582
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2583
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2584
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2585
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2586
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2587
        (#($S $Z) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2588
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2589
                ^self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2590
        ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2591
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2592
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2593
processT
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2594
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2595
        case 'T':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2596
                                if(StringAt(current, 4, TION, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2597
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2598
                                        MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2599
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2600
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2601
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2602
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2603
                                if(StringAt(current, 3, TIA, TCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2604
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2605
                                        MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2606
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2607
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2608
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2609
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2610
                                if(StringAt(current, 2, TH, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2611
                                        OR StringAt(current, 3, TTH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2612
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2613
                                        //special case 'thomas', 'thames' or germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2614
                                        if(StringAt((current + 2), 2, OM, AM, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2615
                                                OR StringAt(0, 4, VAN , VON , ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2616
                                                        OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2617
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2618
                                                MetaphAdd(T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2619
                                        }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2620
                                                MetaphAdd(0, T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2621
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2622
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2623
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2624
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2625
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2626
                                if(StringAt((current + 1), 1, T, D, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2627
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2628
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2629
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2630
                                MetaphAdd(T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2631
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2632
"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2633
        ((self inputKey copyFrom: currentIndex to: ((currentIndex + 3) min: self inputKey size)) = 'TION')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2634
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2635
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2636
                addSecondaryTranslation: 'X'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2637
                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2638
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2639
        (#('TIA' 'TCH') includes: (self inputKey copyFrom: currentIndex to: ((currentIndex + 2) min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2640
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2641
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2642
                addSecondaryTranslation: 'X'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2643
                ^self incrementSkipCount: 2.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2644
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2645
        (((self inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: self inputKey size)) = 'TH') or: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2646
                ((self inputKey copyFrom: currentIndex to: ((currentIndex + 2) min: self inputKey size)) = 'TTH')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2647
        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2648
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2649
                ((#('OM' 'AM') includes: (self inputKey copyFrom: currentIndex + 2 to: ((currentIndex + 3) min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2650
                or: [(#('VAN ' 'VON ') includes: (self inputKey copyFrom: 1 to: (4 min: self inputKey size)))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2651
                        or: [(self inputKey copyFrom: 1 to: (3 min: self inputKey size)) = 'SCH']
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2652
                        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2653
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2654
                        self addPrimaryTranslation: 'T';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2655
                        addSecondaryTranslation: 'T'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2656
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2657
                        self addPrimaryTranslation: '0';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2658
                        addSecondaryTranslation: 'T'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2659
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2660
                ^self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2661
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2662
        (#($T $D) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2663
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2664
                self incrementSkipCount: 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2665
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2666
        self addPrimaryTranslation: 'T';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2667
        addSecondaryTranslation: 'T'.   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2668
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2669
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2670
processV
    "Translate the letter V found at currentIndex: both the primary and the
     secondary translation receive 'F'. When the following letter is also a V,
     incrementSkipCount: is sent first (the reference advances by 2 there).

     Reference implementation, http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'V':
                if(GetAt(current + 1) == 'V')
                        current += 2;
                else
                        current += 1;
                MetaphAdd(F);
                break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $V ifTrue: [ self incrementSkipCount: 1 ].

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2689
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2690
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2691
processW
    "Translate the letter W found at currentIndex (W can also occur in the
     middle of a word).

     Rules, in order (see the reference below):
       - 'WR'                      -> 'R' / 'R', one extra letter skipped
       - at the start of the word, followed by a vowel -> 'A' / 'F'
         at the start of the word, as 'WH'             -> 'A' / 'A'
         (processing continues with the next rules)
       - last letter after a vowel, or inside EWSKI/EWSKY/OWSKI/OWSKY,
         or the word starts with 'SCH'                 -> '' / 'F'
       - 'WICZ' / 'WITZ'           -> 'TS' / 'FX', three extra letters skipped
       - otherwise the W contributes nothing.

     Differences to the previous version of this method:
       - the start-of-word test is grouped as in the reference
         (first AND (vowel OR WH)); before, a 'WH' in the middle of a word
         entered the branch with an unassigned nextLetter
       - the 'SCH' prefix test is clamped to the key size, so keys shorter
         than 3 letters no longer ask for an out-of-range copy
       - the WICZ/WITZ window is 4 letters long (it was 5, so it could only
         match when it was truncated by the end of the word)
       - neighbour letters are only asked isVowel when their index is inside
         the key.

     Reference implementation, http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'W':
                //can also be in middle of word
                if(StringAt(current, 2, WR, ))
                {
                        MetaphAdd(R);
                        current += 2;
                        break;
                }

                if((current == 0)
                        AND (IsVowel(current + 1) OR StringAt(current, 2, WH, )))
                {
                        //Wasserman should match Vasserman
                        if(IsVowel(current + 1))
                                MetaphAdd(A, F);
                        else
                                //need Uomo to match Womo
                                MetaphAdd(A);
                }

                //Arnow should match Arnoff
                if(((current == last) AND IsVowel(current - 1))
                        OR StringAt((current - 1), 5, EWSKI, EWSKY, OWSKI, OWSKY, )
                                        OR StringAt(0, 3, SCH, ))
                {
                        MetaphAdd(, F);
                        current +=1;
                        break;
                }

                //polish e.g. 'filipowicz'
                if(StringAt(current, 4, WICZ, WITZ, ))
                {
                        MetaphAdd(TS, FX);
                        current +=4;
                        break;
                }

                //else skip it
                current +=1;
                break;
    "

    | key keySize pair nextIsVowel |

    key := self inputKey.
    keySize := key size.
    pair := key copyFrom: currentIndex to: (currentIndex + 1 min: keySize).

    "WR: only the R is encoded"
    pair = 'WR' ifTrue: [
        self addPrimaryTranslation: 'R';
             addSecondaryTranslation: 'R'.
        ^ self incrementSkipCount: 1
    ].

    "start of word: Wasserman should match Vasserman, Uomo should match Womo"
    currentIndex = 1 ifTrue: [
        nextIsVowel := keySize > 1 and: [(self keyAt: currentIndex + 1) isVowel].
        (nextIsVowel or: [pair = 'WH']) ifTrue: [
            self addPrimaryTranslation: 'A';
                 addSecondaryTranslation: (nextIsVowel ifTrue: ['F'] ifFalse: ['A'])
        ]
    ].

    "Arnow should match Arnoff"
    (((currentIndex = keySize
            and: [currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel]])
        or: [#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY')
                includes: (key copyFrom: (currentIndex - 1 max: 1)
                               to: (currentIndex + 3 min: keySize))])
        or: [(key copyFrom: 1 to: (3 min: keySize)) = 'SCH'])
    ifTrue: [
        self addPrimaryTranslation: '';
             addSecondaryTranslation: 'F'.
        ^ self
    ].

    "polish, e.g. filipowicz"
    (#('WICZ' 'WITZ')
        includes: (key copyFrom: currentIndex to: (currentIndex + 3 min: keySize)))
    ifTrue: [
        self addPrimaryTranslation: 'TS';
             addSecondaryTranslation: 'FX'.
        ^ self incrementSkipCount: 3
    ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2769
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2770
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2771
processX
    "Translate the letter X found at currentIndex.

     X is encoded as 'KS' / 'KS' unless it is the last letter of the word and
     is preceded by IAU, EAU, AU or OU (french, e.g. breaux), in which case it
     contributes nothing. When the following letter is C or X,
     incrementSkipCount: is sent (the reference advances by 2 there).

     Differences to the previous version of this method:
       - the start of the look-behind window is clamped with max: 1
         (it was min: 1, which produced a start index below 1 for short keys
         and index 1 for all longer ones)
       - the look-behind window ends at currentIndex - 1, i.e. it holds the
         letters before the X as in the reference; before, the X itself was
         part of the compared string, so the french rule could never match.

     Reference implementation, http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'X':
                //french e.g. breaux
                if(!!((current == last)
                        AND (StringAt((current - 3), 3, IAU, EAU, )
                                        OR StringAt((current - 2), 2, AU, OU, ))) )
                        MetaphAdd(KS);

                if(StringAt((current + 1), 1, C, X, ))
                        current += 2;
                else
                        current += 1;
                break;
    "

    | key silentFrenchEnding |

    key := self inputKey.

    "for currentIndex = 1 both copies are (1 to: 0), i.e. empty, and match nothing"
    silentFrenchEnding := currentIndex = key size and: [
        (#('IAU' 'EAU')
            includes: (key copyFrom: (currentIndex - 3 max: 1) to: currentIndex - 1))
        or: [#('AU' 'OU')
            includes: (key copyFrom: (currentIndex - 2 max: 1) to: currentIndex - 1)]].

    silentFrenchEnding ifFalse: [
        self addPrimaryTranslation: 'KS';
             addSecondaryTranslation: 'KS'.
    ].

    (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue: [
        ^ self incrementSkipCount: 1
    ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2797
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2798
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2799
processZ
    "Translate the letter Z found at currentIndex.

       - 'ZH' (chinese pinyin, e.g. zhao) -> 'J' / 'J', one extra letter skipped
       - followed by ZO, ZI or ZA, or in a slavo-germanic word when the Z is
         not the first letter and the preceding letter is not T -> 'S' / 'TS'
       - otherwise -> 'S' / 'S'
     In the last two cases incrementSkipCount: is sent when the following
     letter is another Z.

     Difference to the previous version of this method: the preceding letter
     is compared against the Character $T. It used to be compared against the
     String 'T', which a Character never equals, so the not-preceded-by-T
     condition was always true.

     Reference implementation, http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Z':
                //chinese pinyin e.g. 'zhao'
                if(GetAt(current + 1) == 'H')
                {
                        MetaphAdd(J);
                        current += 2;
                        break;
                }else
                        if(StringAt((current + 1), 2, ZO, ZI, ZA, )
                                OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
                        {
                                MetaphAdd(S, TS);
                        }
                        else
                                MetaphAdd(S);

                if(GetAt(current + 1) == 'Z')
                        current += 2;
                else
                        current += 1;
                break;
    "

    | key keySize nextLetter followingPair secondary |

    key := self inputKey.
    keySize := key size.
    nextLetter := self keyAt: currentIndex + 1.

    nextLetter = $H ifTrue: [
        self addPrimaryTranslation: 'J';
             addSecondaryTranslation: 'J'.
        ^ self incrementSkipCount: 1
    ].

    "the two letters after the Z, clamped to the end of the key"
    followingPair := key copyFrom: (currentIndex + 1 min: keySize)
                         to: (currentIndex + 2 min: keySize).

    secondary := ((#('ZO' 'ZI' 'ZA') includes: followingPair)
                    or: [(self isSlavoGermanic: key)
                            and: [currentIndex > 1
                                    and: [(self keyAt: currentIndex - 1) ~= $T]]])
                        ifTrue: ['TS']
                        ifFalse: ['S'].

    self addPrimaryTranslation: 'S';
         addSecondaryTranslation: secondary.

    nextLetter = $Z ifTrue: [
        ^ self incrementSkipCount: 1
    ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2845
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2846
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2847
!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2848
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2849
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2850
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2851
Miracode (also called American Soundex) is like Soundex with the addition that h and w are 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2852
discarded if they separate consonants.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2853
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2854
These variants may be specifically important because they were used in U.S. National Archives. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2855
Most archive data were encoded with Miracode, but there are some entries encoded with 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2856
Simplified Soundex. 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2857
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2858
The HW-rule was documented as a standard in 1910, but actually data of 1880, 1900 and 1910 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2859
censuses were encoded with mixed methods.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2860
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2861
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2862
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2863
!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2864
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2865
phoneticStringsFor:aString 
    "Answer a one-element Array holding the 4-character code of aString.

     The code starts with the uppercased first letter of aString. Every
     further letter is mapped through translate: and its result is appended
     unless it is nil, equal to '0', or equal to the result remembered from
     the previous letter. The letters W and H do not replace the remembered
     result. The code is cut off at 4 characters, or padded with '0' up to
     4 characters."

    |upper code digit lastDigit|

    upper := aString asUppercase.
    code := upper first asString.
    lastDigit := self translate:upper first.

    upper from:2 to:upper size do:[:ch |
        digit := self translate:ch.
        (digit isNil or:[ digit = '0' or:[ digit = lastDigit ]]) ifFalse:[
            code := code , digit.
            code size == 4 ifTrue:[^ Array with:code ].
        ].
        (ch = $W or:[ ch = $H ]) ifFalse:[
            lastDigit := digit.
        ].
    ].

    "pad with zeros, then keep exactly the first four characters"
    ^ Array with:((code , '000') copyFrom:1 to:4)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2887
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2888
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2889
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2890
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2891
version
    "Answer the source-header string (file path, revision, date, author) recorded for this class file."
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2892
    ^ '$Header: /cvs/stx/stx/libbasic2/PhoneticStringUtilities.st,v 1.9 2009-10-01 08:46:37 cg Exp $'
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2893
!
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2894
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2895
version_CVS
    "Answer the CVS source-header string (file path, revision, date, author) recorded for this class file."
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  2896
    ^ '$Header: /cvs/stx/stx/libbasic2/PhoneticStringUtilities.st,v 1.9 2009-10-01 08:46:37 cg Exp $'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2897
! !