PhoneticStringUtilities.st
author Claus Gittinger <cg@exept.de>
Tue, 01 Aug 2017 11:40:16 +0200
changeset 4488 51f2907c7389
parent 4487 908110f595e9
child 4489 2d7af11ffcd7
permissions -rw-r--r--
#BUGFIX by cg class: PhoneticStringUtilities class added: #miracodeCodeOf: comment/format in: #mySQLSoundexCodeOf: #soundexCodeOf: class: PhoneticStringUtilities::DoubleMetaphoneStringComparator removed: #decrementSkipCount #incrementSkipCount #incrementSkipCount: #incrementStartIndex comment/format in: #addPrimaryTranslation: #addSecondaryTranslation: #keyAt: #processB #processD #processJ #processL #processM #processR #processX #processZ changed:18 methods class: PhoneticStringUtilities::DoubleMetaphoneStringComparator class comment/format in: #documentation changed: #isSlavoGermanic: class: PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class definition added: #encode: removed: #phoneticStringsFor: comment/format in: #convertFirst: changed: #convertRest: class: PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class added: #examples comment/format in: #documentation class: PhoneticStringUtilities::MRAStringComparator class definition added: #encode: class: PhoneticStringUtilities::MRAStringComparator class added: #documentation #rCode class: PhoneticStringUtilities::MiracodeStringComparator added: #encode: removed: #phoneticStringsFor: class: PhoneticStringUtilities::MySQLSoundexStringComparator added: #encode: removed: #phoneticStringsFor: class: PhoneticStringUtilities::NYSIISStringComparator class definition added: #encode: removed: #phoneticStringsFor: class: PhoneticStringUtilities::PhonemStringComparator class definition added: #encode: removed: #phoneticStringsFor: class: PhoneticStringUtilities::SingleResultPhoneticStringComparator class definition added: #encode: #phoneticStringsFor: class: PhoneticStringUtilities::SingleResultPhoneticStringComparator class added: #documentation class: PhoneticStringUtilities::SoundexStringComparator class definition added: #encode: removed: #phoneticStringsFor:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     1
"{ Encoding: utf8 }"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     2
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    17
"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    18
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	instanceVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	classVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	poolDictionaries:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    26
Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    27
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    28
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    29
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    30
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    31
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    32
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    33
PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    34
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    35
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    36
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    37
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    38
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    39
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    40
PhoneticStringUtilities::PhoneticStringComparator subclass:#SingleResultPhoneticStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    41
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    42
	classVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    43
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    44
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    45
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    46
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    47
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MRAStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    48
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    49
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    50
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    51
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    52
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    53
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    54
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SoundexStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    55
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    56
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    57
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    58
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    59
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    60
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    61
PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    62
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    63
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    64
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    65
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    66
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    67
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    68
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#NYSIISStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    69
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    70
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    71
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    72
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    73
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    74
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    75
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#PhonemStringComparator
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    76
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    77
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    78
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    79
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    80
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    81
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    82
PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    83
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    84
		currentIndex skipCount'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    85
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    86
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    87
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    88
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    89
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    90
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    91
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    92
	classVariableNames:'CharacterTranslationDict'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    93
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    94
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    95
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    96
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    97
PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    98
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    99
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   100
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   101
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   102
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   103
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   106
copyright
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   107
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   108
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   109
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   110
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   111
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   112
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   113
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   114
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   115
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   116
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   117
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   118
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   119
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   120
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   121
documentation
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   122
"
2445
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   123
    Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   124
    These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   125
    to provide good results for non-English languages.
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   126
    
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   127
    soundexCode
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   128
        this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   129
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   130
    nysiis
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   131
        a modified soundex algorithm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   132
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   133
    miracode
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   134
        another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   135
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   136
    mySQLSoundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   137
        another modified soundex algorithm used in mySQL.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   138
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   139
    koelner phoneticCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   140
        provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   141
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   142
    Double metaphone 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   143
        works with most European languages.
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   144
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   145
    phonem
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   146
        described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   147
        from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   148
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   149
    More info for German readers is found in:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   150
        http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   151
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   152
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   153
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   154
sampleData
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   155
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   156
    for the 50 most common German names, we get:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   157
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   158
                            ext. 
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   159
    name        soundex   soundex   metaphone   phonet  phonet2     phonix      daitsch phonem      koeln
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   160
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   161
    müller      M460    54600000    MLR         MÜLA    NILA        M4000000    689000  MYLR        657
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   162
    schmidt     S253    25300000    SKMTT       SHMIT   ZNIT        S5300000    463000  CMYD        8628
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   163
    schneider   S253    25360000    SKNTR       SHNEIDA ZNEITA      S5300000    463900  CNAYDR      8627
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   164
    fischer     F260    12600000    FSKR        FISHA   FIZA        F8000000    749000  VYCR        387
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   165
    weber       W160    16000000    WBR         WEBA    FEBA        $1000000    779000  VBR         317
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   166
    meyer       M600    56000000    MYR         MEIA    NEIA        M0000000    619000  MAYR        67
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   167
    wagner      W256    25600000    WKNR        WAKNA   FAKNA       $2500000    756900  VACNR       367
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   168
    schulz      S242    24200000    SKLS        SHULS   ZULZ        S4800000    484000  CULC        85
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   169
    becker      B260    12600000    BKR         BEKA    BEKA        B2000000    759000  BCR         147
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   170
    hoffmann    H155    15500000    HFMN        HOFMAN  UFNAN       $7550000    576600  OVMAN       036
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   171
    schäfer     S216    21600000    SKFR        SHEFA   ZEFA        S7000000    479000  CVR         837
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   173
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   178
    "return a koelner phonetic code.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   179
     The koelnerPhonetic code is for the German language what the soundex code is for English;
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   180
     it returns similar strings for similar sounding words. 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   181
     There are some differences to soundex, though: 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   182
        its length is not limited to 4, but depends on the length of the original string;
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   183
        it does not start with the first character of the input.
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   184
     This algorithm is described by Postel 1969"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   185
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   186
    ^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   189
     #(
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   190
        'Müller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   191
        'Miller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   192
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   193
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   194
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   195
        'Mülherr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
        'Myler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
        'Millar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
        'Myller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   199
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   200
        'Müler'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
        'Muehler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   202
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   203
        'Müllerr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   204
        'Muehlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   205
        'Muellar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   206
        'Mueler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   207
        'Mülleer'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   208
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   209
        'Nüller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   210
        'Nyller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   211
        'Niler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   212
        'Czerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   213
        'Tscherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   214
        'Czernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   215
        'Tschernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   216
        'Schernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   217
        'Scherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   218
        'Scherno'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   219
        'Czerne'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   220
        'Zerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   221
        'Tzernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   222
        'Breschnew'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   223
     ) do:[:w |
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   224
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   225
     ].
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   226
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   227
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   228
    "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   229
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   230
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   231
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   232
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   233
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   234
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   235
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   236
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   237
!
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   238
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   239
miracodeCodeOf:aString
    "Answer the miracode soundex phonetic code of aString
     (the original comment states: a code or nil).
     Miracode is a slightly modified soundex algorithm.
     The encoding itself is delegated to a MiracodeStringComparator;
     the first of the phonetic strings it answers is returned.
     Notice that there are better algorithms around (doubleMetaphone)"

    |comparator phoneticCodes|

    comparator := MiracodeStringComparator new.
    phoneticCodes := comparator phoneticStringsFor:aString.
    ^ phoneticCodes first

    "
     PhoneticStringUtilities miracodeCodeOf:'claus'
     PhoneticStringUtilities miracodeCodeOf:'clause'
     PhoneticStringUtilities miracodeCodeOf:'close'
     PhoneticStringUtilities miracodeCodeOf:'smalltalk'
     PhoneticStringUtilities miracodeCodeOf:'smaltalk'
     PhoneticStringUtilities miracodeCodeOf:'smaltak'
     PhoneticStringUtilities miracodeCodeOf:'smaltok'
     PhoneticStringUtilities miracodeCodeOf:'smoltok'
     PhoneticStringUtilities miracodeCodeOf:'aa'
     PhoneticStringUtilities miracodeCodeOf:'by'
     PhoneticStringUtilities miracodeCodeOf:'bab'
     PhoneticStringUtilities miracodeCodeOf:'bob'
     PhoneticStringUtilities miracodeCodeOf:'bop'
     PhoneticStringUtilities miracodeCodeOf:'pub'
    "

    "Created: / 28-07-2017 / 15:32:41 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   264
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   265
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   266
mySQLSoundexCodeOf:aString
    "Answer the mySQL soundex code of aString.
     The mySQL soundex code is different from the miracode 'american' soundex
     (no 4char limitation; different order of duplicate vowel vs. duplicate code elimination).
     The encoding itself is delegated to a MySQLSoundexStringComparator;
     the first of the phonetic strings it answers is returned.
     Notice that there are better algorithms around (doubleMetaphone)"

    |comparator phoneticCodes|

    comparator := MySQLSoundexStringComparator new.
    phoneticCodes := comparator phoneticStringsFor:aString.
    ^ phoneticCodes first

    "
     #(
        'Müller'
        'Miller'
        'Mueller'
        'Mühler'
        'Mühlherr'
        'Mülherr'
        'Myler'
        'Millar'
        'Myller'
        'Müllar'
        'Müler'
        'Muehler'
        'Mülller'
        'Müllerr'
        'Muehlherr'
        'Muellar'
        'Mueler'
        'Mülleer'
        'Mueller'
        'Nüller'
        'Nyller'
        'Niler'
        'Czerny'
        'Tscherny'
        'Czernie'
        'Tschernie'
        'Schernie'
        'Scherny'
        'Scherno'
        'Czerne'
        'Zerny'
        'Tzernie'
        'Breschnew'
     ) do:[:w |
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
     ].
    "

    "
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'.
     PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
    "

    "Modified (comment): / 28-07-2017 / 15:34:03 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   324
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   325
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   326
soundexCodeOf:aString
    "Answer the soundex phonetic code of aString
     (the original comment states: a code or nil).
     Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
     tool when searching for words where the correct spelling is unknown.
     (read Knuth or search the web if you don't know what a soundex code is).
     The encoding itself is delegated to a SoundexStringComparator;
     the first of the phonetic strings it answers is returned.
     Caveat: 'similar sounding words' means: 'similar sounding in english'.
     Notice that there are better algorithms around (doubleMetaphone)"

    |comparator phoneticCodes|

    comparator := SoundexStringComparator new.
    phoneticCodes := comparator phoneticStringsFor:aString.
    ^ phoneticCodes first

"/ old code - now use code in private class...
"/    |inStream codeStream ch last lch codeLength codes code lastCode|
"/
"/    inStream := aString readStream.
"/    inStream skipSeparators.
"/    inStream atEnd ifTrue:[
"/        ^ nil
"/    ].
"/
"/    ch := inStream next.
"/    ch isLetter ifFalse:[
"/        ^ nil
"/    ].
"/    codeLength := 0.
"/
"/    codes := Dictionary new.
"/    codes atAll:'bpfv'     put:$1.
"/    codes atAll:'cskgjqxz' put:$2.
"/    codes atAll:'dt'       put:$3.
"/    codes atAll:'l'        put:$4.
"/    codes atAll:'nm'       put:$5.
"/    codes atAll:'r'        put:$6.
"/
"/    codeStream := WriteStream on:(String new:4).
"/    codeStream nextPut:(ch asUppercase).
"/    last := ch asLowercase.
"/    lastCode := codes at:last ifAbsent:nil.
"/
"/    [inStream atEnd] whileFalse:[
"/        ch := inStream next.
"/        lch := ch asLowercase.
"/        lch = last ifFalse:[
"/            last := lch.
"/
"/            code := codes at:lch ifAbsent:nil.
"/            (code notNil and:[ code ~= lastCode]) ifTrue:[
"/                codeLength < 3 ifTrue:[
"/                    codeStream nextPut:code.
"/                    codeLength := codeLength + 1.
"/                    codeLength > 3 ifTrue:[^ codeStream contents].
"/                ].
"/            ].
"/            lastCode := code.
"/        ]
"/    ].
"/    [ codeLength < 3 ] whileTrue:[
"/        codeStream nextPut:$0.
"/        codeLength := codeLength + 1.
"/    ].
"/
"/    ^ codeStream contents

    "
     PhoneticStringUtilities soundexCodeOf:'claus'
     PhoneticStringUtilities soundexCodeOf:'clause'
     PhoneticStringUtilities soundexCodeOf:'close'
     PhoneticStringUtilities soundexCodeOf:'smalltalk'
     PhoneticStringUtilities soundexCodeOf:'smaltalk'
     PhoneticStringUtilities soundexCodeOf:'smaltak'
     PhoneticStringUtilities soundexCodeOf:'smaltok'
     PhoneticStringUtilities soundexCodeOf:'smoltok'
     PhoneticStringUtilities soundexCodeOf:'aa'
     PhoneticStringUtilities soundexCodeOf:'by'
     PhoneticStringUtilities soundexCodeOf:'bab'
     PhoneticStringUtilities soundexCodeOf:'bob'
     PhoneticStringUtilities soundexCodeOf:'bop'
    "

    "Modified (comment): / 28-07-2017 / 15:33:53 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   405
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   406
3648
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   407
!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   408
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   409
isUtilityClass
    "Answer true only if the receiver is PhoneticStringUtilities itself;
     any other receiver which inherits this method answers false."

    ^ PhoneticStringUtilities == self
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   411
! !
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   412
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   413
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   414
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   415
defaultClass
    "Answer the comparator class which is used by default:
     the SoundexStringComparator."

    ^ SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   417
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   418
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   419
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   420
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   421
documentation
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   422
"
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   423
    abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   424
    They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   425
    to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   426
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   427
    Note that some comparators are better for particular languages.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   428
"
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   429
!
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   430
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   431
examples
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   432
"
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   433
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   434
            does:'miller' soundLike:'miler'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   435
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   436
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   437
            does:'miller' soundLike:'milner'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   438
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   439
     PhoneticStringUtilities::SoundexStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   440
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   441
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   442
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   443
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   444
"
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   445
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   446
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   447
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   448
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   449
new
    "Answer a new, initialized instance of the receiver.
     The instance is created with #basicNew and then sent #initialize.
     The cascaded #yourself ensures that the new instance is answered,
     even if some subclass's #initialize ends with an explicit return
     of another object (without it, the return value of #initialize
     would be answered instead of the instance)."

    ^ self basicNew initialize; yourself
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   451
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   452
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   453
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   454
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   455
isAbstract
    "Answer true only if the receiver is PhoneticStringComparator itself;
     subclasses which inherit this method answer false."

    ^ PhoneticStringUtilities::PhoneticStringComparator == self
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   457
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   458
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   459
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   460
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   461
does:aString soundLike:anotherString
    "Answer true if at least one of the phonetic strings computed for aString
     is equal to one of the phonetic strings computed for anotherString;
     answer false otherwise.
     Both code collections are obtained via #phoneticStringsFor:."

    |firstCodes secondCodes|

    firstCodes := self phoneticStringsFor:aString.
    secondCodes := self phoneticStringsFor:anotherString.

    "/ pairwise comparison; leave as soon as any pair of codes matches
    firstCodes do:[:eachFirstCode |
        secondCodes do:[:eachSecondCode |
            eachFirstCode = eachSecondCode ifTrue:[^ true]
        ]
    ].
    ^ false

    "
     PhoneticStringUtilities::SoundexStringComparator new
            does:'miller' soundLike:'miler'.

     PhoneticStringUtilities::SoundexStringComparator new
            does:'miller' soundLike:'milner'.

     PhoneticStringUtilities::SoundexStringComparator new
            does:'müller' soundLike:'mueller'.

     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
            does:'müller' soundLike:'mueller'.
    "

    "Modified (comment): / 13-07-2017 / 17:51:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   485
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   486
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   487
phoneticStringsFor: aString
    "Should answer an array of alternate phonetic strings for the given input string.
     Abstract at this level: the method only reports a subclass responsibility,
     so every concrete comparator has to redefine it.
     The result of the responsibility error is answered explicitly, in the same
     way as the other abstract methods of this file do."

    ^ self subclassResponsibility

    "
     (PhoneticStringUtilities::SoundexStringComparator new
            phoneticStringsFor:'miller') first 
            
     'miller' asSoundexCode 
    "

    "Modified (comment): / 27-07-2017 / 15:07:59 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   500
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   501
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   502
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   503
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   504
initialize
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   505
    "Invoked when a new instance is created.
     The body contains no statements; only comments follow."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   506
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   507
    "/ nothing is set up here; add initialization code when it is needed
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   508
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   509
    "/ super initialize.   -- commented since inherited method does nothing
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   510
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   511
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   512
!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   513
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   514
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   515
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   516
    There are many extended and enhanced soundex variants around;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   517
    here is one, called 'extended soundex'. It is described for example in
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   518
    http://www.epidata.dk/documentation.php.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   519
    An author or origin is unknown.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   520
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   521
    The number of digits is increased to 5 or 8;
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   522
    The first character is not used literally; instead it is encoded like the rest.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   523
    This might have a negative effect on names starting with a vowel, though.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   524
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   525
    Overall, it can be doubted if this is really an enhancement after all.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   526
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   527
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   528
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   529
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   530
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   531
phoneticStringsFor:aString
    "Answer a two-element array with the extended soundex codes of aString:
     the first element is the code of length 5, the second the code of length 8.

     Every character of the uppercased input is mapped by #translate:.
     Characters without a code (nil) and vowels ('0') are not emitted, and a
     code is not emitted if it equals the code of the character immediately
     before it. Because the previous code is remembered even for skipped
     characters, a vowel or an unknown character between two equal codes
     lets both of them through.
     Short results are padded with '0' on the right.

     Fix: the 5-digit code is now always cut to 5 digits. Previously an input
     producing 6 or 7 codes answered a first element of 6 or 7 digits,
     because truncation was only done when 8 codes had been collected."

    |codes code prevCode padded|

    codes := ''.
    aString asUppercase do:[:c | 
        code := self translate:c.
        (code notNil and:[ code ~= '0' and:[ code ~= prevCode ]]) ifTrue:[
            codes := codes , code.
            "/ 8 digits is all that is ever needed - stop scanning early
            codes size = 8 ifTrue:[
                ^ Array with:(codes copyTo:5) with:codes 
            ].
        ].
        prevCode := code
    ].

    "/ fewer than 8 digits collected: pad to 8; the short code is its 5-digit prefix
    padded := codes , (String new:(8 - codes size) withAll:$0).
    ^ Array with:(padded copyTo:5) with:padded

    "
     self basicNew phoneticStringsFor:'müller'  #('87900' '87900000')  
     self basicNew phoneticStringsFor:'miller'  #('87900' '87900000')   
     self basicNew phoneticStringsFor:'muller'  #('87900' '87900000')    
     self basicNew phoneticStringsFor:'muler'   #('87900' '87900000')
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
     self basicNew phoneticStringsFor:'stevenson'  #('36283' '36283800')
    "
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   574
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   575
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   576
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   577
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   578
translate:aCharacter
    "Answer the extended soundex digit, as a one-character string, for the
     uppercase letter aCharacter:
        '0'         for A E I O U Y
        '1' .. '9'  for the consonant groups listed below
        nil         for every other character (nothing else is matched, so
                    for example H, W, lowercase letters and umlauts answer nil).
     Use simple if's for more speed when compiled."

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].         
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ]. 
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ]. 
    aCharacter == $V ifTrue:[^ '2' ]. 

    aCharacter == $C ifTrue:[^ '3' ]. 
    aCharacter == $S ifTrue:[^ '3' ]. 
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ]. 
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ]. 
    aCharacter == $X ifTrue:[^ '5' ]. 
    aCharacter == $Z ifTrue:[^ '5' ]. 

    "/ a second test for $G used to sit in this group; it could never be
    "/ reached, because G is already answered as '4' above, and was removed.
    aCharacter == $D ifTrue:[^ '6' ]. 
    aCharacter == $T ifTrue:[^ '6' ]. 

    aCharacter == $L ifTrue:[^ '7' ]. 

    aCharacter == $M ifTrue:[^ '8' ]. 
    aCharacter == $N ifTrue:[^ '8' ]. 

    aCharacter == $R ifTrue:[^ '9' ]. 
    ^ nil
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   617
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   618
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   619
!PhoneticStringUtilities::SingleResultPhoneticStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   620
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   621
documentation
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   622
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   623
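    Abstract superclass for phonetic comparators which produce exactly one
    phonetic code per word: subclasses implement #encode:, and
    #phoneticStringsFor: wraps that single code into a one-element array.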
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   624
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   625
    [author:]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   626
        cg
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   627
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   628
    [instance variables:]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   629
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   630
    [class variables:]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   631
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   632
    [see also:]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   633
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   634
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   635
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   636
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   637
!PhoneticStringUtilities::SingleResultPhoneticStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   638
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   639
encode:word
    "Answer the phonetic code for word.
     Abstract here: only reports a subclass responsibility.
     The result is what #phoneticStringsFor: of this class wraps into an array."
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   640
    ^ self subclassResponsibility
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   641
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   642
    "Created: / 28-07-2017 / 15:20:49 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   643
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   644
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   645
phoneticStringsFor:word 
    "Answer a one-element array which holds the single phonetic code
     generated by #encode: for word."

    |code|

    code := self encode:word.
    ^ Array with:code

    "Created: / 28-07-2017 / 15:20:38 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   649
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   650
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   651
!PhoneticStringUtilities::MRAStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   652
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   653
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   654
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   655
    Match Rating Approach Encoder
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   656
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   657
    The Western Airlines matching rating approach name encoder
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   658
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   659
    [see also:]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   660
        https://en.wikipedia.org/wiki/Match_Rating_Approach
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   661
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   662
        G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   663
            ''Accessing Individual Records from Personal Data Files Using Nonunique Identifiers'' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   664
            US National Institute of Standards and Technology, SP-500-2 (1977), p. 17.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   665
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   666
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   667
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   668
rCode
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   669
"<<END
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   670
## Copyright (c) 2015, James P. Howard, II <jh@jameshoward.us>
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   671
##
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   672
## Redistribution and use in source and binary forms, with or without
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   673
## modification, are permitted provided that the following conditions are
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   674
## met:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   675
##
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   676
##     Redistributions of source code must retain the above copyright
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   677
##     notice, this list of conditions and the following disclaimer.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   678
##
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   679
##     Redistributions in binary form must reproduce the above copyright
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   680
##     notice, this list of conditions and the following disclaimer in
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   681
##     the documentation and/or other materials provided with the
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   682
##     distribution.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   683
##
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   684
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   685
## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   686
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   687
## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   688
## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   689
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   690
## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   691
## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   692
## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   693
## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   694
## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   695
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   696
#' @rdname mra
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   697
#' @title Match Rating Approach Encoder
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   698
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   699
#' @description
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   700
#' The Western Airlines matching rating approach name encoder
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   701
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   702
#' @param word string or vector of strings to encode
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   703
#' @param x MRA-encoded character vector
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   704
#' @param y MRA-encoded character vector
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   705
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   706
#' @details
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   707
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   708
#' The variable \code{word} is the name to be encoded.  The variable
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   709
#' \code{maxCodeLen} is \emph{not} supported in this algorithm encoder
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   710
#' because the algorithm itself is dependent upon its six-character
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   711
#' length.  The variables \code{x} and \code{y} are MRA-encoded and are
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   712
#' compared to each other using the MRA comparison specification.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   713
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   714
#' @return The \code{mra_encode} function returns match rating approach
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   715
#' encoded character vector.  The \code{mra_compare} returns a boolean
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   716
#' vector which is \code{TRUE} if \code{x} and \code{y} pass the MRA
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   717
#' comparison test.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   718
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   719
#' @references
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   720
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   721
#' G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   722
#' \emph{Accessing Individual Records from Personal Data Files Using
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   723
#' Nonunique Identifiers,} US National Institute of Standards and
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   724
#' Technology, SP-500-2 (1977), p. 17.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   725
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   726
#' @family phonics
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   727
#'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   728
#' @examples
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   729
#' mra_encode("William")
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   730
#' mra_encode(c("Peter", "Peady"))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   731
#' mra_encode("Stevenson")
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   732
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   733
#' @rdname mra
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   734
#' @name mra_encode
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   735
#' @export
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   736
mra_encode <- function(word) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   737
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   738
    ## First, remove any nonalphabetical characters and uppercase it
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   739
    word <- gsub("[^[:alpha:]]*", "", word)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   740
    word <- toupper(word)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   741
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   742
    ## First character of key = first character of name
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   743
    first <- substr(word, 1, 1)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   744
    word <- substr(word, 2, nchar(word))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   745
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   746
    ## Delete vowels not at the start of the word
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   747
    word <- gsub("[AEIOU]", "", word)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   748
    word <- paste(first, word, sep = "")
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   749
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   750
    ## Remove duplicate consecutive characters
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   751
    word <- gsub("([A-Z])\\1+", "\\1", word)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   752
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   753
    ## If longer than 6 characters, take first and last 3...and we have
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   754
    ## to vectorize it
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   755
    for(i in 1:length(word)) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   756
        if((l = nchar(word[i])) > 6) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   757
            first <- substr(word[i], 1, 3)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   758
            last <- substr(word[i], l - 2, l)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   759
            word[i] <- paste(first, last, sep = "");
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   760
        }
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   761
    }
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   762
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   763
    return(word)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   764
}
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   765
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   766
#' @rdname mra
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   767
#' @name mra_compare
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   768
#' @export
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   769
mra_compare <- function(x, y) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   770
    mra <- data.frame(x = x, y = y, sim = 0, min = 100, stringsAsFactors = FALSE)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   771
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   772
    ## Obtain the minimum rating value by calculating the length sum of
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   773
    ## the encoded strings and using table A (from Wikipedia).  We start
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   774
    ## by setting the minimum to be the sum and move from there.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   775
    mra$lensum <- nchar(mra$x) + nchar(mra$y)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   776
    mra$min[mra$lensum == 12] <- 2
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   777
    mra$min[mra$lensum > 7 && mra$lensum <= 11] <- 3
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   778
    mra$min[mra$lensum > 4 && mra$lensum <= 7] <- 4
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   779
    mra$min[mra$lensum <= 4] <- 5
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   780
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   781
    ## If the length difference between the encoded strings is 3 or
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   782
    ## greater, then no similarity comparison is done.  For us, we
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   783
    ## continue the similarity comparison out of laziness and ensure the
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   784
    ## minimum is impossibly high to meet.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   785
    mra$min[abs(nchar(mra$x) - nchar(mra$y)) >= 3] <- 100
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   786
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   787
    ## Start the comparison.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   788
    x <- strsplit(mra$x, split = "")
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   789
    y <- strsplit(mra$y, split = "")
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   790
    rows <- nrow(mra)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   791
    for(i in 1:rows) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   792
        ## Process the encoded strings from left to right and remove any
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   793
        ## identical characters found from both strings respectively.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   794
        j <- 1
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   795
        while(j < min(length(x[[i]]), length(y[[i]]))) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   796
            if(x[[i]][j] == y[[i]][j]) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   797
                x[[i]] <- x[[i]][-j]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   798
                y[[i]] <- y[[i]][-j]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   799
            } else
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   800
                j <- j + 1
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   801
        }
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   802
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   803
        ## Process the unmatched characters from right to left and
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   804
        ## remove any identical characters found from both names
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   805
        ## respectively.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   806
        x[[i]] <- rev(x[[i]])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   807
        y[[i]] <- rev(y[[i]])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   808
        j <- 1
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   809
        while(j < min(length(x[[i]]), length(y[[i]]))) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   810
            if(x[[i]][j] == y[[i]][j]) {
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   811
                x[[i]] <- x[[i]][-j]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   812
                y[[i]] <- y[[i]][-j]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   813
            } else
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   814
                j <- j + 1
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   815
        }
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   816
        ## Subtract the number of unmatched characters from 6 in the
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   817
        ## longer string. This is the similarity rating.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   818
        len <- min(length(x[[i]]), length(y[[i]]))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   819
        mra$sim[i] <- 6 - len
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   820
    }
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   821
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   822
    ## If the similarity is greater than or equal to the minimum
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   823
    ## required, it is a successful match.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   824
    mra$match <- (mra$sim >= mra$min)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   825
    return(mra$match)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   826
}
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   827
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   828
END>>
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   829
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   830
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   831
!PhoneticStringUtilities::MRAStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   832
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   833
encode:wordIn 
    "Answer the Match Rating Approach (MRA) key of wordIn.
     see https://en.wikipedia.org/wiki/Match_Rating_Approach

     Steps:
       1. keep only the letters of wordIn and convert them to uppercase
       2. keep the first letter; drop every vowel after it
       3. collapse runs of identical consecutive characters into one
       4. if the result is longer than 6 characters, keep only its first 3
          and its last 3 characters.

     Answers an empty string if wordIn contains no letters at all
     (instead of failing on the access to the first character)."
    
    |word prev|

    "/ First, remove any nonalphabetical characters and uppercase it
    word := wordIn select:#isLetter thenCollect:#asUppercase.

    "/ nothing left to encode (empty input, or digits/punctuation only)
    word isEmpty ifTrue:[^ word].

    "/ Delete vowels not at the start of the word
    word := word first asString , ((word from:2) reject:#isVowel).

    "/ Remove duplicate consecutive characters:
    "/ a repeated character is first replaced by the marker $* (which cannot occur
    "/ in the word, because only letters are left at this point);
    "/ the markers are then filtered out.
    "/ Characters are compared with = (equality) instead of == (identity),
    "/ because identity of equal characters is not guaranteed for every code point.
    prev := nil.
    word := word 
                collect:[:char |
                    char = prev ifTrue:[
                        $*
                    ] ifFalse:[
                        prev := char.
                        char.
                    ].    
                ]
                thenSelect:[:char | char ~= $*].

    "/ If longer than 6 characters, take first and last 3            
    word size > 6 ifTrue:[
        word := (word copyFirst:3),(word copyLast:3)
    ].
    ^ word.

    "
     self new encode:'Catherine'            -> 'CTHRN'
     self new encode:'CatherineCatherine'   -> 'CTHHRN'
     self new encode:'Butter'               -> 'BTR'
     self new encode:'Byrne'                -> 'BYRN'
     self new encode:'Boern'                -> 'BRN'
     self new encode:'Smith'                -> 'SMTH'
     self new encode:'Smyth'                -> 'SMYTH'
     self new encode:'Kathryn'              -> 'KTHRYN'
     self new encode:''                     -> ''
     self new encode:'4711'                 -> ''
    "

    "Created: / 28-07-2017 / 15:19:22 / cg"
    "Modified (comment): / 31-07-2017 / 15:14:31 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   881
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   882
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   883
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   884
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   885
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   886
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   887
    WARNING: this is the so-called 'simplified soundex' algorithm;
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   888
      there are more variants like miracode (american soundex) or mysqlSoundex around.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   889
      Be sure to use the correct algorithm, if the generated strings must be compatible
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   890
      (otherwise, the differences are probably too small to be noticed as effect, but
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   891
      your search will be different)
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   892
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   893
    The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   894
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   895
    SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   896
    components of names, but by doing so reports more matches. 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   897
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   898
    There are some variations around in the literature; 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   899
    the following is called 'simplified soundex', and the rules for coding a name are:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   900
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   901
    1. The first letter of the name is used in its un-coded form to serve as the prefix
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   902
       character of the code. (The rest of the code is numerical).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   903
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   904
    2. Thereafter, W and H are ignored entirely.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   905
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   906
    3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   907
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   908
    4. Other letters of the name are converted to a numerical equivalent:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   909
                 B, P, F, V              1 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   910
                 C, G, J, K, Q, S, X, Z  2 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   911
                 D, T                    3 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   912
                 L                       4 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   913
                 M, N                    5 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   914
                 R                       6 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   915
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   916
    5. There are two exceptions: 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   917
        1. Letters that follow prefix letters which would, if coded, have the same
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   918
           numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   919
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   920
        2. The second letter of any pair of consonants having the same code number is likewise ignored, 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   921
           i.e. unless there is a ''separator'' between them in the name.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   922
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   923
    6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   924
       Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   925
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   926
    Notice that in another variant, W and H are treated slightly differently.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   927
    This is only of relevance, if you need to reconstruct original soundex codes of other programs
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
   928
    or for the original 1880 US census data.
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   929
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   930
    Also notice that soundex works best for English.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   931
    For German and other languages, other algorithms may provide better results.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   932
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   933
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   934
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   935
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   936
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   937
encode:word 
    "Answer the simplified soundex code of word: 
     its first character (uppercased), followed by up to three code digits,
     padded with '0' on the right to a total length of 4.

     A character contributes its code only if translate: answers a code for it
     which is neither '0' (a separator) nor equal to the code of the character
     immediately before it. The code of every character (even if it contributed
     nothing) becomes the previous code for the next character.

     Answers an empty string for an empty word
     (instead of failing on the access to the first character)."

    |u p t prevCode|

    "/ nothing to encode
    word isEmpty ifTrue:[^ ''].

    u := word asUppercase.

    "/ the first character is taken as is; its code is only remembered,
    "/ to suppress a directly following character with the same code
    p := u first asString.
    prevCode := self translate:u first.
    u from:2 to:u size do:[:c | 
        t := self translate:c.
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
            p := p , t.
            "/ code is complete - the rest of the word is irrelevant
            p size == 4 ifTrue:[^ p ].
        ].
        prevCode := t
    ].

    "/ short codes are filled up with zeros
    [ p size < 4 ] whileTrue:[
        p := p , '0'
    ].
    ^ (p copyFrom:1 to:4)

    "Created: / 28-07-2017 / 15:21:23 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   957
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   958
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   959
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   960
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   961
translate:aCharacter
    "Answer the soundex digit for aCharacter as a one-character string,
     or nil if the character has no code.
     Only the uppercase letters listed below are recognized:
        A E I O U Y      -> '0'  (vowels; they serve as separators)
        B P F V          -> '1'
        C S K G J Q X Z  -> '2'
        D T              -> '3'
        L                -> '4'
        M N              -> '5'
        R                -> '6'
     Any other argument (H and W included) answers nil.
     Plain identity comparisons are used instead of a table lookup,
     for more speed when compiled."

    "vowels"
    (aCharacter == $A or:[aCharacter == $E or:[aCharacter == $I
        or:[aCharacter == $O or:[aCharacter == $U or:[aCharacter == $Y]]]]])
            ifTrue:[^ '0'].

    "labials"
    (aCharacter == $B or:[aCharacter == $P
        or:[aCharacter == $F or:[aCharacter == $V]]])
            ifTrue:[^ '1'].

    "gutturals and sibilants"
    (aCharacter == $C or:[aCharacter == $S or:[aCharacter == $K
        or:[aCharacter == $G or:[aCharacter == $J or:[aCharacter == $Q
        or:[aCharacter == $X or:[aCharacter == $Z]]]]]]])
            ifTrue:[^ '2'].

    "dentals"
    (aCharacter == $D or:[aCharacter == $T]) ifTrue:[^ '3'].

    aCharacter == $L ifTrue:[^ '4'].

    "nasals"
    (aCharacter == $M or:[aCharacter == $N]) ifTrue:[^ '5'].

    aCharacter == $R ifTrue:[^ '6'].

    "no code for anything else"
    ^ nil
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   996
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   997
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   998
!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   999
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1000
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1001
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1002
    MySQL soundex is like American Soundex (i.e. miracode) without the 4 character limitation,
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1003
    and also removing vowels first, then removing duplicate codes
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1004
    (whereas the soundex code does this in reverse order).
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1005
4133
eda6f1bfc8d2 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 3839
diff changeset
  1006
    These variations are important, if you need the miracode soundex codes to be generated.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1007
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1008
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1009
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1010
!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1011
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1012
encode:word 
    "Answer the MySQL-style soundex code for word.
     The first character of the uppercased word is taken over unchanged;
     every following character is mapped through translate: and its digit
     is appended unless it is nil, the vowel code '0', or equal to the
     previously remembered digit.
     Vowels, W and H leave the remembered digit untouched, so equal codes
     separated only by those letters collapse into one.
     The result is padded with '0' up to 4 characters, but never truncated.
     word must not be empty, because its first element is fetched
     unconditionally."

    |upper code digit lastDigit ch|

    upper := word asUppercase.
    code := upper first asString.
    lastDigit := self translate:upper first.

    2 to:upper size do:[:idx |
        ch := upper at:idx.
        digit := self translate:ch.

        "append only real, non-repeated consonant codes"
        (digit isNil or:[ digit = '0' or:[ digit = lastDigit ]]) ifFalse:[
            code := code , digit.
        ].

        "vowels, W and H do not separate equal codes"
        (digit = '0' or:[ ch = $W or:[ ch = $H ]]) ifFalse:[
            lastDigit := digit.
        ].
    ].

    [ code size >= 4 ] whileFalse:[
        code := code , '0'
    ].
    ^ code

    "Created: / 28-07-2017 / 15:23:41 / cg"
    "Modified: / 31-07-2017 / 17:53:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1034
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1035
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1036
!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1037
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1038
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1039
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1040
    NYSIIS Algorithm:
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1041
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1042
    1.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1043
        remove all ''S'' and ''Z'' chars from the end of the surname 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1044
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1045
    2.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1046
        transcode initial strings
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1047
            MAC => MC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1048
            PF => F
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1049
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1050
    3.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1051
        Transcode trailing strings as follows,
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1052
        
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1053
            IX => IC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1054
            EX => EC
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1055
            YE,EE,IE => Y
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1056
            NT,ND => D 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1057
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1058
    4.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1059
        transcode ''EV'' to ''EF'' if not at start of name
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1060
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1061
    5.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1062
        use first character of name as first character of key 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1063
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1064
    6.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1065
        remove any ''W'' that follows a vowel 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1066
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1067
    7.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1068
        replace all vowels with ''A'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1069
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1070
    8.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1071
        transcode ''GHT'' to ''GT'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1072
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1073
    9.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1074
        transcode ''DG'' to ''G'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1075
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1076
    10.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1077
        transcode ''PH'' to ''F'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1078
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1079
    11.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1080
        if not first character, eliminate all ''H'' preceded or followed by a vowel 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1081
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1082
    12.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1083
        change ''KN'' to ''N'', else ''K'' to ''C'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1084
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1085
    13.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1086
        if not first character, change ''M'' to ''N'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1087
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1088
    14.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1089
        if not first character, change ''Q'' to ''G'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1090
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1091
    15.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1092
        transcode ''SH'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1093
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1094
    16.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1095
        transcode ''SCH'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1096
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1097
    17.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1098
        transcode ''YW'' to ''Y'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1099
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1100
    18.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1101
        if not first or last character, change ''Y'' to ''A'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1102
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1103
    19.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1104
        transcode ''WR'' to ''R'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1105
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1106
    20.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1107
        if not first character, change ''Z'' to ''S'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1108
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1109
    21.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1110
        transcode terminal ''AY'' to ''Y'' 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1111
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1112
    22.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1113
        remove trailing vowels 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1114
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1115
    23.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1116
        collapse all strings of repeated characters 
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1117
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1118
    24.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1119
        if first char of original surname was a vowel, append it to the code
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1120
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1121
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1122
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1123
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1124
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1125
encode:aString 
    "Answer the NYSIIS key for aString.
     The uppercased input is handed through rule1: .. rule23: in that order,
     each rule receiving the answer of the previous one.
     rule24:originalKey: finally gets the intermediate key together with
     the unmodified argument, and its answer is returned."

    |key|

    key := #(
                #rule1:  #rule2:  #rule3:  #rule4:  #rule5:  #rule6:
                #rule7:  #rule8:  #rule9:  #rule10: #rule11: #rule12:
                #rule13: #rule14: #rule15: #rule16: #rule17: #rule18:
                #rule19: #rule20: #rule21: #rule22: #rule23:
            ) 
                inject:(aString asUppercase)
                into:[:soFar :ruleSelector | self perform:ruleSelector with:soFar].

    ^ self rule24:key originalKey:aString

    "
     self new encode:'hello'
     self new encode:'bliss'

     self new phoneticStringsFor:'hello'
     self new phoneticStringsFor:'bliss'
    "

    "Created: / 28-07-2017 / 15:34:52 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1164
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1165
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1166
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1167
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1168
rule10:key 
    "NYSIIS step 10: transcode 'PH' to 'F'.
     Delegates to transcodeAll:of:to:startingAt: beginning at index 1
     and answers whatever that returns."

    ^ self transcodeAll:'PH' of:key to:'F' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1176
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1177
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1178
rule11:key 
    "NYSIIS step 11: if not first character, eliminate all 'H'
     preceded or followed by a vowel.
     The vowel test always looks at the unmodified key; the answer is a
     copy of key with the qualifying H characters cut out."

    |result|

    result := key copy.

    "walk downwards, so that the indices of characters still to be
     removed are not shifted by earlier removals"
    key size to:2 by:-1 do:[:idx |
        ((key at:idx) = $H
            and:[ (key at:idx - 1) isVowel
                    or:[ (idx < key size) and:[ (key at:idx + 1) isVowel ] ] ])
        ifTrue:[
            result := (result copyFrom:1 to:idx - 1) 
                      , (result copyFrom:idx + 1 to:result size)
        ]
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1194
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1195
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1196
rule12:key 
    "NYSIIS step 12: change 'KN' to 'N', else 'K' to 'C'.
     Two passes over the key, both starting at index 1:
     first every 'KN' is replaced, then each remaining 'K'.
     The first pass must produce 'N': if it produced 'K', the second
     pass would turn that K into 'C', and 'KN' would end up as 'C'."

    |k|

    k := self 
                transcodeAll:'KN'
                of:key
                to:'N'
                startingAt:1.
    k := self 
                transcodeAll:'K'
                of:k
                to:'C'
                startingAt:1.
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1211
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1212
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1213
rule13:key 
    "NYSIIS step 13: if not first character, change 'M' to 'N'.
     Delegates to transcodeAll:of:to:startingAt: beginning at index 2,
     so the first character is left out."

    ^ self transcodeAll:'M' of:key to:'N' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1221
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1222
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1223
rule14:key 
    "NYSIIS step 14: if not first character, change 'Q' to 'G'.
     Delegates to transcodeAll:of:to:startingAt: beginning at index 2,
     so the first character is left out."

    ^ self transcodeAll:'Q' of:key to:'G' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1231
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1232
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1233
rule15:key 
    "NYSIIS step 15: transcode 'SH' to 'S'.
     Delegates to transcodeAll:of:to:startingAt: beginning at index 1."

    ^ self transcodeAll:'SH' of:key to:'S' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1241
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1242
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1243
rule16:key 
    "Rule 16: replace every 'SCH' with 'S', anywhere in key (scan begins at index 1).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'SCH' of:key to:'S' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1251
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1252
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1253
rule17:key 
    "Rule 17: replace every 'YW' with 'Y', anywhere in key (scan begins at index 1).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'YW' of:key to:'Y' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1261
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1262
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1263
rule18:key 
    "Rule 18: change every 'Y' that is neither the first nor the last
     character of key into 'A'.
     Answers a modified copy; key itself is left untouched.
     Keys with fewer than three characters have no interior and come back
     unchanged. This includes the empty key, for which the former
     'key last' test raised an error."

    |k|

    k := key copy.
    "only the interior positions 2 .. size-1 are candidates"
    2 to:k size - 1 do:[:i |
        (k at:i) = $Y ifTrue:[ k at:i put:$A ]
    ].
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1276
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1277
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1278
rule19:key 
    "Rule 19: replace every 'WR' with 'R', anywhere in key (scan begins at index 1).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'WR' of:key to:'R' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1286
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1287
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1288
rule1:key 
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     Answers a new string; key itself is not modified.
     A key consisting solely of 'S'/'Z' characters (or an empty key) yields
     the empty string. The previous loop asked an empty string for its last
     element in that case and raised an error."

    |end|

    "walk backwards to the last character which is neither S nor Z,
     then copy once instead of shortening the string step by step"
    end := key size.
    [ end > 0 and:[ 'SZ' includes:(key at:end) ] ] whileTrue:[ end := end - 1 ].
    ^ key copyFrom:1 to:end
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1297
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1298
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1299
rule20:key 
    "Rule 20: replace every 'Z' with 'S', except a 'Z' in the first position
     (the scan begins at index 2).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'Z' of:key to:'S' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1307
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1308
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1309
rule21:key 
    "Rule 21: a key ending in 'AY' gets that ending replaced by 'Y'.
     The scan starts at the second-to-last position, so only a terminal
     'AY' can match.
     For keys shorter than two characters the computed start (key size - 1)
     would be zero or negative, which is not a valid string index; it is
     therefore clamped to 1. Such keys cannot contain 'AY' and come back as
     an unchanged copy."

    ^ self transcodeAll:'AY' of:key to:'Y' startingAt:((key size - 1) max:1)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1317
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1318
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1319
rule22:key 
    "Rule 22: remove trailing vowels.
     Answers a new string; key itself is not modified.
     A key made up of vowels only (or an empty key) yields the empty string.
     The previous loop asked an empty string for its last element in that
     case and raised an error."

    |end|

    "find the last non-vowel, then copy once"
    end := key size.
    [ end > 0 and:[ (key at:end) isVowel ] ] whileTrue:[ end := end - 1 ].
    ^ key copyFrom:1 to:end
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1328
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1329
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1330
rule23:key 
    "Rule 23: collapse every run of repeated characters into a single
     occurrence of that character (e.g. 'AABBBC' becomes 'ABC').
     Answers a new string; key itself is not modified.

     The former implementation collected the indices to delete with
     'k size to:2 do:', which counts upwards and therefore never iterates for
     keys longer than two characters, so nothing was collapsed. Building the
     result in a single forward pass avoids the index bookkeeping altogether."

    |out prev|

    out := WriteStream on:(key species new).
    prev := nil.
    key do:[:ch |
        "emit a character only if it differs from its predecessor"
        ch = prev ifFalse:[ out nextPut:ch ].
        prev := ch
    ].
    ^ out contents
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1345
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1346
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1347
rule24:key originalKey:originalKey 
    "Rule 24: if the first character of the original surname was a vowel,
     append it (uppercased) to the code.
     Answers a new string; key itself is not modified.
     An empty originalKey has no first character and leaves the code
     unchanged. Previously 'originalKey first' raised an error in that case."

    (originalKey notEmpty and:[ originalKey first isVowel ]) ifTrue:[
        ^ key , originalKey first asString asUppercase
    ].
    ^ key copy
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1356
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1357
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1358
rule2:key 
    "Rule 2: transcode initial strings:
        MAC => MC
        PF  => F
     Answers a new string; key itself is not modified.
     The two prefixes exclude each other, so at most one branch applies."

    (key startsWith:'MAC') ifTrue:[ ^ 'MC' , (key copyFrom:4) ].
    (key startsWith:'PF') ifTrue:[ ^ 'F' , (key copyFrom:3) ].
    ^ key copy
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1370
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1371
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1372
rule3:key 
    "Rule 3: transcode trailing strings, in this order:
        IX         => IC
        EX         => EC
        YE, EE, IE => Y
        NT, ND     => D
     Each table row below is (suffixes replacement); the rows are applied one
     after the other, each to the result of the previous one, via
     #transcodeTrailing:of:to:."

    ^ #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
      ) inject:(key copy) into:[:soFar :row |
            self transcodeTrailing:(row at:1) of:soFar to:(row at:2)
      ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1398
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1399
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1400
rule4:key 
    "Rule 4: replace every 'EV' with 'EF', except an 'EV' at the very start of
     the name (the scan begins at index 2).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'EV' of:key to:'EF' startingAt:2
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1408
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1409
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1410
rule5:key 
    "Rule 5 (use the first character of the name as the first character of
     the code) needs no work here: the conversion is done in place, so the
     first character is already where it belongs.
     Answers key itself, not a copy."

    ^ key
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1414
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1415
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1416
rule6:key 
    "Rule 6: remove any 'W' that follows a vowel.
     Answers a new string; key itself is not modified.
     A 'W' in the first position is always kept. Several 'W's in a row after
     a vowel are all removed, because the vowel remains the preceding
     character once a 'W' has been dropped.

     The former index-based loop did not advance its search position when it
     met a 'W' preceded by a non-vowel (as in 'SWAN'), found the same 'W'
     again and again, and never terminated. A single forward pass cannot
     get stuck."

    |out prev|

    out := WriteStream on:(key species new).
    prev := nil.        "last character written so far; nil at the start"
    key do:[:ch |
        (ch = $W and:[ prev notNil and:[ prev isVowel ] ]) ifFalse:[
            out nextPut:ch.
            prev := ch
        ]
    ].
    ^ out contents
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1431
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1432
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1433
rule7:key 
    "Rule 7: replace all vowels with 'A'.
     Answers a modified copy of the same length; key itself is not modified."

    |result|

    result := key copy.
    key keysAndValuesDo:[:pos :ch |
        ch isVowel ifTrue:[ result at:pos put:$A ]
    ].
    ^ result
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1444
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1445
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1446
rule8:key 
    "Rule 8: replace every 'GHT' with 'GT', anywhere in key (scan begins at index 1).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'GHT' of:key to:'GT' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1454
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1455
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1456
rule9:key 
    "Rule 9: replace every 'DG' with 'G', anywhere in key (scan begins at index 1).
     Delegates to #transcodeAll:of:to:startingAt:, which works on a copy of key."

    ^ self transcodeAll:'DG' of:key to:'G' startingAt:1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1464
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1465
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1466
transcodeAll:aString of:key to:replacementString startingAt:start 
    "Answer a copy of key in which every occurrence of aString found at or
     after index start has been replaced by replacementString.
     key itself is not modified.

     After each replacement the search restarts at the same start index, so
     a match which only comes into existence through an earlier replacement
     is replaced as well. As a consequence, replacementString must not
     reintroduce aString, otherwise the loop cannot terminate.

     A start below 1 is treated as 1: callers derive it from the key size and
     arrive at zero or a negative value for very short keys, which is not a
     valid string index. An empty aString leaves the key unchanged."

    |k i from|

    k := key copy.
    aString isEmpty ifTrue:[ ^ k ].
    from := start max:1.
    [
        (i := k indexOfSubCollection:aString startingAt:from) > 0
    ] whileTrue:[
        "splice: text before the match, the replacement, text after the match"
        k := (k copyFrom:1 to:i - 1) , replacementString 
                    , (k copyFrom:i + aString size to:k size)
    ].
    ^ k
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1477
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1478
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1479
transcodeTrailing:anArrayOfStrings of:key to:replacementString 
    "Answer a copy of key in which a trailing occurrence of any of the strings
     in anArrayOfStrings has been replaced by replacementString.
     The candidates are tried in array order, each against the result left by
     the previous one. key itself is not modified.

     The suffix is tested explicitly with #endsWith:. The former version
     searched from index (size - suffix size + 1), which is zero or negative
     (not a valid string index) whenever the key is shorter than the suffix."

    |answer|

    answer := key copy.
    anArrayOfStrings do:[:suffix |
        (suffix notEmpty and:[ answer endsWith:suffix ]) ifTrue:[
            answer := (answer copyFrom:1 to:answer size - suffix size) , replacementString
        ]
    ].
    ^ answer
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1491
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1492
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1493
!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1494
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1495
documentation
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1496
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1497
    Implementation of the PHONEM algorithm, as described in
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1498
    'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1499
    Ein Programm fuer kontextsensitive phonetische Textumwandlung
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  1500
    ct Magazin fuer Computer & Technik 25/1998'
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  1501
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  1502
    This algorithm copes better with the German language (it takes umlauts into account)
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1503
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1504
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1505
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1506
!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1507
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1508
encode:aString 
    "Answer the phonem code of aString.
     Processing steps, as performed below:
       1. the string is converted to uppercase
       2. two-letter groups are replaced from left to right
       3. single characters are transliterated
       4. every character outside of 'ABCDLMNORSUVWXY' is removed
       5. runs of equal characters are squashed into one"

    |s idx pair replacement digraphs|

    "/ two-letter group -> replacement, stored as flat (from to) pairs;
    "/ all keys are distinct, so the lookup order does not matter
    digraphs := #(
        'SC' 'C'
        'SZ' 'C'
        'CZ' 'C'
        'TZ' 'C'
        'TS' 'C'
        'KS' 'X'
        'PF' 'V'
        'QU' 'KW'
        'PH' 'V'
        'UE' 'Y'
        'AE' 'E'
        'OE' 'Ö'
        'EI' 'AY'
        'EY' 'AY'
        'EU' 'OY'
        'AU' 'A§'
        'OU' '§ '
    ).

    s := aString asUppercase.

    "/ idx is 1-based and the group covers idx .. idx+1;
    "/ therefore the last group starts at (s size - 1).
    "/ (the previous bound 'idx < (s size-1)' never looked at the last group)
    idx := 1.
    [idx < s size] whileTrue:[
        pair := s copyFrom:idx to:idx+1.
        replacement := nil.
        1 to:digraphs size by:2 do:[:i |
            (replacement isNil and:[pair = (digraphs at:i)]) ifTrue:[
                replacement := digraphs at:i+1
            ].
        ].
        replacement notNil ifTrue:[
            "/ idx is not advanced here, so the replacement text is scanned again
            s := (s copyTo:idx-1) , replacement , (s copyFrom:idx+2)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "/ P is mapped to B (was D), as shown by the 'wepper' -> 'VBR' example below.
    "/ NOTE(review): the to-string has one more character (the trailing 'A') than
    "/ the from-string; it is kept as it was - confirm whether a from-character got lost.
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVBDUA'.
    "/ remove everything which is not part of the result alphabet
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    "/ squash runs of equal characters
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ s

    "
     self basicNew encode:'müller'  -> 'MYLR'    
     self basicNew encode:'mueller' -> 'MYLR'    
     self basicNew encode:'möller'  -> 'MYLR'
     self basicNew encode:'miller'  -> 'MYLR'     
     self basicNew encode:'muller'  -> 'MULR' 
     self basicNew encode:'muler'   -> 'MULR' 
     self basicNew encode:'schmitz' -> 'CMYC' 

     self basicNew phoneticStringsFor:'müller'  #('MYLR')    
     self basicNew phoneticStringsFor:'mueller' #('MYLR')    
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')     
     self basicNew phoneticStringsFor:'muller'  #('MULR') 
     self basicNew phoneticStringsFor:'muler'   #('MULR') 
     
     self basicNew phoneticStringsFor:'schmidt'     #('CMYD')
     self basicNew phoneticStringsFor:'schneider'   #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'     #('VYCR')
     self basicNew phoneticStringsFor:'weber'       #('VBR')
     self basicNew phoneticStringsFor:'weeber'      #('VBR')
     self basicNew phoneticStringsFor:'webber'      #('VBR')
     self basicNew phoneticStringsFor:'wepper'      #('VBR')
     
     self basicNew phoneticStringsFor:'meyer'       #('MAYR')
     self basicNew phoneticStringsFor:'maier'       #('MAYR')
     self basicNew phoneticStringsFor:'mayer'       #('MAYR')
     self basicNew phoneticStringsFor:'mayr'        #('MAYR')
     self basicNew phoneticStringsFor:'meir'        #('MAYR')
     
     self basicNew phoneticStringsFor:'wagner'      #('VACNR')
     self basicNew phoneticStringsFor:'schulz'      #('CULC')
     self basicNew phoneticStringsFor:'becker'      #('BCR')
     self basicNew phoneticStringsFor:'hoffmann'    #('OVMAN')
     self basicNew phoneticStringsFor:'haus'        #('AUS')
     
     self basicNew phoneticStringsFor:'schäfer'     #('CVR')
     self basicNew phoneticStringsFor:'scheffer'    #('CVR')
     self basicNew phoneticStringsFor:'schaeffer'   #('CVR')
     self basicNew phoneticStringsFor:'schaefer'    #('CVR')
    "

    "Created: / 28-07-2017 / 15:38:08 / cg"
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1589
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1590
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1591
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1592
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1593
copyright
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1594
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1595
Copyright (c) 2002-2004 Robert Jarvis
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1596
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1597
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1598
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1599
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1600
the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1601
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1602
The above copyright notice and this permission notice shall be included in all copies or substantial 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1603
portions of the Software.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1604
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1605
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1606
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1607
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1608
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1609
USE OR OTHER DEALINGS IN THE SOFTWARE.'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1610
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1611
! !
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1612
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1613
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1614
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1615
isSlavoGermanic:aString
    "Answer true, if aString contains one of the letters or letter groups
     w, k, cz, witz, ä, ö, ü or ß.
     The test ignores case: the argument is converted to lowercase first,
     so that capitalized and uppercased names are recognized as well
     (before, only all-lowercase input could ever match)."

    |lowered|

    lowered := aString asLowercase.
    ^ #('w' 'k' 'cz' 'witz' 'ä' 'ö' 'ü' 'ß') contains:[:sub | lowered includesString:sub]

    "
     self isSlavoGermanic:'walter'
     self isSlavoGermanic:'Walter'
     self isSlavoGermanic:'HOROWITZ'
     self isSlavoGermanic:'horowitz'
     self isSlavoGermanic:'müller'
     self isSlavoGermanic:'miller'
    "

    "Modified: / 28-07-2017 / 10:14:38 / cg"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1626
! !
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1627
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1628
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1629
3685
01ebbac96899 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3648
diff changeset
  1630
documentation
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1631
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1632
    The Double Metaphone algorithm
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1633
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1634
    See https://en.wikipedia.org/wiki/Metaphone (section 'Double Metaphone').
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1635
"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1636
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1637
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1638
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!

currentIndex
    "answer the value of the currentIndex instance variable"

    ^ currentIndex
!

currentIndex: anInteger
    "set the currentIndex instance variable"

    currentIndex := anInteger
!

inputKey
    "answer the value of the inputKey instance variable"

    ^ inputKey
!

inputKey: aString
    "set the inputKey instance variable to the uppercase version of aString"

    inputKey := aString asUppercase
!

primaryTranslation
    "answer the value of the primaryTranslation instance variable"

    ^ primaryTranslation
!

primaryTranslation: anObject
    "set the primaryTranslation instance variable"

    primaryTranslation := anObject
!

secondaryTranslation
    "answer the value of the secondaryTranslation instance variable"

    ^ secondaryTranslation
!

secondaryTranslation: anObject
    "set the secondaryTranslation instance variable"

    secondaryTranslation := anObject
!

skipCount
    "answer the value of the skipCount instance variable"

    ^ skipCount
!

skipCount: anInteger
    "set the skipCount instance variable"

    skipCount := anInteger
!

startIndex
    "answer the value of the startIndex instance variable"

    ^ startIndex
!

startIndex: anObject
    "set the startIndex instance variable"

    startIndex := anObject
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1687
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1688
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1689
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1690
phoneticStringsFor:aString 
    "Answer a two-element array for aString:
     the primary translation followed by the secondary translation.
     aString is stored as inputKey as given (without case conversion),
     then the two processing steps are run on the receiver."

    inputKey := aString.
    self
        performInitialProcessing;
        processRemainingCharacters.
    ^ Array with:primaryTranslation with:secondaryTranslation

    "Modified (format): / 28-07-2017 / 11:25:02 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1699
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1700
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1701
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1702
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1703
initialize
    "set up the receiver's state after the superclass initialization:
     both indices start at 1, nothing is skipped
     and both translations are empty strings"

    super initialize.

    "/ positions
    startIndex := 1.
    currentIndex := 1.
    skipCount := 0.

    "/ results collected so far
    primaryTranslation := ''.
    secondaryTranslation := ''.

    "Modified: / 28-07-2017 / 11:18:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1713
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1714
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1715
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1716
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1717
addPrimaryTranslation:aString 
    "append aString to the primary translation collected so far"

    |extended|

    extended := primaryTranslation , aString.
    primaryTranslation := extended

    "Modified: / 28-07-2017 / 11:19:09 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1721
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1722
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1723
addSecondaryTranslation:aString 
    "append aString to the secondary translation collected so far"

    |extended|

    extended := secondaryTranslation , aString.
    secondaryTranslation := extended

    "Modified: / 28-07-2017 / 11:17:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1727
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1728
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1729
isSlavoGermanic: aString
    "answer true, if aString contains a W or a K,
     or one of the letter groups CZ or WITZ.
     Only uppercase letters are tested."

    (aString includesAnyOf: 'WK') ifTrue:[^ true].
    (aString indexOfSubCollection: 'CZ' startingAt: 1) >= 1 ifTrue:[^ true].
    ^ (aString indexOfSubCollection: 'WITZ' startingAt: 1) >= 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1733
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1734
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1735
keyAt: anInteger
    "answer the character of inputKey at index anInteger;
     answer a space character, if the index is outside of 1 .. inputKey size"

    ^ (anInteger between:1 and:inputKey size)
        ifTrue:[ inputKey at:anInteger ]
        ifFalse:[ Character space ]

    "Modified: / 28-07-2017 / 11:38:30 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1742
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1743
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1744
keyLeftString: lengthInteger
    "answer what keyMidString:from: delivers for lengthInteger characters
     starting at index 1, i.e. the left end of the key"

    |firstIndex|

    firstIndex := 1.
    ^ self keyMidString: lengthInteger from: firstIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1746
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1747
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1748
keyMidString: lengthInteger from: fromInteger
    "Answer a string with exactly lengthInteger characters, taken from inputKey
     starting at index fromInteger.
     Positions before the first or after the last character of inputKey
     are delivered as spaces.
     Answer an empty string, if lengthInteger is not positive.

     The previous implementation let the remaining length become negative
     when the requested window was completely outside of the key
     (fromInteger > inputKey size + 1, or a window ending before index 1);
     that led to an inverted copy range and to more padding than requested."

    | keySize lastRequested first last leading trailing |

    lengthInteger <= 0 ifTrue:[^ ''].

    keySize := inputKey size.
    lastRequested := fromInteger + lengthInteger - 1.

    "/ the part of the requested window which really exists in the key
    first := fromInteger max:1.
    last := lastRequested min:keySize.

    last < first ifTrue:[
        "/ nothing of the window lies inside the key
        ^ String new:lengthInteger withAll:Character space
    ].

    leading := first - fromInteger.         "/ positions before the start of the key
    trailing := lastRequested - last.       "/ positions behind the end of the key

    ^ (String new:leading withAll:Character space)
        , (inputKey copyFrom:first to:last)
        , (String new:trailing withAll:Character space)

    "Modified: / 28-07-2017 / 11:20:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1777
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1778
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1779
keyRightString: lengthInteger
    "Answer the rightmost lengthInteger characters of inputKey.
     The extraction itself is delegated to #keyMidString:from:, which is
     handed the index of the first wanted character."

    | firstIndex |

    firstIndex := inputKey size - lengthInteger + 1.
    ^ self keyMidString: lengthInteger from: firstIndex

    "Modified: / 28-07-2017 / 11:20:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1783
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1784
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1785
performInitialProcessing
    "Handle the special cases at the very beginning of inputKey:
       - a leading GN, KN, PN, WR or PS advances startIndex by one;
       - a leading X adds 'S' to both translations and advances startIndex;
       - a leading vowel adds 'A' to both translations and advances startIndex.

     The two-letter prefix is fetched via #keyMidString:from: (which pads
     with spaces at the end of the key) instead of #copyFrom:to:, so that a
     key shorter than two characters does not raise an index error here.
     NOTE(review): #keyAt: is assumed to cope with an empty inputKey - confirm."

    (#( 'GN' 'KN' 'PN' 'WR' 'PS' ) includes:(self keyMidString:2 from:1)) ifTrue:[
        startIndex := startIndex + 1
    ].
    (self keyAt:1) = $X ifTrue:[
        self
            addPrimaryTranslation:'S';
            addSecondaryTranslation:'S'.
        startIndex := startIndex + 1
    ].
    (self keyAt:1) isVowel ifTrue:[
        self
            addPrimaryTranslation:'A';
            addSecondaryTranslation:'A'.
        startIndex := startIndex + 1
    ]

    "Modified: / 28-07-2017 / 11:36:31 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1803
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1804
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1805
processB
    "Encode the letter B at currentIndex: 'P' goes into both the primary
     and the secondary translation.
     When the next character is a B as well, skipCount is increased by one
     (presumably so that the caller steps over the doubled letter - verify
     against the main loop)."

    self addPrimaryTranslation: 'P'.
    self addSecondaryTranslation: 'P'.

    $B == (self keyAt: currentIndex + 1)
        ifTrue: [ skipCount := skipCount + 1 ].

    "Modified: / 28-07-2017 / 11:26:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1815
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1816
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1817
processC
    "Encode the letter C found at currentIndex.
     The cases i .. xi are tried in order. Each matching case adds its
     primary and secondary translation and increases skipCount by the number
     of characters it consumed in addition to the C itself; cases i .. ix
     return immediately when they match.

     NOTE(review): the rules are meant to follow Lawrence Philips'
     Double Metaphone reference code. skipCount is taken to be the advance
     on top of the normal one-character step (as #processB uses it for BB) -
     confirm against the main loop."

    "i - ACH preceded by a non-vowel and not followed by I.
     A following E is only accepted inside BACHER / MACHER
     (this test used to be inverted)"
    ((((currentIndex >= 3
            and: [ (self keyAt: currentIndex-2) isVowel not ])
            and: [ (self keyMidString: 3 from: currentIndex-1) = 'ACH' ])
            and: [ (self keyAt: currentIndex+2) ~= $I ])
            and: [ ((self keyAt: currentIndex+2) ~= $E)
                    or: [ #('BACHER' 'MACHER') includes: (self keyMidString: 6 from: currentIndex-2) ] ])
        ifTrue: [
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'.
            "only the H is consumed besides the C; adding 2 here
             would also swallow the character after the H"
            skipCount := skipCount + 1.
            ^self ].

    "ii - CAESAR, but only for the C that starts the key; without the index
     test every later C of such a key would be coded as S too"
    (currentIndex = 1 and: [ inputKey beginsWith: 'CAESAR' ])
        ifTrue: [
            self addPrimaryTranslation: 'S'.
            self addSecondaryTranslation: 'S'.
            skipCount := skipCount + 1.
            ^self ].

    "iii - CHIA"
    (self keyMidString: 4 from: currentIndex) = 'CHIA'
        ifTrue: [
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^self ].

    "iv - CH; every path through this branch returns"
    (self keyMidString: 2 from: currentIndex) = 'CH'
        ifTrue: [
            "a - CHAE, not at the start of the key"
            (currentIndex > 1
                    and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
                ifTrue: [
                    self
                        addPrimaryTranslation: 'K';
                        addSecondaryTranslation: 'X'.
                    skipCount := skipCount + 1.
                    ^self ].

            "b - key starts with CHARAC, CHARIS, CHOR, CHYM, CHIA or CHEM,
             but not with CHORE.
             #beginsWith: is used because it is safe for short keys; the old
             code compared a 4-character copy against the 5-character literal
             CHORE (never equal) and rejected 4-character keys altogether"
            (currentIndex = 1
                    and: [ ((#('CHARAC' 'CHARIS' 'CHOR' 'CHYM' 'CHIA' 'CHEM')
                                detect: [:prefix | inputKey beginsWith: prefix ]
                                ifNone: [ nil ]) notNil)
                            and: [ (inputKey beginsWith: 'CHORE') not ] ])
                ifTrue: [
                    self
                        addPrimaryTranslation: 'K';
                        addSecondaryTranslation: 'K'.
                    skipCount := skipCount + 1.
                    ^self ].

            "c - key starts with VAN , VON  or SCH, or the CH sits inside
             ORCHES / ARCHIT / ORCHID, or is followed by T or S, or
             (at the start, or after A O U E) is followed by one of
             L R N M B H F V W or a space.
             The prefix tests use #beginsWith: because #copyFrom:to: raised
             an index error for keys shorter than four characters"
            ((((((inputKey beginsWith: 'VAN ') or: [ inputKey beginsWith: 'VON ' ])
                    or: [ inputKey beginsWith: 'SCH' ])
                    or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
                                includes: (self keyMidString: 6 from: currentIndex-2) ])
                    or: [ #($T $S) includes: (self keyAt: currentIndex+2) ])
                    or: [ ((currentIndex = 1)
                                or: [ #($A $O $U $E) includes: (self keyAt: currentIndex-1) ])
                            and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex+2) ] ])
                ifTrue: [
                    self
                        addPrimaryTranslation: 'K';
                        addSecondaryTranslation: 'K'.
                    skipCount := skipCount + 1.
                    ^self ]
                ifFalse: [
                    currentIndex > 1
                        ifTrue: [
                            "here the key has at least three characters,
                             so the two-character copy is safe"
                            (inputKey copyFrom: 1 to: 2) = 'MC'
                                ifTrue: [
                                    self
                                        addPrimaryTranslation: 'K';
                                        addSecondaryTranslation: 'K' ]
                                ifFalse: [
                                    self
                                        addPrimaryTranslation: 'X';
                                        addSecondaryTranslation: 'K' ] ]
                        ifFalse: [
                            self
                                addPrimaryTranslation: 'X';
                                addSecondaryTranslation: 'X' ].
                    skipCount := skipCount + 1.
                    ^self ] ].

    "v - CZ"
    (self keyAt: currentIndex+1) = $Z
        ifTrue: [
            self
                addPrimaryTranslation: 'S';
                addSecondaryTranslation: 'X'.
            skipCount := skipCount + 1.
            ^self ].

    "vi - C followed by CIA"
    (self keyMidString: 3 from: currentIndex+1) = 'CIA'
        ifTrue: [
            self
                addPrimaryTranslation: 'X';
                addSecondaryTranslation: 'X'.
            skipCount := skipCount + 2.
            ^self ].

    "vii - CC, unless the key starts with MCC"
    ((self keyAt: currentIndex+1) = $C
            and: [ ((currentIndex = 2)
                    and: [ (self keyAt: 1) = $M ]) not ])
        ifTrue: [
            ((#($I $E $H) includes: (self keyAt: currentIndex+2))
                    and: [ (self keyMidString: 2 from: currentIndex+2) ~= 'HU' ])
                ifTrue: [
                    ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
                            or: [ #('UCCEE' 'UCCES') includes: (self keyMidString: 5 from: currentIndex-1) ])
                        ifTrue: [
                            self
                                addPrimaryTranslation: 'KS';
                                addSecondaryTranslation: 'KS'.
                            skipCount := skipCount + 2.
                            ^self ]
                        ifFalse: [
                            self
                                addPrimaryTranslation: 'X';
                                addSecondaryTranslation: 'X'.
                            skipCount := skipCount + 2.
                            ^self ] ]
                ifFalse: [
                    self
                        addPrimaryTranslation: 'K';
                        addSecondaryTranslation: 'K'.
                    "only the second C is consumed here; adding 2 would
                     also swallow the character after the CC"
                    skipCount := skipCount + 1.
                    ^self ] ].

    "viii - CK, CG, CQ"
    (#($K $G $Q) includes: (self keyAt: currentIndex+1))
        ifTrue: [
            self
                addPrimaryTranslation: 'K';
                addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^self ].

    "ix - CI, CE, CY; CIO / CIE / CIA get X as secondary code"
    (#($I $E $Y) includes: (self keyAt: currentIndex+1))
        ifTrue: [
            (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
                ifTrue: [
                    self
                        addPrimaryTranslation: 'S';
                        addSecondaryTranslation: 'X' ]
                ifFalse: [
                    self
                        addPrimaryTranslation: 'S';
                        addSecondaryTranslation: 'S' ].
            skipCount := skipCount + 1.
            ^self ].

    "x - everything else is a plain K"
    self
        addPrimaryTranslation: 'K';
        addSecondaryTranslation: 'K'.

    "xi - also consume a following ' C', ' Q' or ' G',
     or a single following C, K or Q that does not start CE / CI"
    (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex+1))
        ifTrue: [ skipCount := skipCount + 2 ]
        ifFalse: [
            ((#($C $K $Q) includes: (self keyAt: currentIndex+1))
                    and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex+1)) not ])
                ifTrue: [ skipCount := skipCount + 1 ] ]

    "Modified: / 28-07-2017 / 11:29:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1992
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1993
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1994
processCedille
	"Append the code 'S' to both the primary and the secondary translation.
	 skipCount is left untouched by this method.
	 NOTE(review): presumably dispatched for a C with cedilla; the dispatching
	 code is not part of this method - confirm against the caller."

	self addPrimaryTranslation: 'S'.
	self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1998
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1999
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2000
processD
        "Encode a D found at currentIndex, depending on the key characters that follow it:
            D G + (I | E | Y)   -> 'J'      skipCount advanced by 2
            D G + anything else -> 'TK'     skipCount advanced by 1
            D T  or  D D        -> 'T'      skipCount advanced by 1
            any other follower  -> 'T'      skipCount unchanged
         The primary and the secondary translation always receive the same code.
         NOTE(review): skipCount is presumably the number of additional input
         characters the main loop steps over afterwards - confirm against the caller."

        | absorbsNext |

        "D followed by G"
        (self keyAt: currentIndex + 1) = $G ifTrue: [
            (#($I $E $Y) includes: (self keyAt: currentIndex + 2)) ifTrue: [
                "DGI / DGE / DGY"
                self addPrimaryTranslation: 'J'.
                self addSecondaryTranslation: 'J'.
                skipCount := skipCount + 2.
                ^ self
            ].
            "DG followed by anything else"
            self addPrimaryTranslation: 'TK'.
            self addSecondaryTranslation: 'TK'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "DT and DD collapse into a single 'T'; a lone D also yields 'T'
         but does not consume the following character"
        absorbsNext := #($T $D) includes: (self keyAt: currentIndex + 1).
        self addPrimaryTranslation: 'T'.
        self addSecondaryTranslation: 'T'.
        absorbsNext ifTrue: [
            skipCount := skipCount + 1
        ]

    "Modified: / 28-07-2017 / 11:27:39 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2033
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2034
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2035
processF
        "Append the code 'F' to both the primary and the secondary translation.
         If the key character right after currentIndex is another F, skipCount is
         advanced by one, so that 'FF' produces a single 'F'.
         NOTE(review): skipCount is presumably consumed by the main loop to step
         over the second F - confirm against the caller."

        | nextIsF |

        self addPrimaryTranslation: 'F'.
        self addSecondaryTranslation: 'F'.

        nextIsF := (self keyAt: currentIndex + 1) = $F.
        nextIsF ifTrue: [
            skipCount := skipCount + 1
        ]

    "Modified (format): / 28-07-2017 / 11:29:21 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2044
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2045
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2046
processG
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2047
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2048
        case 'G':
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2049
                if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2050
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2051
        | word |
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2052
        (self keyAt: currentIndex + 1) = $H
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2053
        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2054
                "if((current > 0) AND !!IsVowel(current - 1))"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2055
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2056
                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2057
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2058
              " {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2059
                   MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2060
                   current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2061
                   break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2062
                }"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2063
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2064
                        self 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2065
                            addPrimaryTranslation: 'K';
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2066
                            addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2067
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2068
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2069
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2070
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2071
                "if(current < 3)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2072
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2073
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2074
                currentIndex < 4 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2075
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2076
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2077
                        " //'ghislane', ghiradelli
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2078
               if(current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2079
               { "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2080
                        currentIndex = 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2081
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2082
                                "if(GetAt(current + 2) == 'I')"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2083
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2084
                                (self keyAt: currentIndex + 2) = $I
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2085
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2086
                                        "MetaphAdd(J);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2087
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2088
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2089
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2090
                                        "MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2091
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2092
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2093
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2094
                                "  current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2095
                                break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2096
                                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2097
                                ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2098
                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2099
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2100
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2101
                " //Parker's rule (with some further refinements) - e.g., 'hugh'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2102
                if(((current > 1) AND StringAt((current - 2), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2103
                //e.g., 'bough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2104
                OR ((current > 2) AND StringAt((current - 3), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2105
                //e.g., 'broughton'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2106
                OR ((current > 3) AND StringAt((current - 4), 1, B, H, ) ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2107
         "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2108
                (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)]) 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2109
                or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])  
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2110
                or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2111
                ifTrue: [                         
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2112
                        "current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2113
                        break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2114
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2115
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2116
                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2117
                        " //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2118
               if((current > 2) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2119
               AND (GetAt(current - 1) == 'U') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2120
               AND StringAt((current - 3), 1, C, G, L, R, T, ) )"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2121
                        (currentIndex > 3 and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2122
                                ((self keyAt: currentIndex - 1) = $U) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2123
                                        #($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2124
                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2125
                        ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2126
                                "MetaphAdd(F);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2127
                                self addPrimaryTranslation: 'F';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2128
                                addSecondaryTranslation: 'F'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2129
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2130
                                " if((current > 0) AND GetAt(current - 1) !!= 'I')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2131
                    MetaphAdd(K);"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2132
                                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2133
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2134
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2135
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2136
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2137
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2138
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2139
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2140
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2141
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2142
                "if(GetAt(current + 1) == 'N')"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2143
          (self keyAt: currentIndex + 1) = $N
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2144
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2145
                        "if((current == 1) AND IsVowel(0) AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2146
                        (currentIndex = 2 and: [(inputKey at: 1) isVowel and: [(self isSlavoGermanic: inputKey) not]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2147
               ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2148
                                "MetaphAdd(KN, N);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2149
                                self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2150
                                addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2151
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2152
                                " //not e.g. 'cagney'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2153
                                if(!!StringAt((current + 2), 2, EY, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2154
                                AND (GetAt(current + 1) !!= 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2155
                                AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2156
                                ((inputKey size >= (currentIndex + 2)) and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2157
                                        (inputKey copyFrom: currentIndex + 2 to: (currentIndex + 4 min: inputKey size)) ~= 'EY' and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2158
                                                (self keyAt: currentIndex + 1) ~= $Y and: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2159
                                                        (self isSlavoGermanic: inputKey) not
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2160
                                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2161
                                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2162
                                ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2163
                                        self addPrimaryTranslation: 'N';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2164
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2165
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2166
                                        self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2167
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2168
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2169
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2170
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2171
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2172
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2173
                " //'tagliaro'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2174
                if(StringAt((current + 1), 2, LI, ) AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2175
                ((inputKey size >= (currentIndex + 3)) and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2176
                        (inputKey copyFrom: currentIndex + 1 to: currentIndex + 2) = 'LI' and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2177
                                (self isSlavoGermanic: inputKey) not]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2178
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2179
                        self addPrimaryTranslation: 'KL';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2180
                        addSecondaryTranslation: 'L'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2181
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2182
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2183
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2184
                " //-ges-,-gep-,-gel-, -gie- at beginning
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2185
                if((current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2186
                AND ((GetAt(current + 1) == 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2187
                OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER, )) )"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2188
                (currentIndex = 1 and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2189
                        ((self keyAt: currentIndex + 1) = $Y) or: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2190
                        (#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER') includes: 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2191
                                (inputKey copyFrom: currentIndex + 1 to: currentIndex + 2))
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2192
                ]]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2193
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2194
                        addSecondaryTranslation: 'J'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2195
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2196
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2197
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2198
                " // -ger-,  -gy-
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2199
                if((StringAt((current + 1), 2, ER, ) OR (GetAt(current + 1) == 'Y'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2200
                AND !!StringAt(0, 6, DANGER, RANGER, MANGER, )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2201
                AND !!StringAt((current - 1), 1, E, I, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2202
                AND !!StringAt((current - 1), 3, RGY, OGY, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2203
                "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2204
          (((inputKey copyFrom: currentIndex + 1 to: (currentIndex + 3 min: inputKey size)) = 'ER' or: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2205
                                ((self keyAt: currentIndex + 1) = $Y)]) 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2206
                        and: [((#('DANGER' 'RANGER' 'MANGER') includes: (word := inputKey copyFrom: 1 to: (6 min: inputKey size))) not)
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2207
                                and: [(self keyAt: currentIndex - 1) ~= $E
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2208
                                        and: [(#('RGY' 'OGY') includes: (inputKey copyFrom: currentIndex - 1 to: currentIndex + 1)) not]]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2209
                 ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2210
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2211
                        addSecondaryTranslation: 'J'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2212
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2213
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2214
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2215
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2216
          " // italian e.g, 'biaggi'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2217
           if(StringAt((current + 1), 1, E, I, Y, ) OR StringAt((current - 1), 4, AGGI, OGGI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2218
           "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2219
                ((#($E $I $Y) includes: (self keyAt: (currentIndex + 1))) or: [(#('AGGI' 'OGGI') includes: (inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: inputKey size)))])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2220
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2221
                        " //obvious germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2222
                                        if((StringAt(0, 4, VAN , VON , ) OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2223
                                                OR StringAt((current + 1), 2, ET, ))                                                MetaphAdd(K);"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2224
                        word := (inputKey copyFrom: 1 to: 4).
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2225
                        ((#('VAN ' 'VON ') includes: word) or: [(word copyFrom: 1 to: 3) = 'SCH' or: [(word copyFrom: 1 to: 2) = 'ET']]) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2226
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2227
                                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2228
                                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2229
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2230
                            " //always soft if french ending
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2231
                                                if(StringAt((current + 1), 4, IER , ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2232
                                                        MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2233
                                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2234
                                                        MetaphAdd(J, K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2235
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2236
                                        break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2237
                                (((inputKey copyFrom: currentIndex + 1 to: (currentIndex + 5 min: inputKey size)), '    ') copyFrom: 1 to: 4) = 'IER '
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2238
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2239
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2240
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2241
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2242
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2243
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2244
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2245
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2246
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2247
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2248
                        ^self.       
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2249
                ].                      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2250
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2251
        " if(GetAt(current + 1) == 'G')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2252
             current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2253
         else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2254
             current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2255
         MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2256
            break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2257
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2258
                (self keyAt: (currentIndex + 1)) = $G
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2259
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2260
                        skipCount := skipCount + 1.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2261
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2262
                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2263
                addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2264
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2265
    "Modified: / 28-07-2017 / 11:31:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2266
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2267
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2268
processH
        "Encode the letter H at currentIndex.
         H is kept (as 'H' in both translations) only if it is the first letter
         or is preceded by a vowel, and is also followed by a vowel.
         In that case skipCount is incremented (mirrors current += 2 in the reference).
         In every other case, including 'HH', nothing is emitted.

         Reference implementation:
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'H':
                                //only keep if first & before vowel or btw. 2 vowels
                                if(((current == 0) OR IsVowel(current - 1)) 
                                        AND IsVowel(current + 1))
                                {
                                        MetaphAdd(H);
                                        current += 2;
                                }else//also takes care of 'HH'
                                        current += 1;
                                break;
        "

        | keepsH |

        "the blocks keep the evaluation lazy: the neighbours are only fetched when needed"
        keepsH := (currentIndex = 1 or: [(self keyAt: currentIndex - 1) isVowel])
                        and: [(self keyAt: currentIndex + 1) isVowel].
        keepsH ifFalse: [^ self].

        self addPrimaryTranslation: 'H'.
        self addSecondaryTranslation: 'H'.
        skipCount := skipCount + 1

    "Modified: / 28-07-2017 / 11:29:52 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2293
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2294
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2295
processJ
        "Encode the letter J at currentIndex.
         - J starts 'JOSE', or the input starts with 'SAN ' (obvious spanish):
             'H' / 'H' if the input starts with 'SAN ', or if the 'JOSE' is at the very
             start and is followed by a space or by the end of the input;
             otherwise primary 'J' / secondary 'H'.
             Returns immediately (a doubled J is not looked at in this case).
         - J is the first letter: primary 'J' / secondary 'A' (Yankelovich/Jankelowicz).
         - J follows a vowel, precedes A or O, and the input is not slavo-germanic:
             primary 'J' / secondary 'H' (e.g. 'bajador').
         - J is the last letter: primary 'J' / secondary ' '.
         - otherwise 'J' / 'J', unless J is followed by one of L T K S N M B Z
             or preceded by one of S K L, in which case nothing is emitted.
         Finally, if the next letter is another J, skipCount is incremented
         (mirrors current += 2 in the reference).

         Reference implementation:
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'J':
                                //obvious spanish, 'jose', 'san jacinto'
                                if(StringAt(current, 4, JOSE, ) OR StringAt(0, 4, SAN , ) )
                                {
                                        if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, SAN , ) )
                                                MetaphAdd(H);
                                        else
                                        {
                                                MetaphAdd(J, H);
                                        }
                                        current +=1;
                                        break;
                                }

                                if((current == 0) AND !!StringAt(current, 4, JOSE, ))
                                        MetaphAdd(J, A);//Yankelovich/Jankelowicz
                                else
                                        //spanish pron. of e.g. 'bajador'
                                        if(IsVowel(current - 1) 
                                                AND !!SlavoGermanic()
                                                        AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
                                                MetaphAdd(J, H);
                                        else
                                                if(current == last)
                                                        MetaphAdd(J,  );
                                                else
                                                        if(!!StringAt((current + 1), 1, L, T, K, S, N, M, B, Z, ) 
                                                                        AND !!StringAt((current - 1), 1, S, K, L, ))
                                                                MetaphAdd(J);

                                if(GetAt(current + 1) == 'J')//it could happen!!
                                        current += 2;
                                else
                                        current += 1;
                                break;
"
        | currentWord firstWord nextLetter |
        currentWord := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size).
        firstWord := inputKey copyFrom: 1 to: (4 min: inputKey size).
        nextLetter := self keyAt: currentIndex + 1.
        (currentWord = 'JOSE' or: [firstWord = 'SAN '])
        ifTrue: [       
                "the parentheses around the keyAt: send are required: binary selectors bind
                 tighter than keyword selectors, so without them the argument passed to keyAt:
                 would be the Boolean result of (currentIndex + 4) = $  instead of an index"
                ((currentIndex = 1 and: [inputKey size = 4 or: [inputKey size >= 5 and: [(self keyAt: currentIndex + 4) = $ ]]])
                        or: [firstWord = 'SAN '])
                ifTrue: [
                        self addPrimaryTranslation: 'H';
                        addSecondaryTranslation: 'H'.
                ] ifFalse: [
                        self addPrimaryTranslation: 'J';
                        addSecondaryTranslation: 'H'.
                ].
                ^self.
        ].
        (currentIndex = 1 and: [firstWord ~= 'JOSE'])
        ifTrue: [
                self addPrimaryTranslation: 'J';
                addSecondaryTranslation: 'A'.
        ] ifFalse: [
                ((currentIndex > 1 and: [(self keyAt: currentIndex -1) isVowel])
                and: [(self isSlavoGermanic: inputKey) not and: [nextLetter == $A or: [nextLetter == $O]]])
                ifTrue: [
                        self addPrimaryTranslation: 'J';
                        addSecondaryTranslation: 'H'.
                ] ifFalse: [
                        currentIndex = inputKey size 
                        ifTrue: [
                                self addPrimaryTranslation: 'J';
                                addSecondaryTranslation: ' '.
                        ] ifFalse: [
                                ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
                                ifTrue: [
                                        self addPrimaryTranslation: 'J';
                                        addSecondaryTranslation: 'J'.
                                ].
                        ].
                ].
        ].
        "a doubled J counts only once"
        nextLetter == $J
        ifTrue: [
                skipCount := skipCount + 1.
        ].

    "Modified: / 28-07-2017 / 11:31:41 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2380
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2381
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2382
processK
        "Encode the letter K at currentIndex: 'K' is always added to both translations.
         If the next letter is another K, skipCount is incremented first
         (mirrors current += 2 in the reference), so that 'KK' is encoded once.

         Reference implementation:
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'K':
                                if(GetAt(current + 1) == 'K')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd(K);
                                break;
        "

        | doubledK |

        doubledK := (self keyAt: currentIndex + 1) = $K.
        doubledK ifTrue: [skipCount := skipCount + 1].
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'

    "Modified: / 28-07-2017 / 11:31:46 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2401
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2402
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2403
processL
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2404
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2405
"case 'L':
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2406
                                if(GetAt(current + 1) == 'L')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2407
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2408
                                        //spanish e.g. 'cabrillo', 'gallegos'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2409
                                        if(((current == (length - 3)) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2410
                                                AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2411
                                                         OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, )) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2412
                                                                AND StringAt((current - 1), 4, ALLE, )) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2413
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2414
                                                MetaphAdd(L,  );
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2415
                                                current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2416
                                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2417
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2418
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2419
                                }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2420
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2421
                                MetaphAdd(L);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2422
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2423
"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2424
        | currentWord |
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2425
        (self keyAt: currentIndex + 1) = $L 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2426
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2427
                (((currentIndex = (inputKey size - 2))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2428
                and: [(currentIndex > 1 and: [#('ILLO' 'ILLA' 'ALLE') includes: (currentWord := inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: inputKey size))])])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2429
                or: [((#('AS' 'OS') includes: (inputKey copyFrom: inputKey size - 1 to: inputKey size)) or: [#($A $O) includes: (self keyAt: inputKey size)]) and: [currentWord = 'ALLE']
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2430
                        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2431
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2432
                        self addPrimaryTranslation: 'L';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2433
                        addSecondaryTranslation: ' '.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2434
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2435
                        ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2436
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2437
                skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2438
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2439
        self addPrimaryTranslation: 'L';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2440
        addSecondaryTranslation: 'L'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2441
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2442
    "Modified: / 28-07-2017 / 11:32:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2443
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2444
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2445
processM
    "Encode the letter M found at currentIndex.
     Always appends 'M' to both the primary and the secondary translation.
     The character after the M is additionally skipped (skipCount is incremented,
     which makes the scan loop in #processRemainingCharacters pass over it) when either
       - the M is the middle of 'UMB' and that 'UMB' ends the key or is followed by 'ER'
         (silent B, as in 'dumb', 'thumb'), or
       - the M is doubled.

     Reference implementation (string quotes stripped):
     case 'M':
                                if((StringAt((current - 1), 3, UMB, )
                                        AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
                                                //'dumb','thumb'
                                                OR  (GetAt(current + 1) == 'M') )
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd(M);
                                break;
    "

    | keySize insideUMB silentB |

    keySize := inputKey size.

    "'UMB' occupies currentIndex - 1 .. currentIndex + 1 (upper bound clamped to the key size)"
    insideUMB := currentIndex > 1
        and: [ (inputKey copyFrom: currentIndex - 1 to: (currentIndex + 1 min: keySize)) = 'UMB' ].

    "the reference looks at exactly TWO characters after the B, i.e. currentIndex + 2 .. currentIndex + 3;
     the former upper bound of currentIndex + 4 fetched three characters and therefore
     only matched 'ER' when it happened to be the very end of the key"
    silentB := insideUMB and: [
        currentIndex + 1 = keySize
            or: [ (inputKey
                        copyFrom: (currentIndex + 2 min: keySize)
                        to: (currentIndex + 3 min: keySize)) = 'ER' ] ].

    (silentB or: [ (self keyAt: currentIndex + 1) = $M ]) ifTrue: [
        skipCount := skipCount + 1.
    ].

    self
        addPrimaryTranslation: 'M';
        addSecondaryTranslation: 'M'.

    "Modified: / 28-07-2017 / 11:32:08 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2469
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2470
processN
    "Encode the letter N found at currentIndex: 'N' goes into both translations.
     A doubled N is coded once; the second N is skipped via skipCount.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'N':
                                if(GetAt(current + 1) == 'N')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd(N);
                                break;
    "

    | isDoubled |

    isDoubled := (self keyAt: currentIndex + 1) = $N.
    isDoubled ifTrue: [ skipCount := skipCount + 1 ].

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.

    "Modified: / 28-07-2017 / 11:32:14 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2491
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2492
processNtilde
    "Encode the letter Ñ: it is coded like a plain N in both translations
     and never causes a following character to be skipped.

     Reference implementation (string quotes stripped):
        case 'Ñ':
                                current += 1;
                                MetaphAdd(N);
                                break;
    "

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2501
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2502
processP
    "Encode the letter P found at currentIndex.
       - 'PH' is coded as 'F' in both translations and the H is skipped.
       - otherwise 'P' is coded in both translations; if the next letter is P or B
         (as in 'campbell', 'raspberry') that letter is skipped.

     Reference implementation (string quotes stripped).
     Beware of the misleading indentation: MetaphAdd(P) is NOT part of the else branch,
     it is executed in both cases:
     case 'P':
                                if(GetAt(current + 1) == 'H')
                                {
                                        MetaphAdd(F);
                                        current += 2;
                                        break;
                                }

                                //also account for campbell, raspberry
                                if(StringAt((current + 1), 1, P, B, ))
                                        current += 2;
                                else
                                        current += 1;
                                        MetaphAdd(P);
                                break;
    "

    | nextLetter |

    nextLetter := self keyAt: currentIndex + 1.

    nextLetter = $H ifTrue: [
        self
            addPrimaryTranslation: 'F';
            addSecondaryTranslation: 'F'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "a following P or B is swallowed ..."
    (#($P $B) includes: nextLetter) ifTrue: [
        skipCount := skipCount + 1.
    ].

    "... but the P itself is always coded (previously it was dropped for PP / PB)"
    self
        addPrimaryTranslation: 'P';
        addSecondaryTranslation: 'P'.

    "Modified: / 28-07-2017 / 11:32:28 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2537
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2538
processQ
    "Encode the letter Q found at currentIndex: 'K' goes into both translations.
     A doubled Q is coded once; the second Q is skipped via skipCount.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Q':
                                if(GetAt(current + 1) == 'Q')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd(K);
                                break;
    "

    | isDoubled |

    isDoubled := (self keyAt: currentIndex + 1) = $Q.
    isDoubled ifTrue: [ skipCount := skipCount + 1 ].

    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:32:32 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2559
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2560
processR
    "Encode the letter R found at currentIndex.
     The secondary translation always receives 'R'. The primary translation receives
     the empty string instead of 'R' when all of the following hold:
       - the R is the last character of inputKey,
       - #isSlavoGermanic: answers false for inputKey,
       - the two characters before the R are 'IE',
       - the two characters before that 'IE' are neither 'ME' nor 'MA'.
     A doubled R is coded once; the second R is skipped via skipCount.

     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'R':
                                //french e.g. 'rogier', but exclude 'hochmeier'
                                if((current == last)
                                        AND !!SlavoGermanic()
                                                AND StringAt((current - 2), 2, IE, )
                                                        AND !!StringAt((current - 4), 2, ME, MA, ))
                                        MetaphAdd(, R);
                                else
                                        MetaphAdd(R);

                                if(GetAt(current + 1) == 'R')
                                        current += 2;
                                else
                                        current += 1;
                                break;
    "

    | primaryCode precedingPair earlierPair |

    primaryCode := 'R'.

    currentIndex = inputKey size ifTrue: [
        (self isSlavoGermanic: inputKey) ifFalse: [
            "substring bounds are clamped to 1 so that short keys do not index before the start"
            precedingPair := inputKey
                                copyFrom: ((currentIndex - 2) max: 1)
                                to: ((currentIndex - 1) max: 1).
            precedingPair = 'IE' ifTrue: [
                earlierPair := inputKey
                                    copyFrom: ((currentIndex - 4) max: 1)
                                    to: ((currentIndex - 3) max: 1).
                (#('ME' 'MA') includes: earlierPair) ifFalse: [
                    primaryCode := ''
                ]
            ]
        ]
    ].

    self addPrimaryTranslation: primaryCode.
    self addSecondaryTranslation: 'R'.

    (self keyAt: currentIndex + 1) = $R ifTrue: [
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:32:37 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2599
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2600
processRemainingCharacters
    "Scan inputKey from startIndex up to its last character.
     While skipCount is non-zero, positions are merely consumed (skipCount is decremented).
     Otherwise the scan ends as soon as both translations are longer than 4 characters;
     if not, currentIndex is set to the position and, unless the character there is a vowel
     or $Y, the matching #processXXX method is performed for it
     (#processCedille for $Ç, #processNtilde for $Ñ, 'process' , <the character> for all others)."

    startIndex to: inputKey size do: [ :position |
        | letter selector |

        skipCount ~= 0
            ifTrue: [
                "this character was already consumed by an earlier handler"
                skipCount := skipCount - 1
            ]
            ifFalse: [
                (primaryTranslation size > 4 and: [ secondaryTranslation size > 4 ])
                    ifTrue: [ ^ self ].

                currentIndex := position.
                letter := self keyAt: position.

                (letter isVowel or: [ letter = $Y ]) ifFalse: [
                    selector := letter == $Ç
                        ifTrue: [ #processCedille ]
                        ifFalse: [
                            letter == $Ñ
                                ifTrue: [ #processNtilde ]
                                ifFalse: [ ('process', letter asString) asSymbol ]
                        ].
                    self perform: selector
                ]
            ]
    ]

    "Modified: / 28-07-2017 / 11:24:15 / cg"
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2628
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2629
processS
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2630
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2631
        case 'S':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2632
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2633
                                if(StringAt((current - 1), 3, ISL, YSL, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2634
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2635
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2636
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2637
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2638
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2639
                                //special case 'sugar-'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2640
                                if((current == 0) AND StringAt(current, 5, SUGAR, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2641
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2642
                                        MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2643
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2644
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2645
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2646
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2647
                                if(StringAt(current, 2, SH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2648
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2649
                                        //germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2650
                                        if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2651
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2652
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2653
                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2654
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2655
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2656
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2657
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2658
                                //italian & armenian
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2659
                                if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2660
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2661
                                        if(!!SlavoGermanic())
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2662
                                                MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2663
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2664
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2665
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2666
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2667
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2668
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2669
                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2670
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2671
                                if(((current == 0) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2672
                                                AND StringAt((current + 1), 1, M, N, L, W, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2673
                                                        OR StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2674
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2675
                                        MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2676
                                        if(StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2677
                                                current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2678
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2679
                                                current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2680
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2681
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2682
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2683
                                if(StringAt(current, 2, SC, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2684
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2685
                                        //Schlesinger's rule
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2686
                                        if(GetAt(current + 2) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2687
                                                //dutch origin, e.g. 'school', 'schooner'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2688
                                                if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2689
                                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2690
                                                        //'schermerhorn', 'schenker'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2691
                                                        if(StringAt((current + 3), 2, ER, EN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2692
                                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2693
                                                                MetaphAdd(X, SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2694
                                                        }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2695
                                                                MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2696
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2697
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2698
                                                }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2699
                                                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2700
                                                                MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2701
                                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2702
                                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2703
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2704
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2705
                                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2706
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2707
                                        if(StringAt((current + 2), 1, I, E, Y, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2708
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2709
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2710
                                                current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2711
                                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2712
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2713
                                        //else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2714
                                        MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2715
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2716
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2717
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2718
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2719
                                //french e.g. 'resnais', 'artois'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2720
                                if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2721
                                        MetaphAdd(, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2722
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2723
                                        MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2724
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2725
                                if(StringAt((current + 1), 1, S, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2726
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2727
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2728
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2729
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2730
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2731
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2732
        | nextChar char2 chars char |
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2733
        (#('ISL' 'YSL') includes: (inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: inputKey size))) 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2734
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2735
                ^self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2736
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2737
        (currentIndex = 1 and: [(inputKey copyFrom: 1 to: (5 min: inputKey size)) = 'SUGAR'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2738
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2739
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2740
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2741
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2742
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2743
        (inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SH'
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2744
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2745
                (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (inputKey copyFrom: (currentIndex + 1 min: inputKey size) to: ((currentIndex + 5) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2746
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2747
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2748
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2749
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2750
                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2751
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2752
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2753
                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2754
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2755
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2756
        ((#('SIO' 'SIA') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 2 min: inputKey size)))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2757
                or: [(inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)) = 'SIAN'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2758
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2759
                (self isSlavoGermanic: inputKey) not
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2760
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2761
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2762
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2763
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2764
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2765
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2766
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2767
                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2768
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2769
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2770
        ((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2771
                or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2772
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2773
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2774
                addSecondaryTranslation: 'X'.
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
  2775
                nextChar == $Z
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2776
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2777
                    skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2778
                        ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2779
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2780
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2781
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2782
        ((inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SC')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2783
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2784
                (char2 := self keyAt: currentIndex + 2) = $H
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2785
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2786
                        (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := inputKey copyFrom: ((currentIndex + 3) min: inputKey size) to: ((currentIndex + 4) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2787
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2788
                                (#('ER' 'EN') includes: chars)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2789
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2790
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2791
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2792
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2793
                                        self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2794
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2795
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2796
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2797
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2798
                        ] ifFalse: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2799
                                ((currentIndex = 1 and: [(char := inputKey at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2800
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2801
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2802
                                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2803
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2804
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2805
                                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2806
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2807
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2808
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2809
                        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2810
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2811
                        (#($I $E $Y) includes: char2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2812
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2813
                                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2814
                                addSecondaryTranslation: 'S'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2815
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2816
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2817
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2818
                                self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2819
                                addSecondaryTranslation: 'SK'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2820
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2821
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2822
                        ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2823
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2824
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2825
        (currentIndex = inputKey size and: [(#('AI' 'OI') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2826
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2827
                self addPrimaryTranslation: '';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2828
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2829
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2830
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2831
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2832
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2833
        (#($S $Z) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2834
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2835
            skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2836
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2837
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2838
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2839
    "Modified: / 28-07-2017 / 11:34:18 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2840
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2841
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2842
processT
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2843
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2844
        case 'T':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2845
                                if(StringAt(current, 4, TION, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2846
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2847
                                        MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2848
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2849
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2850
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2851
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2852
                                if(StringAt(current, 3, TIA, TCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2853
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2854
                                        MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2855
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2856
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2857
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2858
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2859
                                if(StringAt(current, 2, TH, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2860
                                        OR StringAt(current, 3, TTH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2861
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2862
                                        //special case 'thomas', 'thames' or germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2863
                                        if(StringAt((current + 2), 2, OM, AM, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2864
                                                OR StringAt(0, 4, VAN , VON , ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2865
                                                        OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2866
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2867
                                                MetaphAdd(T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2868
                                        }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2869
                                                MetaphAdd(0, T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2870
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2871
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2872
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2873
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2874
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2875
                                if(StringAt((current + 1), 1, T, D, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2876
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2877
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2878
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2879
                                MetaphAdd(T);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2880
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2881
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2882
        ((inputKey copyFrom: currentIndex to: ((currentIndex + 3) min: inputKey size)) = 'TION')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2883
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2884
                self addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2885
                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2886
                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2887
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2888
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2889
        (#('TIA' 'TCH') includes: (inputKey copyFrom: currentIndex to: ((currentIndex + 2) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2890
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2891
                self addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2892
                addSecondaryTranslation: 'X'. 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2893
                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2894
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2895
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2896
        (((inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'TH') or: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2897
                ((inputKey copyFrom: currentIndex to: ((currentIndex + 2) min: inputKey size)) = 'TTH')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2898
        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2899
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2900
                ((#('OM' 'AM') includes: (inputKey copyFrom: currentIndex + 2 to: ((currentIndex + 3) min: inputKey size)))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2901
                or: [(#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: (4 min: inputKey size)))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2902
                        or: [(inputKey copyFrom: 1 to: (3 min: inputKey size)) = 'SCH']
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2903
                        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2904
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2905
                        self addPrimaryTranslation: 'T';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2906
                        addSecondaryTranslation: 'T'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2907
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2908
                        self addPrimaryTranslation: '0';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2909
                        addSecondaryTranslation: 'T'.   
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2910
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2911
                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2912
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2913
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2914
        (#($T $D) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2915
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2916
                skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2917
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2918
        self addPrimaryTranslation: 'T';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2919
        addSecondaryTranslation: 'T'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2920
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2921
    "Modified: / 28-07-2017 / 11:33:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2922
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2923
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2924
processV
    "Encode the letter V at currentIndex.
     'F' is appended to both the primary and the secondary translation.
     When the following letter is also a V, skipCount is incremented as well,
     mirroring the 'current += 2' of the reference implementation.

     Reference (http://aspell.sourceforge.net/metaphone/dmetaph.cpp):
        case 'V':
            if(GetAt(current + 1) == 'V')
                current += 2;
            else
                current += 1;
            MetaphAdd(F);
            break;
    "

    | followedByV |

    followedByV := (self keyAt: currentIndex + 1) = $V.
    followedByV ifTrue: [
        skipCount := skipCount + 1
    ].

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    "Modified: / 28-07-2017 / 11:34:27 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2945
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2946
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2947
processW
    "Encode the letter W at currentIndex.
     Port of the 'W' case of the reference implementation
     (http://aspell.sourceforge.net/metaphone/dmetaph.cpp):

        case 'W':
            //can also be in middle of word
            if(StringAt(current, 2, WR, ))
            {
                MetaphAdd(R);
                current += 2;
                break;
            }

            if((current == 0)
                AND (IsVowel(current + 1) OR StringAt(current, 2, WH, )))
            {
                //Wasserman should match Vasserman
                if(IsVowel(current + 1))
                    MetaphAdd(A, F);
                else
                    //need Uomo to match Womo
                    MetaphAdd(A);
            }

            //Arnow should match Arnoff
            if(((current == last) AND IsVowel(current - 1))
                OR StringAt((current - 1), 5, EWSKI, EWSKY, OWSKI, OWSKY, )
                OR StringAt(0, 3, SCH, ))
            {
                MetaphAdd(, F);
                current +=1;
                break;
            }

            //polish e.g. 'filipowicz'
            if(StringAt(current, 4, WICZ, WITZ, ))
            {
                MetaphAdd(TS, FX);
                current +=4;
                break;
            }

            //else skip it
            current +=1;
            break;

     Differences to the previous version of this method:
       - the vowel / WH rule is applied to a word-initial W only, as in the
         reference. Before, a WH in the middle of a word entered the branch
         with nextLetter still unassigned (nil).
       - the WICZ / WITZ test looks at exactly four characters. Before, five
         were copied, so the rule only fired when the word ended right after
         the suffix.
    "

    | word nextLetter |

    word := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: inputKey size).

    "WR -> R; the R is consumed together with the W"
    word = 'WR' ifTrue: [
        self addPrimaryTranslation: 'R';
             addSecondaryTranslation: 'R'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "word-initial W only: W+vowel -> A (alternative F), WH -> A.
     No return here; the rules below are still checked, as in the reference"
    currentIndex = 1 ifTrue: [
        nextLetter := self keyAt: currentIndex + 1.
        nextLetter isVowel ifTrue: [
            "Wasserman should match Vasserman"
            self addPrimaryTranslation: 'A';
                 addSecondaryTranslation: 'F'.
        ] ifFalse: [
            word = 'WH' ifTrue: [
                "need Uomo to match Womo"
                self addPrimaryTranslation: 'A';
                     addSecondaryTranslation: 'A'.
            ]
        ]
    ].

    "Arnow should match Arnoff: final W after a vowel, -EWSKI/-OWSKI style names,
     or a key starting with SCH -> nothing in the primary, F in the secondary"
    ((((currentIndex = inputKey size) and: [(self keyAt: currentIndex - 1) isVowel])
        or: [#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY') includes: (inputKey copyFrom: ((currentIndex - 1) max: 1) to: (currentIndex + 3 min: inputKey size))])
            or: [inputKey startsWith:'SCH'])
    ifTrue: [
        self addPrimaryTranslation: '';
             addSecondaryTranslation: 'F'.
        ^ self
    ].

    "polish e.g. 'filipowicz': four characters starting at the W"
    (#('WICZ' 'WITZ') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)))
    ifTrue: [
        self addPrimaryTranslation: 'TS';
             addSecondaryTranslation: 'FX'.
        skipCount := skipCount + 3.
        ^ self
    ].

    "else: the W is silent, nothing is added"

    "Modified: / 28-07-2017 / 11:34:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3029
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3030
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3031
processX
    "Encode the letter X at currentIndex.
     Port of the 'X' case of the reference implementation
     (http://aspell.sourceforge.net/metaphone/dmetaph.cpp):

        case 'X':
            //french e.g. breaux
            if(!!((current == last)
                AND (StringAt((current - 3), 3, IAU, EAU, )
                     OR StringAt((current - 2), 2, AU, OU, ))) )
                MetaphAdd(KS);

            if(StringAt((current + 1), 1, C, X, ))
                current += 2;
            else
                current += 1;
            break;

     'KS' is added to both translations unless the X is the last letter of a
     french-style ending (-IAUX, -EAUX, -AUX, -OUX).

     Differences to the previous version of this method:
       - the start of the look-behind window is clamped with max: (it was min:,
         which produced a start index of 1 or a non-positive index).
       - the window ends at currentIndex - 1, i.e. it covers the letters in
         front of the X as in the reference. Before, the X itself was part
         of the copied string, so 'IAU' / 'EAU' / 'AU' / 'OU' could not match.
    "

    | isLastLetter frenchEnding |

    isLastLetter := currentIndex = inputKey size.
    frenchEnding := isLastLetter and: [
        (#('IAU' 'EAU') includes: (inputKey copyFrom: ((currentIndex - 3) max: 1) to: currentIndex - 1))
        or: [#('AU' 'OU') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: currentIndex - 1)]
    ].

    frenchEnding ifFalse: [
        self addPrimaryTranslation: 'KS';
             addSecondaryTranslation: 'KS'.
    ].

    "a following C or X is consumed together with this X"
    (#($C $X) includes: (self keyAt: currentIndex + 1))
    ifTrue: [
        skipCount := skipCount + 1.
        ^ self
    ]

    "Modified: / 28-07-2017 / 11:34:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3062
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3063
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3064
processZ
    "Encode the letter Z at currentIndex.
     Port of the 'Z' case of the reference implementation
     (http://aspell.sourceforge.net/metaphone/dmetaph.cpp):

        case 'Z':
            //chinese pinyin e.g. 'zhao'
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(J);
                current += 2;
                break;
            }else
                if(StringAt((current + 1), 2, ZO, ZI, ZA, )
                    OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
                {
                    MetaphAdd(S, TS);
                }
                else
                    MetaphAdd(S);

            if(GetAt(current + 1) == 'Z')
                current += 2;
            else
                current += 1;
            break;

     Difference to the previous version of this method:
       - the preceding letter is compared against the Character $T. Before,
         it was compared against the String 'T', which is never equal to a
         Character, so the 'not preceded by T' guard had no effect.
    "

    "chinese pinyin e.g. 'zhao': ZH -> J, the H is consumed too"
    (self keyAt: currentIndex + 1) = $H
    ifTrue: [
        self addPrimaryTranslation: 'J';
             addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    ((#('ZO' 'ZI' 'ZA') includes: (inputKey copyFrom: ((currentIndex + 1) min: inputKey size) to: ((currentIndex + 2) min: inputKey size))) or: [
        (self isSlavoGermanic: inputKey) and: [currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $T]]
    ])
    ifTrue: [
        self addPrimaryTranslation: 'S';
             addSecondaryTranslation: 'TS'.
    ] ifFalse: [
        self addPrimaryTranslation: 'S';
             addSecondaryTranslation: 'S'.
    ].

    "a doubled ZZ is consumed as one unit"
    (self keyAt: currentIndex + 1) = $Z
    ifTrue: [
        skipCount := skipCount + 1.
    ]

    "Modified: / 28-07-2017 / 11:35:12 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3114
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3115
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3116
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3117
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3118
documentation
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3119
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3120
     The 'Kölner Phonetik' (Cologne phonetics) code is for the German language
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3121
     what the soundex code is for English:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3122
        it returns similar strings for similar sounding words 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3123
     (but is specifically aware of the pronunciation of German and eastern languages).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3124
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3125
     There are some other differences to soundex, though: 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3126
        its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3127
        it does not start with the first character of the input, but returns a pure numeric string.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3128
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3129
     This algorithm was described by Postel 1969,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3130
     See  http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3131
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3132
    self new phoneticStringsFor:'Müller-Lüdenscheidt' -> #('65752682')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3133
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3134
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3135
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3136
examples
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3137
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3138
   words sounding similar (German pronunciation) will deliver a similar code:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3139
   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3140
     #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3141
        'Müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3142
        'Miller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3143
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3144
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3145
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3146
        'Mülherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3147
        'Myler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3148
        'Millar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3149
        'Myller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3150
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3151
        'Müler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3152
        'Muehler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3153
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3154
        'Müllerr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3155
        'Muehlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3156
        'Muellar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3157
        'Mueler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3158
        'Mülleer'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3159
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3160
        'Nüller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3161
        'Nyller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3162
        'Niler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3163
        'Czerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3164
        'Tscherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3165
        'Czernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3166
        'Tschernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3167
        'Schernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3168
        'Scherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3169
        'Scherno'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3170
        'Czerne'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3171
        'Zerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3172
        'Tzernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3173
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3174
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3175
        'Breschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3176
        'Breschnjeff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3177
        'Braeschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3178
        'Braessneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3179
        'Pressneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3180
        'Presznäph'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3181
        'Präschnäf' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3182
        'Breschnjeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3183
        'Breschnijeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3184
        'Breschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3185
        'Bräschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3186
        'Braschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3187
        'Broschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3188
     ) do:[:w |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3189
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3190
     ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3191
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3192
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3193
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3194
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3195
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3196
encode: aString
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3197
    "return a koelner phonetic code.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3198
     The koelnerPhonetic code is for the German language what the soundex code is for English;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3199
     it returns similar strings for similar sounding words.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3200
     There are some differences to soundex, though: 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3201
        its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3202
        it does not start with the first character of the input.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3203
     This algorithm is described by Postel 1969"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3204
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3205
    |in ret val rslt|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3206
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3207
    in := aString withoutSeparators asLowercase.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3208
    in := in copyReplaceString:'ph' withString:'f'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3209
    (in includesAny:'öäüß') ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3210
        in := in copyReplaceAll:$ü withAll:'u'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3211
        in := in copyReplaceAll:$ä withAll:'a'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3212
        in := in copyReplaceAll:$ö withAll:'o'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3213
        in := in copyReplaceAll:$ß withAll:'ss'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3214
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3215
    in := in select:[:ch | ch isLetter].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3216
    in := '#',in,'#'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3217
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3218
    ret := ''.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3219
    1 to:in size-2 do:[:i |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3220
        |sub|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3221
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3222
        sub := in copyFrom:i to:i+2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3223
        val := (i==1) 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3224
                    ifTrue:[ self convertFirst:sub ] 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3225
                    ifFalse:[ self convertRest:sub ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3226
        ret := ret,val
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3227
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3228
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3229
    ret := ret select:[:ch | ch ~= $-].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3230
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3231
    (ret startsWith:'0') ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3232
        ret := '0',(ret select:[:ch | ch ~= $0]).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3233
    ] ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3234
        ret := ret select:[:ch | ch ~= $0].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3235
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3236
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3237
    rslt := String streamContents:[:s |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3238
        |prev|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3239
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3240
        ret do:[:ch |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3241
            ch ~= prev ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3242
                s nextPut:ch
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3243
            ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3244
            prev := ch.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3245
        ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3246
      ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3247
    ^ rslt.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3248
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3249
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3250
     #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3251
        'Müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3252
        'Miller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3253
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3254
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3255
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3256
        'Mülherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3257
        'Myler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3258
        'Millar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3259
        'Myller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3260
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3261
        'Müler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3262
        'Muehler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3263
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3264
        'Müllerr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3265
        'Muehlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3266
        'Muellar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3267
        'Mueler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3268
        'Mülleer'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3269
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3270
        'Nüller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3271
        'Nyller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3272
        'Niler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3273
        'Czerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3274
        'Tscherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3275
        'Czernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3276
        'Tschernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3277
        'Schernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3278
        'Scherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3279
        'Scherno'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3280
        'Czerne'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3281
        'Zerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3282
        'Tzernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3283
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3284
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3285
        'Breschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3286
        'Breschnjeff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3287
        'Braeschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3288
        'Braessneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3289
        'Pressneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3290
        'Presznäph'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3291
        'Präschnäf' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3292
        'Breschnjeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3293
        'Breschnijeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3294
        'Breschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3295
     ) do:[:w |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3296
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3297
     ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3298
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3299
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3300
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3301
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnew' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3302
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3303
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braeschneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3304
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braessneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3305
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Pressneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3306
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Presznäph' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3307
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Präschnäf' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3308
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnjeff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3309
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnijeff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3310
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnieff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3311
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3312
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3313
     self basicNew encode:'müller'      -> '657'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3314
     self basicNew encode:'möller'      -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3315
     self basicNew encode:'miller'      -> '657'     
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3316
     self basicNew encode:'muller'      -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3317
     self basicNew encode:'muler'       -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3318
     self basicNew encode:'schmidt'     -> '862'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3319
     self basicNew encode:'schneider'   -> '8627' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3320
     self basicNew encode:'fischer'     -> '387' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3321
     self basicNew encode:'weber'       -> '317' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3322
     self basicNew encode:'meyer'       -> '67' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3323
     self basicNew encode:'wagner'      -> '3467' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3324
     self basicNew encode:'schulz'      -> '858'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3325
     self basicNew encode:'becker'      -> '147'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3326
     self basicNew encode:'hoffmann'    -> '036'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3327
     self basicNew encode:'schäfer'     -> '837' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3328
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3329
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3330
    "Created: / 28-07-2017 / 15:24:33 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3331
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3332
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3333
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3334
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3335
convertFirst:chars
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3336
    |c2 c3|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3337
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3338
    chars size == 3 ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3339
        c2 := (chars at:2).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3340
        c2 == $a ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3341
        c2 == $e ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3342
        c2 == $i ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3343
        c2 == $j ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3344
        c2 == $y ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3345
        c2 == $o ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3346
        c2 == $u ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3347
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3348
        c2 == $c ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3349
            c3 := (chars at:3).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3350
            (c3 == $a) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3351
            (c3 == $h) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3352
            (c3 == $k) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3353
            (c3 == $l) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3354
            (c3 == $o) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3355
            (c3 == $q) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3356
            (c3 == $r) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3357
            (c3 == $u) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3358
            (c3 == $x) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3359
            ^ '8'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3360
        ].    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3361
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3362
"/        #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3363
"/            ('#a#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3364
"/            ('#e#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3365
"/            ('#i#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3366
"/            ('#j#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3367
"/            ('#y#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3368
"/            ('#o#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3369
"/            ('#u#' '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3370
"/
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3371
"/            ('#ca' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3372
"/            ('#ch' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3373
"/            ('#ck' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3374
"/            ('#cl' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3375
"/            ('#co' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3376
"/            ('#cq' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3377
"/            ('#cr' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3378
"/            ('#cu' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3379
"/            ('#cx' '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3380
"/
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3381
"/            ('#c#' '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3382
"/        ) do:[:pair | 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3383
"/            (pair first match:chars) ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3384
"/                ^ pair second
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3385
"/            ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3386
"/        ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3387
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3388
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3389
    ^ self convertRest:chars
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3390
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3391
    "Modified: / 29-07-2017 / 14:22:20 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3392
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3393
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3394
convertRest:chars
    "Answer the phonetic code string for chars by table lookup.
     chars must have exactly 3 elements; for any other size an error is
     signalled via self error: and the statement after it answers the
     question-mark string.

     The literal table holds (prefix  suffixOrChar  code) entries which are
     tried from top to bottom. The first entry that matches answers its code,
     so the more specific two-character entries have to stay ahead of the
     single-character ones:
       - first element not nil:     matches if chars starts with that string
       - second element a Character: matches if it is identical (==) to the
                                     element at index 2 of chars
       - second element a String:    matches if chars ends with that string
       - both nil (last entry):      catch-all, answers the question-mark string
     The quoted patterns inside each table row are the former glob patterns,
     kept for reference only.

     Because of the catch-all last entry, the self error: after the loop is
     not expected to be reached.

     NOTE(review): chars presumably is the window (previous, current, next)
     around the character being encoded, so that index 2 is the current
     character - confirm against the caller.
     NOTE(review): all table entries are lowercase; presumably the caller
     passes lowercase input - confirm."
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3395
    chars size == 3 ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3396
        self error:'cannot happen'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3397
        ^ '?' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3398
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3399
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3400
    #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3401
        "/ used to be matchpattern code,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3402
        "/ but doing these glob-matches is too slow.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3403
        "/ changed to:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3404
        "/    start nil  code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3405
        "/    nil   end  code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3406
        "/    nil   char code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3407
        "/    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3408
        (nil 'ds' " '#ds' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3409
        (nil 'dc' " '#dc' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3410
        (nil 'dz' " '#dz' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3411
        (nil 'ts' " '#ts' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3412
        (nil 'tc' " '#tc' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3413
        (nil 'tz' " '#tz' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3414
        (nil $d   " '#d#' " '2')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3415
        (nil $t   " '#t#' " '2')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3416
        ('cx' nil " 'cx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3417
        ('kx' nil " 'kx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3418
        ('qx' nil " 'qx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3419
        (nil $x   " '#x#' " '48')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3420
        ('sc' nil " 'sc#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3421
        ('sz' nil " 'sz#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3422
        (nil 'ca' " '#ca' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3423
        (nil 'co' " '#co' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3424
        (nil 'cu' " '#cu' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3425
        (nil 'ch' " '#ch' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3426
        (nil 'ck' " '#ck' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3427
        (nil 'cx' " '#cx' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3428
        (nil 'cq' " '#cq' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3429
        (nil $c   " '#c#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3430
        (nil $a   " '#a#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3431
        (nil $e   " '#e#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3432
        (nil $i   " '#i#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3433
        (nil $j   " '#j#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3434
        (nil $y   " '#y#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3435
        (nil $o   " '#o#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3436
        (nil $u   " '#u#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3437
        (nil $h   " '#h#' " '-')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3438
        (nil $l   " '#l#' " '5')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3439
        (nil $r   " '#r#' " '7')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3440
        (nil $m   " '#m#' " '6')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3441
        (nil $n   " '#n#' " '6')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3442
        (nil $s   " '#s#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3443
        (nil $z   " '#z#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3444
        (nil $b   " '#b#' " '1')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3445
        (nil $p   " '#p#' " '1')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3446
        (nil $f   " '#f#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3447
        (nil $v   " '#v#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3448
        (nil $w   " '#w#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3449
        (nil $g   " '#g#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3450
        (nil $k   " '#k#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3451
        (nil $q   " '#q#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3452
        (nil nil  " '###' " '?')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3453
    ) do:[:vector |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3454
        |v1 v2|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3455
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3456
        (v1 := vector at:1) notNil ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3457
            "/ prefix
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3458
            (chars startsWith:v1) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3459
        ] ifFalse:[                       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3460
            (v2 := vector at:2) isCharacter ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3461
                "/ middle character compare
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3462
                (chars at:2) == v2 ifTrue:[^ (vector at:3) ]. 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3463
            ] ifFalse:[    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3464
                v2 isString ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3465
                    "/ suffix
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3466
                    (chars endsWith:v2) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3467
                ] ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3468
                   ^ '?' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3469
                ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3470
            ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3471
        ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3472
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3473
        "/ (vector first match:chars) ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3474
        "/     ^ vector second
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3475
        "/ ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3476
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3477
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3478
    self error:'cannot happen'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3479
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3480
    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3481
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3482
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3483
!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3484
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3485
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3486
"
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3487
    Miracode (also called American Soundex) is like Soundex, with the additional rule that h and w are
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3488
    discarded when they separate consonants, so that equal codes separated only by h or w are emitted once.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3489
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3490
    These variants may be especially important because they were used in the U.S. National Archives.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3491
    Most archive data was encoded with Miracode, but some entries were encoded with
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3492
    Simplified Soundex.
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3493
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3494
    The HW-rule was documented as a standard in 1910, but in practice the data of the 1880, 1900 and 1910
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3495
    censuses was encoded with mixed methods.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3496
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3497
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3498
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3499
!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3500
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3501
encode:word 
    "Answer the 4-character Miracode (American Soundex) key for word.

     The word is uppercased and its first character is kept as is. Every
     following character is mapped with translate: and its code is appended
     unless the code is nil, is the string 0, or equals the code remembered
     from the preceding character. The remembered code is not updated for
     W and H, so equal codes separated only by those letters are emitted
     once. As soon as the key has 4 characters it is answered; a shorter
     key is padded on the right with 0 characters.

     NOTE(review): an empty word is not guarded against - first is sent to
     the uppercased word before any size check; confirm that callers never
     pass an empty string.
     NOTE(review): translate: is defined elsewhere; it presumably answers a
     one-character code string, the string 0 for vowels and nil for
     characters without a code - confirm."
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3502
    |u p t prevCode|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3503
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3504
    u := word asUppercase.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3505
    p := u first asString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3506
    prevCode := self translate:u first.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3507
    u from:2 to:u size do:[:c | 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3508
        t := self translate:c.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3509
        (t notNil 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3510
        and:[ t ~= '0' 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3511
        and:[ t ~= prevCode ]]) ifTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3512
            p := p , t.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3513
            p size == 4 ifTrue:[^ p ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3514
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3515
        (c ~= $W and:[c ~= $H]) ifTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3516
            prevCode := t.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3517
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3518
    ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3519
    [ p size < 4 ] whileTrue:[
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3520
        p := p , '0'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3521
    ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3522
    ^ (p copyFrom:1 to:4)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3523
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  3524
    "Created: / 28-07-2017 / 15:23:16 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3525
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  3526
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  3527
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  3528
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  3529
version
    "Answer the version header string of this class source.
     NOTE(review): the literal looks like an unexpanded Header keyword
     placeholder that the version control system fills in - confirm."
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  3530
    ^ '$Header$'
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  3531
!
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  3532
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  3533
version_CVS
    "Answer the CVS version header string of this class source.
     NOTE(review): the literal looks like an unexpanded Header keyword
     placeholder that the version control system fills in - confirm."
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  3534
    ^ '$Header$'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  3535
! !
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  3536