PhoneticStringUtilities.st
author Claus Gittinger <cg@exept.de>
Tue, 25 Jun 2019 14:28:51 +0200
changeset 5050 44fa8672d102
parent 4521 cfe4f333794f
child 5212 76ae0b6f061e
permissions -rw-r--r--
#DOCUMENTATION by cg class: SharedQueue comment/format in: #next #nextWithTimeout:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     1
"{ Encoding: utf8 }"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     2
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    17
"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    18
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	instanceVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	classVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	poolDictionaries:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    26
Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    27
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    28
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    29
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    30
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    31
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    32
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    33
PhoneticStringUtilities::PhoneticStringComparator subclass:#DaitchMokotoffStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    34
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    35
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    36
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    37
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    38
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    39
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    40
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    41
PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    42
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    43
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    44
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    45
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    46
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    47
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    48
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    49
PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    50
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    51
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    52
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    53
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    54
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    55
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    56
PhoneticStringUtilities::PhoneticStringComparator subclass:#SingleResultPhoneticStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    57
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    58
	classVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    59
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    60
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    61
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    62
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    63
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MRAStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    64
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    65
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    66
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    67
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    68
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    69
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    70
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    71
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    72
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    73
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    74
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    75
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    76
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    77
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    78
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SoundexStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    79
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    80
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    81
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    82
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    83
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    84
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    85
PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    86
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    87
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    88
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    89
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    90
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    91
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    92
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#NYSIISStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    93
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    94
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    95
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    96
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    97
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    98
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    99
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#PhonemStringComparator
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   100
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   101
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   102
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   103
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   104
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   105
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   106
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#Caverphone2StringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   107
	instanceVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   108
	classVariableNames:'CharacterTranslationDict'
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   109
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   110
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   111
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   112
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   113
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   114
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   115
	classVariableNames:'CharacterTranslationDict'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   116
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   117
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   118
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   119
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   120
PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   121
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   122
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   123
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   124
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   125
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   126
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   127
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SpanishPhoneticCodeStringComparator
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   128
	instanceVariableNames:''
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   129
	classVariableNames:'CharacterTranslationDict'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   130
	poolDictionaries:''
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   131
	privateIn:PhoneticStringUtilities
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   132
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   133
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
copyright
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   138
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   139
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   140
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   141
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   142
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   143
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   145
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   146
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   147
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   148
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   149
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   150
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   151
documentation
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   152
"
2445
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   153
    Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   154
    These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   155
    to provide good results for non-English languages.
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   156
    
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   157
    soundexCode
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   158
        this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   159
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   160
    nysiis
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   161
        a modified soundex algorithm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   162
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   163
    miracode
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   164
        another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   165
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   166
    mySQLSoundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   167
        another modified soundex algorithm used in mySQL.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   168
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   169
    koelner phoneticCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   170
        provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   171
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   172
    Double metaphone 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   173
        works with most european languages.
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   174
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   175
    phonem
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   176
        described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   177
        from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   178
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   179
    mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   180
        Match Rating Approach phonetic algorithm, developed by Western Airlines in 1977.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   181
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   182
    caverphone2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   183
        better than soundex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   184
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   185
    spanish phonetic code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   186
        an algorithm slightly adjusted to spanish names
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   187
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   188
    More information for German readers can be found in:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   189
        http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   190
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   191
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   192
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   193
sampleData
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   194
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   195
    for some of the 50 most common German names, we get:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   196
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   197
                            ext. 
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   198
    name        soundex   soundex   metaphone   phonet  phonet2     phonix      daitch  phonem      koeln  caverphone2  mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   199
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   200
    müller      M460    54600000    MLR         MÜLA    NILA        M4000000    689000  MYLR        657    MLA1111111   MLR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   201
    schmidt     S530    25300000    SKMTT       SHMIT   ZNIT        S5300000    463000  CMYD        862    SKMT111111   SCHMDT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   202
    schneider   S536    25360000    SKNTR       SHNEIDA ZNEITA      S5300000    463900  CNAYDR      8627   SKNTA11111   SCHNDR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   203
    fischer     F260    12600000    FSKR        FISHA   FIZA        F8000000    749000  VYCR        387    FSKA111111   FSCHR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   204
    weber       W160    16000000    WBR         WEBA    FEBA        $1000000    779000  VBR         317    WPA1111111   WBR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   205
    meyer       M600    56000000    MYR         MEIA    NEIA        M0000000    619000  MAYR        67     MA11111111   MYR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   206
    wagner      W256    25600000    WKNR        WAKNA   FAKNA       $2500000    756900  VACNR       3467   WKNA111111   WGNR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   207
    schulz      S420    24200000    SKLS        SHULS   ZULZ        S4800000    484000  CULC        858    SKS1111111   SCHLZ
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   208
    becker      B260    12600000    BKR         BEKA    BEKA        B2000000    759000  BCR         147    PKA1111111   BCKR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   209
    hoffmann    H155    15500000    HFMN        HOFMAN  UFNAN       $7550000    576600  OVMAN       036    AFMN111111   HFMN
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   210
    schäfer     S160    21600000    SKFR        SHEFA   ZEFA        S7000000    479000  CVR         837    SKFA111111   SCHFR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   211
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   212
    |cls|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   213
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   214
    cls := MRAStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   215
    cls := SoundexStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   216
    cls := KoelnerPhoneticCodeStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   217
    cls := Caverphone2StringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   218
    #('müller' 'schmidt' 'schneider' 'fischer' 'weber' 'meyer' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   219
      'wagner' 'schulz'  'becker'    'hoffmann' 'schäfer')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   220
    do:[:name |
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   221
        Transcript show:''''; show:name; show:''' -> '''; show:(cls encode:name); showCR:''''.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   222
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   223
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   224
    KoelnerPhoneticCodeStringComparator encode:'Müller-Lüdenscheidt'  -> '65752682'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   225
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   226
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   227
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   228
!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   229
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   230
koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   231
    "return a koelner phonetic code.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   232
     The koelnerPhonetic code is for the German language what the soundex code is for English;
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   233
     it returns similar strings for similar sounding words.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   234
     There are some differences to soundex, though: 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   235
        its length is not limited to 4, but depends on the length of the original string;
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   236
        it does not start with the first character of the input.
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   237
     This algorithm is described by Postel 1969"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   238
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   239
    ^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   240
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   241
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   242
     #(
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   243
        'Müller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   244
        'Miller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   245
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   246
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   247
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   248
        'Mülherr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   249
        'Myler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   250
        'Millar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   251
        'Myller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   252
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   253
        'Müler'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   254
        'Muehler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   255
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   256
        'Müllerr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   257
        'Muehlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   258
        'Muellar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   259
        'Mueler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   260
        'Mülleer'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   261
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   262
        'Nüller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   263
        'Nyller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   264
        'Niler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   265
        'Czerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   266
        'Tscherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   267
        'Czernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   268
        'Tschernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   269
        'Schernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   270
        'Scherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   271
        'Scherno'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   272
        'Czerne'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
        'Zerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
        'Tzernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
        'Breschnew'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
     ) do:[:w |
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
     ].
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
    "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   282
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   283
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   284
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   285
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   286
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   287
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   288
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   289
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   290
!
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   291
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   292
miracodeCodeOf:aString
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   293
    "return a miracode soundex phonetic code or nil.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   294
     Miracode is a slightly modified soundex algorithm.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   295
     Notice that there are better algorithms around (doubleMetaphone) "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   296
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   297
    ^ (MiracodeStringComparator new phoneticStringsFor:aString) first
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   298
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   299
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   300
     PhoneticStringUtilities miracodeCodeOf:'claus'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   301
     PhoneticStringUtilities miracodeCodeOf:'clause'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   302
     PhoneticStringUtilities miracodeCodeOf:'close'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   303
     PhoneticStringUtilities miracodeCodeOf:'smalltalk' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   304
     PhoneticStringUtilities miracodeCodeOf:'smaltalk'  
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   305
     PhoneticStringUtilities miracodeCodeOf:'smaltak'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   306
     PhoneticStringUtilities miracodeCodeOf:'smaltok'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   307
     PhoneticStringUtilities miracodeCodeOf:'smoltok'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   308
     PhoneticStringUtilities miracodeCodeOf:'aa'        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   309
     PhoneticStringUtilities miracodeCodeOf:'by'        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   310
     PhoneticStringUtilities miracodeCodeOf:'bab'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   311
     PhoneticStringUtilities miracodeCodeOf:'bob'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   312
     PhoneticStringUtilities miracodeCodeOf:'bop'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   313
     PhoneticStringUtilities miracodeCodeOf:'pub'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   314
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   315
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   316
    "Created: / 28-07-2017 / 15:32:41 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   317
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   318
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   319
mySQLSoundexCodeOf:aString
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   320
    "return the mySQL soundex code. The MySQL soundex code is different from the miracode 'american' soundex
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   321
     (no 4char limitation; different order of duplicate vowel vs. duplicate code elimination).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   322
     Notice that there are better algorithms around (doubleMetaphone) "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   323
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   324
    ^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   325
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   326
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   327
     #(
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   328
        'Müller'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   329
        'Miller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   330
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   331
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   332
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   333
        'Mülherr'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   334
        'Myler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   335
        'Millar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   336
        'Myller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   337
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   338
        'Müler'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   339
        'Muehler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   340
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   341
        'Müllerr'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   342
        'Muehlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   343
        'Muellar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   344
        'Mueler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   345
        'Mülleer'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   346
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   347
        'Nüller'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   348
        'Nyller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   349
        'Niler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   350
        'Czerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   351
        'Tscherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   352
        'Czernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   353
        'Tschernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   354
        'Schernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   355
        'Scherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   356
        'Scherno'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   357
        'Czerne'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   358
        'Zerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   359
        'Tzernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   360
        'Breschnew'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   361
     ) do:[:w |
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   362
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   363
     ].
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   364
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   365
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   366
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   367
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   368
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   369
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   370
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   371
     PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'. 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   372
     PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'. 
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   373
     PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   374
    "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   375
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   376
    "Modified (comment): / 28-07-2017 / 15:34:03 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   377
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   378
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   379
soundexCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   380
    "return a soundex phonetic code or nil.
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   381
     Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   382
     tool when searching for words where the correct spelling is unknown.
4194
12b5e3e2219b #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4184
diff changeset
   383
     (read Knuth or search the web if you don't know what a soundex code is).
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   384
     Caveat: 'similar sounding words' means: 'similar sounding in english'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   385
     Notice that there are better algorithms around (doubleMetaphone) "
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   386
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   387
    ^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   388
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   389
"/ old code - now use code in private class...
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   390
"/    |inStream codeStream ch last lch codeLength codes code lastCode|
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   391
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   392
"/    inStream := aString readStream.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   393
"/    inStream skipSeparators.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   394
"/    inStream atEnd ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   395
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   396
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   397
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   398
"/    ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   399
"/    ch isLetter ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   400
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   401
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   402
"/    codeLength := 0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   403
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   404
"/    codes := Dictionary new.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   405
"/    codes atAll:'bpfv'     put:$1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   406
"/    codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   407
"/    codes atAll:'dt'       put:$3.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   408
"/    codes atAll:'l'        put:$4.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   409
"/    codes atAll:'nm'       put:$5.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   410
"/    codes atAll:'r'        put:$6.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   411
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   412
"/    codeStream := WriteStream on:(String new:4).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   413
"/    codeStream nextPut:(ch asUppercase).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   414
"/    last := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   415
"/    lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   416
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   417
"/    [inStream atEnd] whileFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   418
"/        ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   419
"/        lch := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   420
"/        lch = last ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   421
"/            last := lch.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   422
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   423
"/            code := codes at:lch ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   424
"/            (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   425
"/                codeLength < 3 ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   426
"/                    codeStream nextPut:code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   427
"/                    codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   428
"/                    codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   429
"/                ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   430
"/            ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   431
"/            lastCode := code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   432
"/        ]
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   433
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   434
"/    [ codeLength < 3 ] whileTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   435
"/        codeStream nextPut:$0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   436
"/        codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   437
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   438
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   439
"/    ^ codeStream contents
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   440
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   441
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   442
     PhoneticStringUtilities soundexCodeOf:'claus'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   443
     PhoneticStringUtilities soundexCodeOf:'clause'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   444
     PhoneticStringUtilities soundexCodeOf:'close'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   445
     PhoneticStringUtilities soundexCodeOf:'smalltalk' 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   446
     PhoneticStringUtilities soundexCodeOf:'smaltalk'  
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   447
     PhoneticStringUtilities soundexCodeOf:'smaltak'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   448
     PhoneticStringUtilities soundexCodeOf:'smaltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   449
     PhoneticStringUtilities soundexCodeOf:'smoltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   450
     PhoneticStringUtilities soundexCodeOf:'aa'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   451
     PhoneticStringUtilities soundexCodeOf:'by'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   452
     PhoneticStringUtilities soundexCodeOf:'bab'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   453
     PhoneticStringUtilities soundexCodeOf:'bob'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   454
     PhoneticStringUtilities soundexCodeOf:'bop'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   455
    "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   456
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   457
    "Modified (comment): / 28-07-2017 / 15:33:53 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   458
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   459
3648
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   460
!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   461
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   462
isUtilityClass
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   463
    ^ self == PhoneticStringUtilities
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   464
! !
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   465
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   466
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   467
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   468
defaultClass
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   469
	^SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   470
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   471
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   472
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   473
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   474
documentation
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   475
"
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   476
    abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   477
    They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   478
    to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   479
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   480
    Notice that some comparators are better for particular languages.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   481
"
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   482
!
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   483
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   484
examples
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   485
"
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   486
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   487
            does:'miller' soundLike:'miler'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   488
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   489
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   490
            does:'miller' soundLike:'milner'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   491
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   492
     PhoneticStringUtilities::SoundexStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   493
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   494
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   495
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   496
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   497
"
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   498
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   499
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   500
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   501
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   502
new
    "Create an instance with basicNew, send it #initialize,
     and answer whatever #initialize returns."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   503
    ^ self basicNew initialize.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   504
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   505
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   506
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   507
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   508
isAbstract
    "Answer true only if the receiver is PhoneticStringComparator itself
     (identity comparison); any other class answers false."
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   509
    ^ self == PhoneticStringUtilities::PhoneticStringComparator
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   510
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   511
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   512
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'utilities'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   513
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   514
encode:word
    "Convenience: create a new instance of the receiver, ask it for the
     phonetic strings of word, and answer the first of them."
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   515
    ^ (self new phoneticStringsFor:word) first
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   516
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   517
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   518
     SoundexStringComparator encode:'Fischer'             -> 'F260'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   519
     Caverphone2StringComparator encode:'Fischer'         -> 'FSKA111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   520
     KoelnerPhoneticCodeStringComparator encode:'Fischer' -> '387'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   521
     MRAStringComparator encode:'Fischer'                 -> 'FSCHR'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   522
     SpanishPhoneticCodeStringComparator encode:'Fischer' -> '24429'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   523
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   524
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   525
    "Created: / 02-08-2017 / 01:15:50 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   526
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   527
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   528
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   529
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   530
does:aString soundLike:anotherString 
    "Compute the phonetic strings of both arguments via #phoneticStringsFor:
     and answer whether some phonetic string of aString is equal (=)
     to some phonetic string of anotherString.
     NOTE(review): this reading assumes that #contains: answers true when its block
     evaluates to true for at least one element - confirm against the Collection protocol."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   531
    |translations1 translations2|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   532
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   533
    translations1 := self phoneticStringsFor:aString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   534
    translations2 := self phoneticStringsFor:anotherString.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   535
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   536
    ^ translations1 contains:[:t1 | 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   537
        translations2 contains:[:t2 | t1 = t2]]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   538
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   539
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   540
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   541
            does:'miller' soundLike:'miler'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   542
            
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   543
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   544
            does:'miller' soundLike:'milner'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   545
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   546
     PhoneticStringUtilities::SoundexStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   547
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   548
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   549
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   550
            does:'müller' soundLike:'mueller'.   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   551
    "
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   552
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   553
    "Modified (comment): / 13-07-2017 / 17:51:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   554
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   555
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   556
phoneticStringsFor: aString
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   557
    "Should answer an array of alternate phonetic strings for the given input string.
     This implementation only sends #subclassResponsibility;
     concrete comparator classes are expected to redefine it.
     Both #does:soundLike: and the class-side #encode: depend on the answer."
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   558
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   559
    self subclassResponsibility
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   560
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   561
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   562
     (PhoneticStringUtilities::SoundexStringComparator new
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   563
            phoneticStringsFor:'miller') first 
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   564
            
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   565
     'miller' asSoundexCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   566
    "
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   567
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   568
    "Modified (comment): / 27-07-2017 / 15:07:59 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   569
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   570
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   571
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   572
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   573
initialize
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   574
    "Invoked by the class-side #new on every freshly created instance.
     The method contains no statements."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   575
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   576
    "/ intentionally empty: there is nothing to set up in this class
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   577
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   578
    "/ super initialize.   -- commented since inherited method does nothing
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   579
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   580
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   581
!PhoneticStringUtilities::DaitchMokotoffStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   582
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   583
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   584
"
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   585
    self encode:'AUERBACH' -> 097400, 097500
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   586
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   587
    Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   588
    The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   589
    yielding greater accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   590
    but differences in spelling.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   591
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   592
    The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   593
        - coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   594
        - the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   595
        - rules to encode multi-character n-grams
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   596
        - multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   597
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   598
    This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   599
        encode:aString            - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   600
        phoneticStringsFor:String - branching enabled, all codes will be returned, separated by '|'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   601
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   602
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   603
        'Wikipedia - Daitch-Mokotoff Soundex'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   604
            http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   605
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   606
        'Avotaynu - Soundexing and Genealogy'    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   607
            http://www.avotaynu.com/soundex.htm
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   608
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   609
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   610
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   611
javaCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   612
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   613
/*
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   614
 * Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   615
 * contributor license agreements.  See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   616
 * this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   617
 * The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   618
 * (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   619
 * the License.  You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   620
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   621
 *      http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   622
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   623
 * Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   624
 * distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   625
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   626
 * See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   627
 * limitations under the License.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   628
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   629
package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   630
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   631
import org.apache.commons.codec.CharEncoding;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   632
import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   633
import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   634
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   635
import java.io.InputStream;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   636
import java.util.*;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   637
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   638
/**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   639
 * Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   640
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   641
 * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   642
 * accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   643
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   644
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   645
 * The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   646
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   647
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   648
 * <li>coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   649
 * <li>the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   650
 * <li>rules to encoded multi-character n-grams
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   651
 * <li>multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   652
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   653
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   654
 * This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   655
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   656
 * <li>{@link #encode(String)} - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   657
 * <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   658
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   659
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   660
 * Note: this implementation has additional branching rules compared to the original description of the algorithm. The
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   661
 * rules can be customized by overriding the default rules contained in the resource file
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   662
 * {@code org/apache/commons/codec/language/dmrules.txt}.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   663
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   664
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   665
 * This class is thread-safe.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   666
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   667
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   668
 * @see Soundex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   669
 * @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   670
 * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   671
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   672
 * @version $Id$
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   673
 * @since 1.10
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   674
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   675
public class DaitchMokotoffSoundex implements StringEncoder {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   676
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   677
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   678
     * Inner class representing a branch during DM soundex encoding.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   679
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   680
    private static final class Branch {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   681
        private final StringBuilder builder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   682
        private String cachedString;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   683
        private String lastReplacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   684
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   685
        private Branch() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   686
            builder = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   687
            lastReplacement = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   688
            cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   689
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   690
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   691
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   692
         * Creates a new branch, identical to this branch.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   693
         *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   694
         * @return a new, identical branch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   695
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   696
        public Branch createBranch() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   697
            final Branch branch = new Branch();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   698
            branch.builder.append(toString());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   699
            branch.lastReplacement = this.lastReplacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   700
            return branch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   701
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   702
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   703
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   704
        public boolean equals(final Object other) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   705
            if (this == other) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   706
                return true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   707
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   708
            if (!!(other instanceof Branch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   709
                return false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   710
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   711
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   712
            return toString().equals(((Branch) other).toString());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   713
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   714
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   715
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   716
         * Finish this branch by appending '0's until the maximum code length has been reached.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   717
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   718
        public void finish() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   719
            while (builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   720
                builder.append('0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   721
                cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   722
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   723
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   724
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   725
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   726
        public int hashCode() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   727
            return toString().hashCode();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   728
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   729
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   730
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   731
         * Process the next replacement to be added to this branch.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   732
         *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   733
         * @param replacement
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   734
         *            the next replacement to append
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   735
         * @param forceAppend
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   736
         *            indicates if the default processing shall be overridden
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   737
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   738
        public void processNextReplacement(final String replacement, final boolean forceAppend) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   739
            final boolean append = lastReplacement == null || !!lastReplacement.endsWith(replacement) || forceAppend;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   740
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   741
            if (append && builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   742
                builder.append(replacement);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   743
                // remove all characters after the maximum length
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   744
                if (builder.length() > MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   745
                    builder.delete(MAX_LENGTH, builder.length());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   746
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   747
                cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   748
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   749
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   750
            lastReplacement = replacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   751
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   752
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   753
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   754
        public String toString() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   755
            if (cachedString == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   756
                cachedString = builder.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   757
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   758
            return cachedString;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   759
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   760
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   761
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   762
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   763
     * Inner class for storing rules.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   764
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   765
    private static final class Rule {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   766
        private final String pattern;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   767
        private final String[] replacementAtStart;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   768
        private final String[] replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   769
        private final String[] replacementDefault;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   770
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   771
        protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   772
                final String replacementDefault) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   773
            this.pattern = pattern;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   774
            this.replacementAtStart = replacementAtStart.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   775
            this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   776
            this.replacementDefault = replacementDefault.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   777
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   778
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   779
        public int getPatternLength() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   780
            return pattern.length();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   781
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   782
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   783
        public String[] getReplacements(final String context, final boolean atStart) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   784
            if (atStart) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   785
                return replacementAtStart;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   786
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   787
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   788
            final int nextIndex = getPatternLength();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   789
            final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   790
            if (nextCharIsVowel) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   791
                return replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   792
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   793
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   794
            return replacementDefault;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   795
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   796
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   797
        private boolean isVowel(final char ch) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   798
            return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   799
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   800
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   801
        public boolean matches(final String context) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   802
            return context.startsWith(pattern);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   803
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   804
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   805
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   806
        public String toString() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   807
            return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   808
                    Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   809
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   810
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   811
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   812
    private static final String COMMENT = "//";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   813
    private static final String DOUBLE_QUOTE = "\"";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   814
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   815
    private static final String MULTILINE_COMMENT_END = "*/";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   816
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   817
    private static final String MULTILINE_COMMENT_START = "/*";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   818
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   819
    /** The resource file containing the replacement and folding rules */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   820
    private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   821
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   822
    /** The code length of a DM soundex value. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   823
    private static final int MAX_LENGTH = 6;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   824
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   825
    /** Transformation rules indexed by the first character of their pattern. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   826
    private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   827
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   828
    /** Folding rules. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   829
    private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   830
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   831
    /*
     * Loads the rule resource once when the class is initialized, fills RULES and FOLDINGS from it,
     * and then orders every rule list so that rules with longer patterns come first.
     * Throws IllegalArgumentException if the resource cannot be found on the class path.
     */
    static {
        final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
        if (rulesIS == null) {
            throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
        }

        final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
        try {
            parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
        } finally {
            // close the scanner (and with it the stream) even if parsing fails
            scanner.close();
        }

        // sort RULES by pattern length in descending order
        final Comparator<Rule> longestPatternFirst = new Comparator<Rule>() {
            @Override
            public int compare(final Rule rule1, final Rule rule2) {
                return rule2.getPatternLength() - rule1.getPatternLength();
            }
        };
        for (final List<Rule> ruleList : RULES.values()) {
            Collections.sort(ruleList, longestPatternFirst);
        }
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   852
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   853
    private static void parseRules(final Scanner scanner, final String location,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   854
            final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   855
        int currentLine = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   856
        boolean inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   857
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   858
        while (scanner.hasNextLine()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   859
            currentLine++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   860
            final String rawLine = scanner.nextLine();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   861
            String line = rawLine;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   862
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   863
            if (inMultilineComment) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   864
                if (line.endsWith(MULTILINE_COMMENT_END)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   865
                    inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   866
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   867
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   868
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   869
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   870
            if (line.startsWith(MULTILINE_COMMENT_START)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   871
                inMultilineComment = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   872
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   873
                // discard comments
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   874
                final int cmtI = line.indexOf(COMMENT);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   875
                if (cmtI >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   876
                    line = line.substring(0, cmtI);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   877
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   878
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   879
                // trim leading-trailing whitespace
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   880
                line = line.trim();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   881
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   882
                if (line.length() == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   883
                    continue; // empty lines can be safely skipped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   884
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   885
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   886
                if (line.contains("=")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   887
                    // folding
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   888
                    final String[] parts = line.split("=");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   889
                    if (parts.length !!= 2) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   890
                        throw new IllegalArgumentException("Malformed folding statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   891
                                " parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   892
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   893
                        final String leftCharacter = parts[0];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   894
                        final String rightCharacter = parts[1];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   895
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   896
                        if (leftCharacter.length() !!= 1 || rightCharacter.length() !!= 1) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   897
                            throw new IllegalArgumentException("Malformed folding statement - " +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   898
                                    "patterns are not single characters: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   899
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   900
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   901
                        asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   902
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   903
                } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   904
                    // rule
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   905
                    final String[] parts = line.split("\\s+");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   906
                    if (parts.length !!= 4) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   907
                        throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   908
                                " parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   909
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   910
                        try {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   911
                            final String pattern = stripQuotes(parts[0]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   912
                            final String replacement1 = stripQuotes(parts[1]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   913
                            final String replacement2 = stripQuotes(parts[2]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   914
                            final String replacement3 = stripQuotes(parts[3]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   915
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   916
                            final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   917
                            final char patternKey = r.pattern.charAt(0);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   918
                            List<Rule> rules = ruleMapping.get(patternKey);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   919
                            if (rules == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   920
                                rules = new ArrayList<Rule>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   921
                                ruleMapping.put(patternKey, rules);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   922
                            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   923
                            rules.add(r);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   924
                        } catch (final IllegalArgumentException e) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   925
                            throw new IllegalStateException(
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   926
                                    "Problem parsing line '" + currentLine + "' in " + location, e);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   927
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   928
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   929
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   930
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   931
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   932
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   933
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   934
    private static String stripQuotes(String str) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   935
        if (str.startsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   936
            str = str.substring(1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   937
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   938
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   939
        if (str.endsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   940
            str = str.substring(0, str.length() - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   941
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   942
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   943
        return str;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   944
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   945
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   946
    /** Whether to use ASCII folding prior to encoding. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   947
    private final boolean folding;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   948
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   949
    /**
     * Creates a new instance with ASCII-folding enabled.
     * <p>
     * Equivalent to calling {@code DaitchMokotoffSoundex(true)}.
     * </p>
     */
    public DaitchMokotoffSoundex() {
        this(true);
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   955
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   956
    /**
     * Creates a new instance.
     * <p>
     * With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
     * è -&gt; e.
     * </p>
     *
     * @param folding
     *            if ASCII-folding shall be performed before encoding
     */
    public DaitchMokotoffSoundex(final boolean folding) {
        // remembered for the lifetime of the instance; the field is final
        this.folding = folding;
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   969
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   970
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   971
     * Performs a cleanup of the input string before the actual soundex transformation.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   972
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   973
     * Removes all whitespace characters and performs ASCII folding if enabled.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   974
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   975
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   976
     * @param input
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   977
     *            the input string to cleanup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   978
     * @return a cleaned up string
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   979
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   980
    private String cleanup(final String input) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   981
        final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   982
        for (char ch : input.toCharArray()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   983
            if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   984
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   985
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   986
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   987
            ch = Character.toLowerCase(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   988
            if (folding && FOLDINGS.containsKey(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   989
                ch = FOLDINGS.get(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   990
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   991
            sb.append(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   992
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   993
        return sb.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   994
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   995
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   996
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   997
     * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   998
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   999
     * This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1000
     * EncoderException if the supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1001
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1002
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1003
     * @see #soundex(String)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1004
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1005
     * @param obj
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1006
     *            Object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1007
     * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1008
     *         supplied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1009
     * @throws EncoderException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1010
     *             if the parameter supplied is not of type java.lang.String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1011
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1012
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1013
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1014
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1015
    public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1016
        if (!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1017
            throw new EncoderException(
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1018
                    "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1019
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1020
        return encode((String) obj);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1021
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1022
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1023
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1024
     * Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1025
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1026
     * @see #soundex(String)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1027
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1028
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1029
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1030
     * @return A DM Soundex code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1031
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1032
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1033
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1034
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1035
    public String encode(final String source) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1036
        if (source == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1037
            return null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1038
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1039
        return soundex(source, false)[0];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1040
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1041
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1042
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1043
     * Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1044
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1045
     * In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1046
     * separated by '|'.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1047
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1048
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1049
     * Example: the name "AUERBACH" is encoded as both
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1050
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1051
     * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1052
     * <li>097400</li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1053
     * <li>097500</li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1054
     * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1055
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1056
     * Thus the result will be "097400|097500".
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1057
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1058
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1059
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1060
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1061
     * @return A string containing a set of DM Soundex codes corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1062
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1063
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1064
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1065
    public String soundex(final String source) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1066
        final String[] branches = soundex(source, true);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1067
        final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1068
        int index = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1069
        for (final String branch : branches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1070
            sb.append(branch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1071
            if (++index < branches.length) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1072
                sb.append('|');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1073
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1074
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1075
        return sb.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1076
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1077
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1078
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1079
     * Perform the actual DM Soundex algorithm on the input string.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1080
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1081
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1082
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1083
     * @param branching
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1084
     *            If branching shall be performed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1085
     * @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1086
     *         selected branching mode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1087
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1088
    private String[] soundex(final String source, final boolean branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1089
        if (source == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1090
            return null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1091
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1092
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1093
        final String input = cleanup(source);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1094
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1095
        final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1096
        currentBranches.add(new Branch());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1097
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1098
        char lastChar = '\0';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1099
        for (int index = 0; index < input.length(); index++) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1100
            final char ch = input.charAt(index);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1101
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1102
            // ignore whitespace inside a name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1103
            if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1104
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1105
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1106
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1107
            final String inputContext = input.substring(index);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1108
            final List<Rule> rules = RULES.get(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1109
            if (rules == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1110
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1111
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1112
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1113
            // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1114
            @SuppressWarnings("unchecked")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1115
            final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1116
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1117
            for (final Rule rule : rules) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1118
                if (rule.matches(inputContext)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1119
                    if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1120
                        nextBranches.clear();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1121
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1122
                    final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1123
                    final boolean branchingRequired = replacements.length > 1 && branching;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1124
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1125
                    for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1126
                        for (final String nextReplacement : replacements) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1127
                            // if we have multiple replacements, always create a new branch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1128
                            final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1129
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1130
                            // special rule: occurrences of mn or nm are treated differently
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1131
                            final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1132
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1133
                            nextBranch.processNextReplacement(nextReplacement, force);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1134
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1135
                            if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1136
                                nextBranches.add(nextBranch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1137
                            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1138
                                break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1139
                            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1140
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1141
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1142
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1143
                    if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1144
                        currentBranches.clear();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1145
                        currentBranches.addAll(nextBranches);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1146
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1147
                    index += rule.getPatternLength() - 1;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1148
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1149
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1150
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1151
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1152
            lastChar = ch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1153
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1154
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1155
        final String[] result = new String[currentBranches.size()];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1156
        int index = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1157
        for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1158
            branch.finish();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1159
            result[index++] = branch.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1160
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1161
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1162
        return result;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1163
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1164
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1165
END>>"
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1166
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1167
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1168
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1169
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1170
copyright
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1171
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1172
Copyright (c) 2002-2004 Robert Jarvis
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1173
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1174
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1175
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1176
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1177
the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1178
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1179
The above copyright notice and this permission notice shall be included in all copies or substantial 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1180
portions of the Software.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1181
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1182
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1183
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1184
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1185
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1186
USE OR OTHER DEALINGS IN THE SOFTWARE.'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1187
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1188
! !
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1189
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1190
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1191
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1192
isSlavoGermanic:aString
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1193
    "Answer true if aString contains at least one of the substrings listed below
     (w, k, cz, witz, or one of the letters ä, ö, ü, ß); answer false otherwise.
     NOTE(review): the probe strings are lowercase, whereas inputKey: stores its
     argument via asUppercase - confirm that callers pass lowercase text here,
     or that includesString: compares case-insensitively."

    ^ #('w' 'k' 'cz' 'witz' 'ä' 'ö' 'ü' 'ß') contains:[:sub | aString includesString:sub]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1194
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1195
    "
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1196
     self isSlavoGermanic:'walter'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1197
     self isSlavoGermanic:'horowitz'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1198
     self isSlavoGermanic:'müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1199
     self isSlavoGermanic:'miller'
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1200
    "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1201
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1202
    "Modified: / 28-07-2017 / 10:14:38 / cg"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1203
! !
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1204
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1205
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1206
3685
01ebbac96899 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3648
diff changeset
  1207
documentation
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1208
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1209
    The Double Metaphone algorithm
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1210
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1211
    see internet: https://en.wikipedia.org/wiki/Metaphone
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1212
"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1213
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1214
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1215
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1216
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1217
currentIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1218
	^currentIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1219
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1220
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1221
currentIndex: anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1222
	currentIndex := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1223
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1224
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1225
inputKey
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1226
	^inputKey
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1227
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1228
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1229
inputKey: aString
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1230
	inputKey := aString asUppercase
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1231
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1232
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1233
primaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1234
	^primaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1235
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1236
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1237
primaryTranslation:anObject
    "replace the primary translation by anObject"

    primaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1239
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1240
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1241
secondaryTranslation
    "answer the secondary (alternate) translation collected so far"

    ^ secondaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1243
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1244
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1245
secondaryTranslation:anObject
    "replace the secondary (alternate) translation by anObject"

    secondaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1247
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1248
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1249
skipCount
    "answer the skip counter, which the process* methods increment
     when they have consumed more than the current character"

    ^ skipCount
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1251
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1252
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1253
skipCount:anInteger
    "set the skip counter"

    skipCount := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1255
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1256
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1257
startIndex
    "answer the start index; #performInitialProcessing advances it
     past a leading prefix which it has already dealt with"

    ^ startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1259
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1260
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1261
startIndex:anObject
    "set the start index"

    startIndex := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1263
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1264
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1265
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1266
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1267
phoneticStringsFor:aString 
    "Private - Answers an array of alternate phonetic strings for the given input string.
     The answer is a two-element Array holding the primary translation
     followed by the secondary translation.

     The input is converted to uppercase (exactly as #inputKey: does), because
     the translation rules compare against uppercase literals only.
     The scan state is reset to the values set by #initialize before encoding,
     so that asking the same instance again does not append to the translations
     of a previous call."

    primaryTranslation := ''.
    secondaryTranslation := ''.
    skipCount := 0.
    startIndex := 1.
    currentIndex := 1.

    "a nil argument is passed through unchanged, as before"
    inputKey := aString isNil ifTrue:[nil] ifFalse:[aString asUppercase].
    self performInitialProcessing.
    self processRemainingCharacters.
    ^ Array with:primaryTranslation with:secondaryTranslation

    "Modified (format): / 28-07-2017 / 11:25:02 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1276
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1277
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1278
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1279
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1280
initialize
    "set up the state for a fresh encoding: both translations are empty,
     nothing is to be skipped, and both indices are at the first character"

    super initialize.

    primaryTranslation := ''.
    secondaryTranslation := ''.
    skipCount := 0.
    startIndex := currentIndex := 1.

    "Modified: / 28-07-2017 / 11:18:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1290
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1291
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1292
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1293
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1294
addPrimaryTranslation:aString 
    "append aString to the primary translation collected so far"

    | extended |

    extended := primaryTranslation , aString.
    primaryTranslation := extended

    "Modified: / 28-07-2017 / 11:19:09 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1298
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1299
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1300
addSecondaryTranslation:aString 
    "append aString to the secondary (alternate) translation collected so far"

    | extended |

    extended := secondaryTranslation , aString.
    secondaryTranslation := extended

    "Modified: / 28-07-2017 / 11:17:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1304
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1305
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1306
isSlavoGermanic:aString
    "answer true, if aString contains one of the characters W or K,
     or one of the substrings 'CZ' or 'WITZ'; false otherwise"

    (aString includesAny:'WK') ifTrue:[^ true].
    (aString indexOfSubCollection:'CZ' startingAt:1) >= 1 ifTrue:[^ true].
    ^ (aString indexOfSubCollection:'WITZ' startingAt:1) >= 1

    "Modified: / 09-10-2017 / 17:10:46 / stefan"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1312
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1313
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1314
keyAt:anInteger
    "answer the character of inputKey at index anInteger;
     for an index outside of inputKey, answer a space instead of raising an error"

    (anInteger < 1 or:[anInteger > inputKey size]) ifTrue:[
        ^ Character space
    ].
    ^ inputKey at:anInteger

    "Modified: / 28-07-2017 / 11:38:30 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1321
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1322
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1323
keyLeftString:lengthInteger
    "answer the leftmost lengthInteger characters of inputKey,
     as delivered by #keyMidString:from: starting at index 1"

    | firstIndex |

    firstIndex := 1.
    ^ self keyMidString:lengthInteger from:firstIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1325
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1326
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1327
keyMidString:lengthInteger from:fromInteger
    "answer a string of exactly lengthInteger characters, taken from inputKey
     starting at index fromInteger. Positions which lie before the first or
     after the last character of inputKey are delivered as spaces.
     Answers an empty string if lengthInteger is not positive.

     The requested window is clipped against inputKey before copying, so that
     a window lying completely outside of inputKey yields lengthInteger spaces
     (the previous code computed a wrong padding count and an inverted copy
     range in that case)."

    | keySize lastWanted firstAvailable lastAvailable leading trailing |

    lengthInteger <= 0 ifTrue:[^ ''].

    keySize := inputKey size.
    lastWanted := fromInteger + lengthInteger - 1.
    firstAvailable := fromInteger max:1.
    lastAvailable := lastWanted min:keySize.

    "nothing of the window overlaps inputKey"
    firstAvailable > lastAvailable ifTrue:[
        ^ String new:lengthInteger withAll:Character space
    ].

    leading := String new:(firstAvailable - fromInteger) withAll:Character space.
    trailing := String new:(lastWanted - lastAvailable) withAll:Character space.
    ^ leading , (inputKey copyFrom:firstAvailable to:lastAvailable) , trailing

    "Modified: / 28-07-2017 / 11:20:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1356
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1357
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1358
keyRightString:lengthInteger
    "answer the rightmost lengthInteger characters of inputKey,
     as delivered by #keyMidString:from:"

    | firstIndex |

    firstIndex := inputKey size - lengthInteger + 1.
    ^ self keyMidString:lengthInteger from:firstIndex

    "Modified: / 28-07-2017 / 11:20:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1362
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1363
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1364
performInitialProcessing
    "handle the special cases at the beginning of inputKey:
     - if it begins with one of GN, KN, PN, WR or PS, startIndex is advanced by one;
     - if its first character is X, 'S' is added to both translations
       and startIndex is advanced by one;
     - if its first character is a vowel, 'A' is added to both translations
       and startIndex is advanced by one."

    | firstChar |

    (inputKey size > 1
        and:[ #( 'GN' 'KN' 'PN' 'WR' 'PS' ) includes:(inputKey copyFrom:1 to:2) ]
    ) ifTrue:[
        startIndex := startIndex + 1
    ].

    "keyAt: answers a space for an empty inputKey, so neither test below matches then"
    firstChar := self keyAt:1.

    firstChar = $X ifTrue:[
        self addPrimaryTranslation:'S'.
        self addSecondaryTranslation:'S'.
        startIndex := startIndex + 1
    ].
    firstChar isVowel ifTrue:[
        self addPrimaryTranslation:'A'.
        self addSecondaryTranslation:'A'.
        startIndex := startIndex + 1
    ]

    "Modified: / 01-08-2017 / 19:29:19 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1385
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1386
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1387
processB
    "translate a B at currentIndex: 'P' is added to both translations;
     if the following character is a B too, the skip counter is incremented"

    | nextChar |

    self addPrimaryTranslation:'P'.
    self addSecondaryTranslation:'P'.

    nextChar := self keyAt:(currentIndex + 1).
    nextChar == $B ifTrue:[
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:26:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1397
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1398
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1399
processC
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1400
        "i"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1401
        ((((currentIndex >= 3
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1402
                and: [ (self keyAt: currentIndex-2) isVowel not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1403
                and: [ (self keyMidString: 3 from: currentIndex-1) = 'ACH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1404
                and: [ (self keyAt: currentIndex+2) ~= $I ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1405
                and: [ ((self keyAt: currentIndex+2) ~= $E)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1406
                                or: [ (self keyMidString: 6 from: currentIndex-2) ~= 'BACHER'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1407
                                                and: [ (self keyMidString: 6 from: currentIndex-2) ~= 'MACHER' ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1408
                        ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1409
                                [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1410
                                self addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1411
                                skipCount := skipCount + 2.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1412
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1413
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1414
        "ii"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1415
        (inputKey beginsWith: 'CAESAR')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1416
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1417
                        [ self addPrimaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1418
                        self addSecondaryTranslation: 'S'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1419
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1420
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1421
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1422
        "iii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1423
        (self keyMidString: 4 from: currentIndex) = 'CHIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1424
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1425
                        [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1426
                        self addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1427
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1428
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1429
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1430
        "iv"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1431
        (self keyMidString: 2 from: currentIndex) = 'CH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1432
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1433
                        [ (currentIndex > 1                "a"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1434
                                        and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1435
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1436
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1437
                                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1438
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1439
                                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1440
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1441
                        (currentIndex = 1          "b"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1442
                                        and: [ (inputKey size > 5 and: [(inputKey copyFrom: 1 to: 6) = 'CHARAC'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1443
                                                        or: [ (inputKey copyFrom: 1 to: 6) = 'CHARIS' ]] )
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1444
                                                or: [inputKey size > 4 and: [ ((((inputKey copyFrom: 1 to: 4) = 'CHOR'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1445
                                                        or: [ (inputKey copyFrom: 1 to: 4) = 'CHYM' ])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1446
                                                        or: [ (inputKey copyFrom: 1 to: 4) = 'CHIA' ])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1447
                                                        or: [ (inputKey copyFrom: 1 to: 4) = 'CHEM' ])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1448
                                                        and: [ (inputKey copyFrom: 1 to: 4) ~= 'CHORE' ] ] ] ])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1449
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1450
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1451
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1452
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1453
                                          ^self ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1454
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1455
                        (((((#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: 4))              "c"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1456
                                        or: [ (inputKey copyFrom: 1 to: 3) = 'SCH' ])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1457
                                        or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1458
                                                        includes: (self keyMidString: 6 from: currentIndex-2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1459
                                        or: [ #($T $S) includes: (self keyAt: currentIndex+2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1460
                                        or: [ ((currentIndex = 1)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1461
                                                        or: [ #($A $O $U $E) includes: (self keyAt: currentIndex-1) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1462
                                                and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex+2) ] ] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1463
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1464
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1465
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1466
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1467
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1468
                                          ^self ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1469
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1470
                                        [ currentIndex > 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1471
                                                ifTrue:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1472
                                                        [ (inputKey copyFrom: 1 to: 2) = 'MC'
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1473
                                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1474
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1475
                                                                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1476
                                                                                                addSecondaryTranslation: 'K' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1477
                                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1478
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1479
                                                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1480
                                                                                                addSecondaryTranslation: 'K' ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1481
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1482
                                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1483
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1484
                                                                addSecondaryTranslation: 'X' ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1485
                                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1486
                                        ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1487
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1488
        "v"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1489
        (self keyAt: currentIndex+1) = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1490
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1491
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1492
                                addPrimaryTranslation: 'S';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1493
                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1494
                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1495
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1496
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1497
        "vi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1498
        (self keyMidString: 3 from: currentIndex+1) = 'CIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1499
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1500
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1501
                                addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1502
                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1503
                          skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1504
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1505
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1506
        "vii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1507
        ((self keyAt: currentIndex+1) = $C
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1508
                        and: [ ((currentIndex = 2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1509
                                and: [ (self keyAt: 1) = $M ]) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1510
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1511
                        [ ((#($I $E $H) includes: (self keyAt: currentIndex+2))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1512
                                        and: [ (self keyMidString: 2 from: currentIndex+2) ~= 'HU' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1513
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1514
                                        [ ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1515
                                                        or: [ #('UCCEE' 'UCCES') includes: (self keyMidString: 5 from: currentIndex-1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1516
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1517
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1518
                                                                addPrimaryTranslation: 'KS';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1519
                                                                addSecondaryTranslation: 'KS'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1520
                                                         skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1521
                                                         ^self ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1522
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1523
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1524
                                                                addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1525
                                                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1526
                                                         skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1527
                                                         ^self ] ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1528
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1529
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1530
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1531
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1532
                                          skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1533
                                          ^self ] ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1534
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1535
        "viii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1536
        (#($K $G $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1537
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1538
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1539
                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1540
                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1541
                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1542
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1543
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1544
        "ix"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1545
        (#($I $E $Y) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1546
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1547
                        [ (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1548
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1549
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1550
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1551
                                                addSecondaryTranslation: 'X' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1552
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1553
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1554
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1555
                                                addSecondaryTranslation: 'S'].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1556
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1557
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1558
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1559
        "x"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1560
        self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1561
                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1562
                addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1563
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1564
        "xi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1565
        (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1566
                ifTrue:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1567
                        [ skipCount := skipCount + 2 ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1568
                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1569
                        [ ((#($C $K $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1570
                                        and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex+1)) not ])
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1571
                                ifTrue: [ skipCount := skipCount + 1] ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1572
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1573
    "Modified: / 28-07-2017 / 11:29:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1574
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1575
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1576
processCedille
    "Emit 'S' as both the primary and the secondary translation.
     Neither currentIndex nor skipCount is touched here.
     NOTE(review): judging by the selector, this is invoked for a C-cedilla
     in the input key - confirm against the dispatching method."

    self addPrimaryTranslation: 'S'.
    self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1580
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1581
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1582
processD
    "Translate the letter at currentIndex (a D) by looking at the following key character(s):
        D G + (I | E | Y)   -> 'J',  skipCount incremented by 2
        D G + anything else -> 'TK', skipCount incremented by 1
        D T  or  D D        -> 'T',  skipCount incremented by 1
        any other D         -> 'T',  skipCount left alone
     The same code is added as primary and as secondary translation.
     NOTE(review): skipCount presumably tells the caller how many additional
     key characters to step over after this one - confirm against the main loop."

    | following code extraSkip |

    following := self keyAt: currentIndex + 1.
    extraSkip := 0.

    following = $G
        ifTrue: [
            "DG: the letter after the G decides between 'J' and 'TK'"
            (#($I $E $Y) includes: (self keyAt: currentIndex + 2))
                ifTrue: [
                    code := 'J'.
                    extraSkip := 2 ]
                ifFalse: [
                    code := 'TK'.
                    extraSkip := 1 ] ]
        ifFalse: [
            "plain D; a directly following T or D is folded into the same 'T'"
            code := 'T'.
            (#($T $D) includes: following)
                ifTrue: [ extraSkip := 1 ] ].

    self addPrimaryTranslation: code.
    self addSecondaryTranslation: code.

    "only write skipCount when there is something to skip, as the original did"
    extraSkip > 0
        ifTrue: [ skipCount := skipCount + extraSkip ]

    "Modified: / 28-07-2017 / 11:27:39 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1615
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1616
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1617
processF
    "Emit 'F' as both the primary and the secondary translation.
     If the key character after currentIndex is also an F, skipCount is
     incremented by one.
     NOTE(review): presumably this makes the caller treat a doubled F as a
     single one - confirm against the main loop."

    | followedByF |

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    "look ahead only after the translations have been added, as before"
    followedByF := (self keyAt: currentIndex + 1) = $F.
    followedByF
        ifTrue: [ skipCount := skipCount + 1 ]

    "Modified (format): / 28-07-2017 / 11:29:21 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1626
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1627
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1628
processG
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1629
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1630
        case 'G':
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1631
                if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1632
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1633
        | word |
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1634
        (self keyAt: currentIndex + 1) = $H
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1635
        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1636
                "if((current > 0) AND !!IsVowel(current - 1))"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1637
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1638
                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1639
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1640
              " {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1641
                   MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1642
                   current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1643
                   break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1644
                }"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1645
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1646
                        self 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1647
                            addPrimaryTranslation: 'K';
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1648
                            addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1649
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1650
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1651
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1652
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1653
                "if(current < 3)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1654
          {"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1655
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1656
                currentIndex < 4 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1657
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1658
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1659
                        " //'ghislane', ghiradelli
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1660
               if(current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1661
               { "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1662
                        currentIndex = 1 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1663
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1664
                                "if(GetAt(current + 2) == 'I')"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1665
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1666
                                (self keyAt: currentIndex + 2) = $I
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1667
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1668
                                        "MetaphAdd(J);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1669
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1670
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1671
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1672
                                        "MetaphAdd(K);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1673
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1674
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1675
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1676
                                "  current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1677
                                break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1678
                                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1679
                                ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1680
                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1681
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1682
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1683
                " //Parker's rule (with some further refinements) - e.g., 'hugh'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1684
                if(((current > 1) AND StringAt((current - 2), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1685
                //e.g., 'bough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1686
                OR ((current > 2) AND StringAt((current - 3), 1, B, H, D, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1687
                //e.g., 'broughton'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1688
                OR ((current > 3) AND StringAt((current - 4), 1, B, H, ) ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1689
         "
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1690
                (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)]) 
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1691
                or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])  
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1692
                or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1693
                ifTrue: [                         
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1694
                        "current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1695
                        break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1696
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1697
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1698
                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1699
                        " //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1700
               if((current > 2) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1701
               AND (GetAt(current - 1) == 'U') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1702
               AND StringAt((current - 3), 1, C, G, L, R, T, ) )"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1703
                        (currentIndex > 3 and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1704
                                ((self keyAt: currentIndex - 1) = $U) and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1705
                                        #($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1706
                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1707
                        ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1708
                                "MetaphAdd(F);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1709
                                self addPrimaryTranslation: 'F';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1710
                                addSecondaryTranslation: 'F'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1711
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1712
                                " if((current > 0) AND GetAt(current - 1) !!= 'I')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1713
                    MetaphAdd(K);"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1714
                                (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1715
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1716
                                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1717
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1718
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1719
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1720
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1721
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1722
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1723
        ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1724
                "if(GetAt(current + 1) == 'N')"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1725
          (self keyAt: currentIndex + 1) = $N
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1726
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1727
                        "if((current == 1) AND IsVowel(0) AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1728
                        (currentIndex = 2 and: [(inputKey at: 1) isVowel and: [(self isSlavoGermanic: inputKey) not]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1729
               ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1730
                                "MetaphAdd(KN, N);"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1731
                                self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1732
                                addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1733
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1734
                                " //not e.g. 'cagney'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1735
                                if(!!StringAt((current + 2), 2, EY, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1736
                                AND (GetAt(current + 1) !!= 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1737
                                AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1738
                                ((inputKey size >= (currentIndex + 2)) and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1739
                                        (inputKey copyFrom: currentIndex + 2 to: (currentIndex + 4 min: inputKey size)) ~= 'EY' and: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1740
                                                (self keyAt: currentIndex + 1) ~= $Y and: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1741
                                                        (self isSlavoGermanic: inputKey) not
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1742
                                                ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1743
                                        ]
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1744
                                ]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1745
                                        self addPrimaryTranslation: 'N';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1746
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1747
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1748
                                        self addPrimaryTranslation: 'KN';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1749
                                        addSecondaryTranslation: 'KN'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1750
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1751
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1752
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1753
                        ^self 
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1754
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1755
                " //'tagliaro'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1756
                if(StringAt((current + 1), 2, LI, ) AND !!SlavoGermanic())"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1757
                ((inputKey size >= (currentIndex + 3)) and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1758
                        (inputKey copyFrom: currentIndex + 1 to: currentIndex + 2) = 'LI' and: [
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1759
                                (self isSlavoGermanic: inputKey) not]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1760
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1761
                        self addPrimaryTranslation: 'KL';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1762
                        addSecondaryTranslation: 'L'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1763
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1764
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1765
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1766
                " //-ges-,-gep-,-gel-, -gie- at beginning
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1767
                if((current == 0)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1768
                AND ((GetAt(current + 1) == 'Y') 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1769
                OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER, )) )"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1770
                (currentIndex = 1 and: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1771
                        ((self keyAt: currentIndex + 1) = $Y) or: [
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1772
                        (#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER') includes: 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1773
                                (inputKey copyFrom: currentIndex + 1 to: currentIndex + 2))
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1774
                ]]) ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1775
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1776
                        addSecondaryTranslation: 'J'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1777
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1778
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1779
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1780
                " // -ger-,  -gy-
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1781
                if((StringAt((current + 1), 2, ER, ) OR (GetAt(current + 1) == 'Y'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1782
                AND !!StringAt(0, 6, DANGER, RANGER, MANGER, )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1783
                AND !!StringAt((current - 1), 1, E, I, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1784
                AND !!StringAt((current - 1), 3, RGY, OGY, ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1785
                "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1786
          (((inputKey copyFrom: currentIndex + 1 to: (currentIndex + 3 min: inputKey size)) = 'ER' or: [
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1787
                                ((self keyAt: currentIndex + 1) = $Y)]) 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1788
                        and: [((#('DANGER' 'RANGER' 'MANGER') includes: (word := inputKey copyFrom: 1 to: (6 min: inputKey size))) not)
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1789
                                and: [(self keyAt: currentIndex - 1) ~= $E
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1790
                                        and: [(#('RGY' 'OGY') includes: (inputKey copyFrom: currentIndex - 1 to: currentIndex + 1)) not]]])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1791
                 ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1792
                        self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1793
                        addSecondaryTranslation: 'J'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1794
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1795
                        ^self.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1796
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1797
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1798
          " // italian e.g, 'biaggi'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1799
           if(StringAt((current + 1), 1, E, I, Y, ) OR StringAt((current - 1), 4, AGGI, OGGI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1800
           "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1801
                ((#($E $I $Y) includes: (self keyAt: (currentIndex + 1))) or: [(#('AGGI' 'OGGI') includes: (inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: inputKey size)))])
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1802
                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1803
                        " //obvious germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1804
                                        if((StringAt(0, 4, VAN , VON , ) OR StringAt(0, 3, SCH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1805
                                                OR StringAt((current + 1), 2, ET, ))                                                MetaphAdd(K);"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1806
                        word := (inputKey copyFrom: 1 to: 4).
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1807
                        ((#('VAN ' 'VON ') includes: word) or: [(word copyFrom: 1 to: 3) = 'SCH' or: [(word copyFrom: 1 to: 2) = 'ET']]) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1808
                        ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1809
                                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1810
                                addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1811
                        ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1812
                            " //always soft if french ending
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1813
                                                if(StringAt((current + 1), 4, IER , ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1814
                                                        MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1815
                                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1816
                                                        MetaphAdd(J, K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1817
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1818
                                        break;"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1819
                                (((inputKey copyFrom: currentIndex + 1 to: (currentIndex + 5 min: inputKey size)), '    ') copyFrom: 1 to: 4) = 'IER '
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1820
                                ifTrue: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1821
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1822
                                        addSecondaryTranslation: 'J'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1823
                                ] ifFalse: [
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1824
                                        self addPrimaryTranslation: 'J';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1825
                                        addSecondaryTranslation: 'K'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1826
                                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1827
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1828
                        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1829
                        skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1830
                        ^self.       
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1831
                ].                      
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1832
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1833
        " if(GetAt(current + 1) == 'G')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1834
             current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1835
         else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1836
             current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1837
         MetaphAdd(K);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1838
            break;"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1839
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1840
                (self keyAt: (currentIndex + 1)) = $G
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1841
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1842
                        skipCount := skipCount + 1.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1843
                ].
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1844
                self addPrimaryTranslation: 'K';
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1845
                addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1846
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1847
    "Modified: / 28-07-2017 / 11:31:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1848
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1849
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1850
processH
        "Double Metaphone rule for the letter H
         (ported from http://aspell.sourceforge.net/metaphone/dmetaph.cpp, case 'H').

         The H is only kept when it is the first letter or follows a vowel,
         AND it is itself followed by a vowel. In that case 'H' is appended
         to both the primary and the secondary translation and skipCount is
         incremented by one (the C original advances current by 2 here instead
         of 1; presumably skipCount is how the caller skips that extra letter
         - confirm against the main loop).
         In every other case (this also covers 'HH') nothing is emitted.

         Reference logic of the C original:
                if(((current == 0) OR IsVowel(current - 1))
                        AND IsVowel(current + 1))
                {
                        MetaphAdd(H);
                        current += 2;
                }else
                        current += 1;
                break;
        "

        | startsWordOrFollowsVowel |

        "first letter, or the previous letter is a vowel
         (the previous letter is only fetched when this is not the first one)"
        startsWordOrFollowsVowel := currentIndex = 1
                                        or: [(self keyAt: currentIndex - 1) isVowel].
        startsWordOrFollowsVowel ifFalse: [^ self].

        "the letter after the H must be a vowel as well"
        (self keyAt: currentIndex + 1) isVowel ifFalse: [^ self].

        self addPrimaryTranslation: 'H'.
        self addSecondaryTranslation: 'H'.
        skipCount := skipCount + 1.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1873
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1874
    "Modified: / 28-07-2017 / 11:29:52 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1875
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1876
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1877
processJ
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1878
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1879
        case 'J':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1880
                                //obvious spanish, 'jose', 'san jacinto'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1881
                                if(StringAt(current, 4, JOSE, ) OR StringAt(0, 4, SAN , ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1882
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1883
                                        if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, SAN , ) )
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1884
                                                MetaphAdd(H);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1885
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1886
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1887
                                                MetaphAdd(J, H);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1888
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1889
                                        current +=1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1890
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1891
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1892
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1893
                                if((current == 0) AND !!StringAt(current, 4, JOSE, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1894
                                        MetaphAdd(J, A);//Yankelovich/Jankelowicz
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1895
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1896
                                        //spanish pron. of e.g. 'bajador'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1897
                                        if(IsVowel(current - 1) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1898
                                                AND !!SlavoGermanic()
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1899
                                                        AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1900
                                                MetaphAdd(J, H);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1901
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1902
                                                if(current == last)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1903
                                                        MetaphAdd(J,  );
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1904
                                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1905
                                                        if(!!StringAt((current + 1), 1, L, T, K, S, N, M, B, Z, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1906
                                                                        AND !!StringAt((current - 1), 1, S, K, L, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1907
                                                                MetaphAdd(J);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1908
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1909
                                if(GetAt(current + 1) == 'J')//it could happen!!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1910
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1911
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1912
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1913
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1914
"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1915
        | currentWord firstWord nextLetter |
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1916
        currentWord := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1917
        firstWord := inputKey copyFrom: 1 to: (4 min: inputKey size).
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1918
        nextLetter := self keyAt: currentIndex + 1.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1919
        (currentWord = 'JOSE' or: [firstWord = 'SAN '])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1920
        ifTrue: [       
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1921
                ((currentIndex = 1 and: [inputKey size = 4 or: [inputKey size >= 5 and: [self keyAt: currentIndex + 4 = $ ]]])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1922
                        or: [firstWord = 'SAN '])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1923
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1924
                        self addPrimaryTranslation: 'H';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1925
                        addSecondaryTranslation: 'H'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1926
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1927
                        self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1928
                        addSecondaryTranslation: 'H'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1929
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1930
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1931
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1932
        (currentIndex = 1 and: [firstWord ~= 'JOSE'])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1933
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1934
                self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1935
                addSecondaryTranslation: 'A'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1936
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1937
                ((currentIndex > 1 and: [(self keyAt: currentIndex -1) isVowel])
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1938
                and: [(self isSlavoGermanic: inputKey) not and: [nextLetter == $A or: [nextLetter == $O]]])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1939
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1940
                        self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1941
                        addSecondaryTranslation: 'H'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1942
                ] ifFalse: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1943
                        currentIndex = inputKey size 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1944
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1945
                                self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1946
                                addSecondaryTranslation: ' '.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1947
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1948
                                ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1949
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1950
                                        self addPrimaryTranslation: 'J';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1951
                                        addSecondaryTranslation: 'J'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1952
                                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1953
                        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1954
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1955
        ].
3489
6ef5f530df03 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3488
diff changeset
  1956
        nextLetter == $J
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1957
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1958
                skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1959
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1960
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1961
    "Modified: / 28-07-2017 / 11:31:41 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1962
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1963
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1964
processK
    "Encode the letter K.
     A doubled KK is treated as one sound: when the letter following the
     current position is also K, skipCount is incremented by one (the C++
     reference advances its cursor by 2 instead of 1 in that case).
     K is then added as both the primary and the secondary translation.

     Reference - http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'K':
            if(GetAt(current + 1) == 'K')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $K ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:31:46 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1983
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1984
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1985
processL
    "Encode the letter L.
     A doubled LL is treated as one sound (skipCount is incremented by one).
     For Spanish-looking words (e.g. 'cabrillo', 'gallegos') the LL is added
     as L to the primary translation only; the secondary translation is
     given ' ' instead.
     Otherwise L is added to both translations.

     Reference (dmetaph.cpp):
        case 'L':
            if(GetAt(current + 1) == 'L')
            {
                //spanish e.g. 'cabrillo', 'gallegos'
                if(((current == (length - 3))
                        AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
                    OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, ))
                        AND StringAt((current - 1), 4, ALLE, )) )
                {
                    MetaphAdd(L,  );
                    current += 2;
                    break;
                }
                current += 2;
            }else
                current += 1;
            MetaphAdd(L);
            break;
    "

    | keySize currentWord isSpanishLL |

    (self keyAt: currentIndex + 1) = $L ifTrue: [
        keySize := inputKey size.

        "The (up to) four letters starting one position before the current L;
         nil when there is no preceding letter.
         This has to be fetched before the test below: both alternatives
         look at it, and the second one (ALLE inside a word ending in
         AS, OS, A or O - e.g. 'gallegos') is independent of the position.
         Formerly it was only assigned inside the first alternative, so the
         second alternative always compared nil against 'ALLE'."
        currentWord := currentIndex > 1
                        ifTrue: [inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: keySize)]
                        ifFalse: [nil].

        isSpanishLL := (currentIndex = (keySize - 2)
                            and: [#('ILLO' 'ILLA' 'ALLE') includes: currentWord])
                       or: [
                            ((#('AS' 'OS') includes: (inputKey copyFrom: keySize - 1 to: keySize))
                                or: [#($A $O) includes: (self keyAt: keySize)])
                            and: [currentWord = 'ALLE']].

        isSpanishLL ifTrue: [
            self addPrimaryTranslation: 'L';
                 addSecondaryTranslation: ' '.
            skipCount := skipCount + 1.
            ^ self.
        ].
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'L';
         addSecondaryTranslation: 'L'.

    "Modified: / 28-07-2017 / 11:32:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2025
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2026
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2027
processM
    "Encode the letter M.
     skipCount is incremented by one (i.e. the following letter is consumed
     together with this M) when either
        - the M is part of UMB and that B is the last letter or is
          followed by ER (e.g. 'dumb', 'thumb'), or
        - the following letter is another M.
     M is then added to both the primary and the secondary translation.

     Reference (dmetaph.cpp):
        case 'M':
            if((StringAt((current - 1), 3, UMB, )
                    AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
                    //'dumb','thumb'
                    OR  (GetAt(current + 1) == 'M') )
                current += 2;
            else
                current += 1;
            MetaphAdd(M);
            break;
    "

    | keySize isSilentB |

    keySize := inputKey size.

    "The look-ahead for ER is exactly two letters wide
     (currentIndex + 2 .. currentIndex + 3), as in StringAt(current + 2, 2, ER).
     Copying three letters here would only ever match when the word ends
     right after the ER."
    isSilentB := (currentIndex > 1
                    and: [(inputKey copyFrom: currentIndex - 1 to: (currentIndex + 1 min: keySize)) = 'UMB'])
                 and: [
                    currentIndex + 1 = keySize
                        or: [(inputKey
                                copyFrom: (currentIndex + 2 min: keySize)
                                to: (currentIndex + 3 min: keySize)) = 'ER']].

    (isSilentB or: [(self keyAt: currentIndex + 1) = $M]) ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'M';
         addSecondaryTranslation: 'M'.

    "Modified: / 28-07-2017 / 11:32:08 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2050
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2051
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2052
processN
    "Encode the letter N.
     A doubled NN is treated as one sound: when the letter following the
     current position is also N, skipCount is incremented by one (the C++
     reference advances its cursor by 2 instead of 1 in that case).
     N is then added as both the primary and the secondary translation.

     Reference - http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'N':
            if(GetAt(current + 1) == 'N')
                current += 2;
            else
                current += 1;
            MetaphAdd(N);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $N ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.

    "Modified: / 28-07-2017 / 11:32:14 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2072
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2073
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2074
processNtilde
    "Encode the letter N-with-tilde: it is added as a plain N to both the
     primary and the secondary translation. No look-ahead is done and
     skipCount is left untouched.

     Reference (dmetaph.cpp):
        case 'Ñ':
            current += 1;
            MetaphAdd(N);
            break;
    "

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2082
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2083
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2084
processP
    "Encode the letter P.
        - PH is added as F to both translations and the H is consumed
          (skipCount incremented by one).
        - otherwise P is added to both translations; if the following letter
          is P or B (e.g. 'campbell', 'raspberry') that letter is consumed
          as well (skipCount incremented by one).

     Note that in the reference the MetaphAdd(P) is NOT part of the else
     branch (its indentation there is misleading): P is emitted regardless
     of whether the next letter is P or B; only the cursor advance differs.

     Reference (dmetaph.cpp):
        case 'P':
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(F);
                current += 2;
                break;
            }

            //also account for campbell, raspberry
            if(StringAt((current + 1), 1, P, B, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(P);
            break;
    "

    | nextLetter |

    nextLetter := self keyAt: currentIndex + 1.
    nextLetter = $H ifTrue: [
        self addPrimaryTranslation: 'F';
             addSecondaryTranslation: 'F'.
        skipCount := skipCount + 1.
        ^ self.
    ].

    (#($P $B) includes: nextLetter) ifTrue: [
        skipCount := skipCount + 1.
    ].
    "always emit the P - formerly it was dropped for PP and PB"
    self addPrimaryTranslation: 'P';
         addSecondaryTranslation: 'P'.

    "Modified: / 28-07-2017 / 11:32:28 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2118
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2119
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2120
processQ
    "Encode the letter Q.
     A doubled QQ is treated as one sound: when the letter following the
     current position is also Q, skipCount is incremented by one (the C++
     reference advances its cursor by 2 instead of 1 in that case).
     Q is then added as K to both the primary and the secondary translation.

     Reference - http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Q':
            if(GetAt(current + 1) == 'Q')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $Q ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:32:32 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2140
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2141
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2142
processR
        "Handle the letter R at currentIndex
         (port of the 'R' case in http://aspell.sourceforge.net/metaphone/dmetaph.cpp).

         A final R directly preceded by 'IE' (french, e.g. 'rogier') in a key which
         is not slavo-germanic contributes nothing to the primary translation and
         'R' to the secondary one - unless the two characters before the 'IE'
         are 'ME' or 'MA' (e.g. 'hochmeier').
         In every other case 'R' is added to both translations.
         If the next character is another R, one more position is skipped."

        | silentInPrimary beforeR beforeThat |

        silentInPrimary := false.
        currentIndex = inputKey size ifTrue:[
            (self isSlavoGermanic: inputKey) ifFalse:[
                "the two characters in front of the R (start index clamped to 1)"
                beforeR := inputKey
                                copyFrom: ((currentIndex - 2) max: 1)
                                to: ((currentIndex - 1) max: 1).
                beforeR = 'IE' ifTrue:[
                    "the two characters in front of the 'IE' (again clamped)"
                    beforeThat := inputKey
                                    copyFrom: ((currentIndex - 4) max: 1)
                                    to: ((currentIndex - 3) max: 1).
                    silentInPrimary := (#('ME' 'MA') includes: beforeThat) not
                ]
            ]
        ].

        silentInPrimary ifTrue:[
            self addPrimaryTranslation: ''
        ] ifFalse:[
            self addPrimaryTranslation: 'R'
        ].
        self addSecondaryTranslation: 'R'.

        "a doubled R counts only once"
        (self keyAt: currentIndex + 1) = $R ifTrue:[
            skipCount := skipCount + 1
        ].

    "Modified: / 28-07-2017 / 11:32:37 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2180
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2181
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2182
processRemainingCharacters
    "Walk over inputKey from startIndex to its end.
     While skipCount is non-zero, a position is only counted off (skipCount is
     decremented) and not looked at any further.
     Otherwise the position becomes currentIndex; vowels and Y are passed over,
     every other character is dispatched via perform: to its handler method:
     #processCedille for C-cedilla, #processNtilde for N-tilde,
     and #processX (X being the character itself) for anything else.
     Returns as soon as both translations are longer than 4 characters."

    startIndex to: inputKey size do:[:position |
        | ch handler |

        skipCount = 0 ifFalse:[
            "this position was already consumed by an earlier handler"
            skipCount := skipCount - 1
        ] ifTrue:[
            (primaryTranslation size > 4 and:[secondaryTranslation size > 4]) ifTrue:[
                ^ self
            ].

            currentIndex := position.
            ch := self keyAt: position.

            (ch isVowel or:[ch = $Y]) ifFalse:[
                handler := ch == $Ç
                            ifTrue:[ #processCedille ]
                            ifFalse:[
                                ch == $Ñ
                                    ifTrue:[ #processNtilde ]
                                    ifFalse:[ ('process', ch asString) asSymbol ]
                            ].
                self perform: handler
            ]
        ]
    ]

    "Modified: / 28-07-2017 / 11:24:15 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2209
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2210
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2211
processS
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2212
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2213
        case 'S':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2214
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2215
                                if(StringAt((current - 1), 3, ISL, YSL, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2216
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2217
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2218
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2219
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2220
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2221
                                //special case 'sugar-'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2222
                                if((current == 0) AND StringAt(current, 5, SUGAR, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2223
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2224
                                        MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2225
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2226
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2227
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2228
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2229
                                if(StringAt(current, 2, SH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2230
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2231
                                        //germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2232
                                        if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2233
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2234
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2235
                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2236
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2237
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2238
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2239
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2240
                                //italian & armenian
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2241
                                if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2242
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2243
                                        if(!!SlavoGermanic())
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2244
                                                MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2245
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2246
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2247
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2248
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2249
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2250
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2251
                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2252
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2253
                                if(((current == 0) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2254
                                                AND StringAt((current + 1), 1, M, N, L, W, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2255
                                                        OR StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2256
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2257
                                        MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2258
                                        if(StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2259
                                                current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2260
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2261
                                                current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2262
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2263
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2264
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2265
                                if(StringAt(current, 2, SC, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2266
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2267
                                        //Schlesinger's rule
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2268
                                        if(GetAt(current + 2) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2269
                                                //dutch origin, e.g. 'school', 'schooner'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2270
                                                if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2271
                                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2272
                                                        //'schermerhorn', 'schenker'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2273
                                                        if(StringAt((current + 3), 2, ER, EN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2274
                                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2275
                                                                MetaphAdd(X, SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2276
                                                        }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2277
                                                                MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2278
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2279
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2280
                                                }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2281
                                                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2282
                                                                MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2283
                                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2284
                                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2285
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2286
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2287
                                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2288
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2289
                                        if(StringAt((current + 2), 1, I, E, Y, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2290
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2291
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2292
                                                current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2293
                                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2294
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2295
                                        //else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2296
                                        MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2297
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2298
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2299
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2300
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2301
                                //french e.g. 'resnais', 'artois'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2302
                                if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2303
                                        MetaphAdd(, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2304
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2305
                                        MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2306
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2307
                                if(StringAt((current + 1), 1, S, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2308
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2309
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2310
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2311
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2312
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2313
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2314
        | nextChar char2 chars char |
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2315
        (#('ISL' 'YSL') includes: (inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: inputKey size))) 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2316
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2317
                ^self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2318
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2319
        (currentIndex = 1 and: [(inputKey copyFrom: 1 to: (5 min: inputKey size)) = 'SUGAR'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2320
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2321
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2322
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2323
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2324
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2325
        (inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SH'
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2326
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2327
                (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (inputKey copyFrom: (currentIndex + 1 min: inputKey size) to: ((currentIndex + 5) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2328
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2329
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2330
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2331
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2332
                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2333
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2334
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2335
                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2336
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2337
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2338
        ((#('SIO' 'SIA') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 2 min: inputKey size)))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2339
                or: [(inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)) = 'SIAN'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2340
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2341
                (self isSlavoGermanic: inputKey) not
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2342
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2343
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2344
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2345
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2346
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2347
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2348
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2349
                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2350
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2351
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2352
        ((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2353
                or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2354
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2355
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2356
                addSecondaryTranslation: 'X'.
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
  2357
                nextChar == $Z
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2358
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2359
                    skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2360
                        ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2361
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2362
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2363
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2364
        ((inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SC')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2365
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2366
                (char2 := self keyAt: currentIndex + 2) = $H
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2367
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2368
                        (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := inputKey copyFrom: ((currentIndex + 3) min: inputKey size) to: ((currentIndex + 4) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2369
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2370
                                (#('ER' 'EN') includes: chars)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2371
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2372
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2373
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2374
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2375
                                        self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2376
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2377
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2378
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2379
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2380
                        ] ifFalse: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2381
                                ((currentIndex = 1 and: [(char := inputKey at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2382
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2383
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2384
                                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2385
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2386
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2387
                                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2388
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2389
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2390
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2391
                        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2392
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2393
                        (#($I $E $Y) includes: char2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2394
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2395
                                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2396
                                addSecondaryTranslation: 'S'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2397
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2398
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2399
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2400
                                self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2401
                                addSecondaryTranslation: 'SK'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2402
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2403
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2404
                        ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2405
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2406
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2407
        (currentIndex = inputKey size and: [(#('AI' 'OI') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2408
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2409
                self addPrimaryTranslation: '';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2410
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2411
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2412
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2413
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2414
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2415
        (#($S $Z) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2416
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2417
            skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2418
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2419
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2420
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2421
    "Modified: / 28-07-2017 / 11:34:18 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2422
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2423
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2424
processT
        "Double-metaphone handling of the letter T located at currentIndex in inputKey.
         Modelled on the 'T' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         Rules, tried in this order:
           TION, TIA, TCH  - primary and secondary both get 'X'; two extra characters are consumed.
           TH, TTH         - 'T' / 'T' if the two characters after the TH position are OM or AM
                             (thomas, thames), or if the whole key begins with 'VAN ', 'VON '
                             or 'SCH' (germanic); otherwise primary '0' (digit zero) and
                             secondary 'T'. One extra character is consumed.
           anything else   - 'T' / 'T'; a directly following T or D is consumed as well.

         skipCount is increased by the number of characters consumed in addition to
         the T itself; it is always one less than the 'current +=' value used by the
         C reference, so presumably the caller steps over the current character itself."

        | keySize twoChars threeChars fourChars afterTH keyStart isPlainT primaryCode follower |

        keySize := inputKey size.
        twoChars := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: keySize).
        threeChars := inputKey copyFrom: currentIndex to: (currentIndex + 2 min: keySize).
        fourChars := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: keySize).

        "TION / TIA / TCH all sound like 'sh'"
        (fourChars = 'TION' or: [ #('TIA' 'TCH') includes: threeChars ]) ifTrue: [
                self
                        addPrimaryTranslation: 'X';
                        addSecondaryTranslation: 'X'.
                skipCount := skipCount + 2.
                ^ self
        ].

        (twoChars = 'TH' or: [ threeChars = 'TTH' ]) ifTrue: [
                "the lookahead is only extracted here, where the original evaluated it too"
                afterTH := inputKey copyFrom: currentIndex + 2 to: (currentIndex + 3 min: keySize).
                isPlainT := #('OM' 'AM') includes: afterTH.
                isPlainT ifFalse: [
                        keyStart := inputKey copyFrom: 1 to: (4 min: keySize).
                        isPlainT := #('VAN ' 'VON ') includes: keyStart
                ].
                isPlainT ifFalse: [
                        keyStart := inputKey copyFrom: 1 to: (3 min: keySize).
                        isPlainT := keyStart = 'SCH'
                ].
                primaryCode := isPlainT ifTrue: [ 'T' ] ifFalse: [ '0' ].
                self
                        addPrimaryTranslation: primaryCode;
                        addSecondaryTranslation: 'T'.
                skipCount := skipCount + 1.
                ^ self
        ].

        "plain T; swallow a directly following T or D"
        follower := self keyAt: currentIndex + 1.
        (#($T $D) includes: follower) ifTrue: [
                skipCount := skipCount + 1
        ].
        self
                addPrimaryTranslation: 'T';
                addSecondaryTranslation: 'T'.

    "Modified: / 28-07-2017 / 11:33:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2504
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2505
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2506
processV
        "Double-metaphone handling of the letter V located at currentIndex in inputKey.
         Modelled on the 'V' case of http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         V is always encoded as 'F' in both the primary and the secondary translation.
         A doubled V (VV) is treated as a single one: skipCount is incremented so that
         the second V is consumed together with the first."

        | isDoubled |

        isDoubled := (self keyAt: currentIndex + 1) = $V.
        isDoubled ifTrue: [ skipCount := skipCount + 1 ].

        self
                addPrimaryTranslation: 'F';
                addSecondaryTranslation: 'F'.

    "Modified: / 28-07-2017 / 11:34:27 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2527
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2528
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2529
processW
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2530
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2531
        case 'W':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2532
                                //can also be in middle of word
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2533
                                if(StringAt(current, 2, WR, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2534
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2535
                                        MetaphAdd(R);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2536
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2537
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2538
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2539
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2540
                                if((current == 0) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2541
                                        AND (IsVowel(current + 1) OR StringAt(current, 2, WH, )))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2542
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2543
                                        //Wasserman should match Vasserman
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2544
                                        if(IsVowel(current + 1))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2545
                                                MetaphAdd(A, F);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2546
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2547
                                                //need Uomo to match Womo
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2548
                                                MetaphAdd(A);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2549
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2550
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2551
                                //Arnow should match Arnoff
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2552
                                if(((current == last) AND IsVowel(current - 1)) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2553
                                        OR StringAt((current - 1), 5, EWSKI, EWSKY, OWSKI, OWSKY, ) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2554
                                                        OR StringAt(0, 3, SCH, ))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2555
                                  {
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2556
                                        MetaphAdd(, F);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2557
                                        current +=1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2558
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2559
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2560
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2561
                                //polish e.g. 'filipowicz'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2562
                                if(StringAt(current, 4, WICZ, WITZ, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2563
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2564
                                        MetaphAdd(TS, FX);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2565
                                        current +=4;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2566
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2567
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2568
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2569
                                //else skip it
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2570
                                current +=1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2571
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2572
"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2573
        | word nextLetter |
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2574
        ((word := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: inputKey size)) = 'WR')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2575
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2576
                self addPrimaryTranslation: 'R';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2577
                addSecondaryTranslation: 'R'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2578
                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2579
                ^self
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2580
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2581
        ((currentIndex = 1 and: [(nextLetter := self keyAt: currentIndex + 1) isVowel]) or: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2582
                word = 'WH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2583
        ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2584
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2585
                nextLetter isVowel
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2586
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2587
                        self addPrimaryTranslation: 'A';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2588
                        addSecondaryTranslation: 'F'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2589
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2590
                        self addPrimaryTranslation: 'A';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2591
                        addSecondaryTranslation: 'A'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2592
                ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2593
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2594
        ((((currentIndex = inputKey size) and: [(self keyAt: currentIndex - 1) isVowel])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2595
                or: [#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY') includes: (inputKey copyFrom: ((currentIndex - 1) max: 1) to: (currentIndex + 3 min: inputKey size))])
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2596
                        or: [inputKey startsWith:'SCH'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2597
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2598
                self addPrimaryTranslation: '';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2599
                addSecondaryTranslation: 'F'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2600
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2601
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2602
        (#('WICZ' 'WITZ') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 4 min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2603
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2604
                self addPrimaryTranslation: 'TS';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2605
                addSecondaryTranslation: 'FX'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2606
                skipCount := skipCount + 3.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2607
                ^self
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2608
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2609
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2610
    "Modified: / 28-07-2017 / 11:34:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2611
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2612
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2613
processX
    "Handle the letter X found at currentIndex of inputKey.
     'KS' is added to both the primary and the secondary translation, unless
     the X is the last letter and is directly preceded by IAU, EAU, AU or OU
     (french, e.g. breaux), in which case nothing is added.
     If the next letter is a C or an X, it is consumed too by bumping skipCount.

     Reference implementation:
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'X':
                                //french e.g. breaux
                                if(!!((current == last)
                                        AND (StringAt((current - 3), 3, IAU, EAU, )
                                                        OR StringAt((current - 2), 2, AU, OU, ))) )
                                        MetaphAdd(KS);

                                if(StringAt((current + 1), 1, C, X, ))
                                        current += 2;
                                else
                                        current += 1;
                                break;
    "

    |silentFrenchX|

    "the letters to look at are the ones BEFORE the X, so the range ends at
     currentIndex - 1; the start is clamped with max: so that short words
     never produce an index below 1 (an empty range simply matches nothing)"
    silentFrenchX := (currentIndex = inputKey size) and: [
        (#('IAU' 'EAU') includes: (inputKey copyFrom: ((currentIndex - 3) max: 1) to: currentIndex - 1))
        or: [ #('AU' 'OU') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: currentIndex - 1) ]
    ].
    silentFrenchX ifFalse: [
        self
            addPrimaryTranslation: 'KS';
            addSecondaryTranslation: 'KS'.
    ].

    "XC and XX count as a single sound"
    (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue: [
        skipCount := skipCount + 1.
        ^ self
    ]
2580
7ce713ba2618 not ifTrue -> ifFalse (trying the rewrite tool ;-)
Claus Gittinger <cg@exept.de>
parents: 2445
diff changeset
  2642
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2643
    "Modified: / 28-07-2017 / 11:34:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2644
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2645
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2646
processZ
    "Handle the letter Z found at currentIndex of inputKey.
     ZH (chinese pinyin, e.g. zhao) adds 'J' to both translations and consumes the H.
     Otherwise the primary translation gets 'S'. The secondary translation gets
     'TS' if the two letters following the Z are ZO, ZI or ZA, or if the word is
     slavo-germanic and the Z is neither the first letter nor preceded by a T;
     else it gets 'S' as well.
     A directly following second Z is consumed by bumping skipCount.

     Reference implementation:
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Z':
                                //chinese pinyin e.g. 'zhao'
                                if(GetAt(current + 1) == 'H')
                                {
                                        MetaphAdd(J);
                                        current += 2;
                                        break;
                                }else
                                        if(StringAt((current + 1), 2, ZO, ZI, ZA, )
                                                OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
                                        {
                                                MetaphAdd(S, TS);
                                        }
                                        else
                                                MetaphAdd(S);

                                if(GetAt(current + 1) == 'Z')
                                        current += 2;
                                else
                                        current += 1;
                                break;
    "

    |followingPair useTS|

    (self keyAt: currentIndex + 1) = $H ifTrue: [
        self
            addPrimaryTranslation: 'J';
            addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "both indices are clamped to the end of the word; near the end this
     yields fewer than two letters, which cannot match any of the patterns"
    followingPair := inputKey
                        copyFrom: ((currentIndex + 1) min: inputKey size)
                        to: ((currentIndex + 2) min: inputKey size).

    "the previous letter is a Character, so it has to be compared against
     the character $T (comparing against the string 'T' is always unequal)"
    useTS := (#('ZO' 'ZI' 'ZA') includes: followingPair)
             or: [ (self isSlavoGermanic: inputKey)
                   and: [ currentIndex > 1
                          and: [ (self keyAt: currentIndex - 1) ~= $T ] ] ].

    self addPrimaryTranslation: 'S'.
    self addSecondaryTranslation: (useTS ifTrue: [ 'TS' ] ifFalse: [ 'S' ]).

    "ZZ counts as a single sound"
    (self keyAt: currentIndex + 1) = $Z ifTrue: [
        skipCount := skipCount + 1.
        ^ self
    ]
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2694
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2695
    "Modified: / 28-07-2017 / 11:35:12 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2696
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2697
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2698
!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2699
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2700
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2701
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2702
    There are many extended and enhanced soundex variants around;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2703
    here is one, called 'extended soundex'. It is described for example in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2704
    http://www.epidata.dk/documentation.php.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2705
    An author or origin is unknown.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2706
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2707
    The number of digits is increased to 5 or 8;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2708
    The first character is not used literally; instead it is encoded like the rest.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2709
    This might have a negative effect on names starting with a vowel, though.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2710
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2711
    Overall, it can be doubted if this is really an enhancement after all.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2712
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2713
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2714
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2715
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2716
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2717
phoneticStringsFor:aString
    "generates both an extended soundex of length 5 and one of length 8.
     Answers a two-element Array: the 5-character code followed by the
     8-character code; the short code is always the prefix of the long one.

     The uppercased input is translated character by character via #translate:.
     Characters without a code (nil) and vowels ('0') add nothing, and a code
     equal to the code of the directly preceding character is dropped.
     Encoding stops after 8 digits; shorter results are padded with '0'."

    |codes code prevCode|

    codes := ''.
    aString asUppercase do:[:c |
        code := self translate:c.
        (code notNil and:[ code ~= '0' and:[ code ~= prevCode ]]) ifTrue:[
            codes := codes , code.
            codes size == 8 ifTrue:[
                ^ Array with:(codes copyTo:5) with:codes
            ].
        ].
        "remembered even for vowels and uncoded characters,
         so these act as separators between equal codes"
        prevCode := code
    ].
    [ codes size < 8 ] whileTrue:[
        codes := codes , '0'
    ].
    "always cut the short code to 5 digits - also if 6 or 7 digits were generated"
    ^ Array with:(codes copyTo:5) with:codes
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2743
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2744
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2745
     self basicNew phoneticStringsFor:'müller'  #('87900' '87900000')  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2746
     self basicNew phoneticStringsFor:'miller'  #('87900' '87900000')   
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2747
     self basicNew phoneticStringsFor:'muller'  #('87900' '87900000')    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2748
     self basicNew phoneticStringsFor:'muler'   #('87900' '87900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2749
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2750
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2751
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2752
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2753
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2754
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2755
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2756
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2757
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2758
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2759
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2760
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2761
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2762
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2763
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2764
translate:aCharacter
    "Answer the one-character code string for the uppercase letter aCharacter,
     or nil if there is no code for it (this includes H and W and
     everything which is not one of the letters tested below).

     use simple if's for more speed when compiled"

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ].
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ].
    aCharacter == $V ifTrue:[^ '2' ].

    aCharacter == $C ifTrue:[^ '3' ].
    aCharacter == $S ifTrue:[^ '3' ].
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ].
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ].
    aCharacter == $X ifTrue:[^ '5' ].
    aCharacter == $Z ifTrue:[^ '5' ].

    "no test for G in this group: G is already answered as '4' above,
     so a second test here could never be reached"
    aCharacter == $D ifTrue:[^ '6' ].
    aCharacter == $T ifTrue:[^ '6' ].

    aCharacter == $L ifTrue:[^ '7' ].

    aCharacter == $M ifTrue:[^ '8' ].
    aCharacter == $N ifTrue:[^ '8' ].

    aCharacter == $R ifTrue:[^ '9' ].
    ^ nil
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2803
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2804
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2805
!PhoneticStringUtilities::SingleResultPhoneticStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2806
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2807
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2808
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2809
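    Common superclass of the phonetic string comparators which generate
    exactly one phonetic code per word.
    Subclasses have to implement #encode: ; #phoneticStringsFor: answers
    the result of #encode: wrapped into a one-element Array.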
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2810
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2811
    [author:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2812
        cg
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2813
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2814
    [instance variables:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2815
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2816
    [class variables:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2817
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2818
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2819
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2820
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2821
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2822
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2823
!PhoneticStringUtilities::SingleResultPhoneticStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2824
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2825
encode:word
    "Answer the phonetic code for word (presumably a String - to be confirmed
     against the subclasses). Not implemented here; every concrete subclass
     has to provide its own implementation. Used by #phoneticStringsFor:."
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2826
    ^ self subclassResponsibility
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2827
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2828
    "Created: / 28-07-2017 / 15:20:49 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2829
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2830
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2831
phoneticStringsFor:word
    "Answer a one-element Array which holds the single
     code computed by #encode: for word."

    |code|

    code := self encode:word.
    ^ Array with:code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2833
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2834
    "Created: / 28-07-2017 / 15:20:38 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2835
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2836
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2837
!PhoneticStringUtilities::MRAStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2838
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2839
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2840
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2841
    Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2842
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2843
    The Western Airlines match rating approach (MRA) name encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2844
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2845
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2846
        https://en.wikipedia.org/wiki/Match_Rating_Approach
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2847
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2848
        G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2849
            ''Accessing Individual Records from Personal Data Files Using Nonunique Identifiers'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2850
            US National Institute of Standards and Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2851
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2852
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2853
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2854
rCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2855
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2856
## Copyright (c) 2015, James P. Howard, II <jh@jameshoward.us>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2857
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2858
## Redistribution and use in source and binary forms, with or without
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2859
## modification, are permitted provided that the following conditions are
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2860
## met:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2861
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2862
##     Redistributions of source code must retain the above copyright
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2863
##     notice, this list of conditions and the following disclaimer.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2864
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2865
##     Redistributions in binary form must reproduce the above copyright
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2866
##     notice, this list of conditions and the following disclaimer in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2867
##     the documentation and/or other materials provided with the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2868
##     distribution.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2869
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2870
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2871
## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2872
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2873
## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2874
## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2875
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2876
## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2877
## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2878
## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2879
## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2880
## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2881
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2882
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2883
#' @title Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2884
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2885
#' @description
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2886
#' The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2887
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2888
#' @param word string or vector of strings to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2889
#' @param x MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2890
#' @param y MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2891
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2892
#' @details
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2893
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2894
#' The variable \code{word} is the name to be encoded.  The variable
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2895
#' \code{maxCodeLen} is \emph{not} supported in this algorithm encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2896
#' because the algorithm itself is dependent upon its six-character
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2897
#' length.  The variables \code{x} and \code{y} are MRA-encoded and are
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2898
#' compared to each other using the MRA comparison specification.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2899
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2900
#' @return The \code{mra_encode} function returns match rating approach
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2901
#' encoded character vector.  The \code{mra_compare} returns a boolean
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2902
#' vector which is \code{TRUE} if \code{x} and \code{y} pass the MRA
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2903
#' comparison test.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2904
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2905
#' @references
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2906
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2907
#' G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2908
#' \emph{Accessing Individual Records from Personal Data Files Using
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2909
#' Nonunique Identifiers,} US National Institute of Standards and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2910
#' Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2911
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2912
#' @family phonics
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2913
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2914
#' @examples
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2915
#' mra_encode("William")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2916
#' mra_encode(c("Peter", "Peady"))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2917
#' mra_encode("Stevenson")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2918
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2919
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2920
#' @name mra_encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2921
#' @export
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2922
mra_encode <- function(word) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2923
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2924
    ## First, remove any nonalphabetical characters and uppercase it
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2925
    word <- gsub("[^[:alpha:]]*", "", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2926
    word <- toupper(word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2927
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2928
    ## First character of key = first character of name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2929
    first <- substr(word, 1, 1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2930
    word <- substr(word, 2, nchar(word))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2931
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2932
    ## Delete vowels not at the start of the word
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2933
    word <- gsub("[AEIOU]", "", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2934
    word <- paste(first, word, sep = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2935
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2936
    ## Remove duplicate consecutive characters
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2937
    word <- gsub("([A-Z])\\1+", "\\1", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2938
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2939
    ## If longer than 6 characters, take first and last 3...and we have
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2940
    ## to vectorize it
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2941
    for(i in 1:length(word)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2942
        if((l = nchar(word[i])) > 6) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2943
            first <- substr(word[i], 1, 3)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2944
            last <- substr(word[i], l - 2, l)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2945
            word[i] <- paste(first, last, sep = "");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2946
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2947
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2948
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2949
    return(word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2950
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2951
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2952
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2953
#' @name mra_compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2954
#' @export
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2955
mra_compare <- function(x, y) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2956
    mra <- data.frame(x = x, y = y, sim = 0, min = 100, stringsAsFactors = FALSE)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2957
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2958
    ## Obtain the minimum rating value by calculating the length sum of
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2959
    ## the encoded strings and using table A (from Wikipedia).  We start
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2960
    ## by setting the minimum to be the sum and move from there.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2961
    mra$lensum <- nchar(mra$x) + nchar(mra$y)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2962
    mra$min[mra$lensum == 12] <- 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2963
    mra$min[mra$lensum > 7 && mra$lensum <= 11] <- 3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2964
    mra$min[mra$lensum > 4 && mra$lensum <= 7] <- 4
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2965
    mra$min[mra$lensum <= 4] <- 5
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2966
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2967
    ## If the length difference between the encoded strings is 3 or
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2968
    ## greater, then no similarity comparison is done.  For us, we
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2969
    ## continue the similarity comparison out of laziness and ensure the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2970
    ## minimum is impossibly high to meet.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2971
    mra$min[abs(nchar(mra$x) - nchar(mra$y)) >= 3] <- 100
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2972
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2973
    ## Start the comparison.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2974
    x <- strsplit(mra$x, split = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2975
    y <- strsplit(mra$y, split = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2976
    rows <- nrow(mra)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2977
    for(i in 1:rows) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2978
        ## Process the encoded strings from left to right and remove any
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2979
        ## identical characters found from both strings respectively.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2980
        j <- 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2981
        while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2982
            if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2983
                x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2984
                y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2985
            } else
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2986
                j <- j + 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2987
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2988
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2989
        ## Process the unmatched characters from right to left and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2990
        ## remove any identical characters found from both names
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2991
        ## respectively.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2992
        x[[i]] <- rev(x[[i]])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2993
        y[[i]] <- rev(y[[i]])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2994
        j <- 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2995
        while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2996
            if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2997
                x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2998
                y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2999
            } else
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3000
                j <- j + 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3001
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3002
        ## Subtract the number of unmatched characters from 6 in the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3003
        ## longer string. This is the similarity rating.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3004
        len <- min(length(x[[i]]), length(y[[i]]))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3005
        mra$sim[i] <- 6 - len
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3006
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3007
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3008
    ## If the similarity is greater than or equal to the minimum
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3009
    ## required, it is a successful match.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3010
    mra$match <- (mra$sim >= mra$min)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3011
    return(mra$match)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3012
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3013
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3014
END>>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3015
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3016
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3017
!PhoneticStringUtilities::MRAStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3018
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3019
encode:wordIn 
    "return the Match Rating Approach (MRA) code of wordIn.
     Steps, in this order:
        - keep only the letters and convert them to uppercase
        - drop every vowel, except when it is the first character
        - collapse runs of identical consecutive characters into one
        - if more than 6 characters remain, keep the first 3 and the last 3
     Returns an empty string if wordIn contains no letter at all.
     see https://en.wikipedia.org/wiki/Match_Rating_Approach"

    |word prev|

    "/ First, remove any nonalphabetical characters and uppercase it
    word := wordIn select:#isLetter thenCollect:#asUppercase.

    "/ Nothing left to encode (empty input, or only digits/punctuation);
    "/ without this guard, the 'word first' below fails on the empty collection.
    word isEmpty ifTrue:[^ ''].

    "/ Delete vowels not at the start of the word
    word := word first asString , ((word from:2) reject:#isVowel).

    "/ Remove duplicate consecutive characters:
    "/ a repeated character is first replaced by the marker $* and the markers
    "/ are filtered out afterwards ($* cannot be in word, which holds letters only).
    "/ Characters are compared by value (=), not by identity (==), because
    "/ identity is not a reliable equality test for non single-byte characters.
    prev := nil.
    word := word 
                collect:[:char |
                    char = prev ifTrue:[
                        $*
                    ] ifFalse:[
                        prev := char.
                        char
                    ]
                ]
                thenSelect:[:char | char ~= $*].

    "/ If longer than 6 characters, take first and last 3            
    word size > 6 ifTrue:[
        word := (word copyFirst:3),(word copyLast:3)
    ].
    ^ word.

    "
     self new encode:'Catherine'            -> 'CTHRN'
     self new encode:'CatherineCatherine'   -> 'CTHHRN'
     self new encode:'Butter'               -> 'BTR'
     self new encode:'Byrne'                -> 'BYRN'
     self new encode:'Boern'                -> 'BRN'
     self new encode:'Smith'                -> 'SMTH'
     self new encode:'Smyth'                -> 'SMYTH'
     self new encode:'Kathryn'              -> 'KTHRYN'
     self new encode:''                     -> ''
     self new encode:'123'                  -> ''
    "

    "Created: / 28-07-2017 / 15:19:22 / cg"
    "Modified (comment): / 31-07-2017 / 15:14:31 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3067
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3068
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3069
!PhoneticStringUtilities::MetaphoneStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3070
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3071
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3072
"
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3073
   Ongoing work - do not use at the moment
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3074
   
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3075
   Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3076
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3077
   Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3078
   Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3079
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3080
    'Hanging on the Metaphone' by Lawrence Philips, in Computer Language, Dec. 1990, p 39.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3081
    Note that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3082
    https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3083
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3084
  They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3085
  For more information, see https://issues.apache.org/jira/browse/CODEC-57
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3086
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3087
  Metaphone uses the following rules:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3088
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3089
    Doubled letters except 'c' -> drop 2nd letter.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3090
    Vowels are only kept when they are the first letter.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3091
    B -> B unless at the end of a word after 'm' as in 'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3092
    C -> X (sh) if -cia- or -ch-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3093
    S if -ci-, -ce- or -cy-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3094
    K otherwise, including -sch-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3095
    D -> J if in -dge-, -dgy- or -dgi-; T otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3096
    F -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3097
    G -> silent if in -gh- and not at end or before a vowel, or if in -gn- or -gned- (also see dge etc. above)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3098
    J if before i or e or y if not double gg; K otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3099
    H -> silent if after vowel and no vowel follows; H otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3100
    J -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3101
    K -> silent if after 'c'; K otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3102
    L -> L
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3103
    M -> M
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3104
    N -> N
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3105
    P -> F if before 'h'; P otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3106
    Q -> K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3107
    R -> R
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3108
    S -> X (sh) if before 'h' or in -sio- or -sia-; S otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3109
    T -> X (sh) if -tia- or -tio-; 0 (th) if before 'h'; silent if in -tch-; T otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3110
    V -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3111
    W -> silent if not followed by a vowel; W if followed by a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3112
    X -> KS
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3113
    Y -> silent if not followed by a vowel; Y if followed by a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3114
    Z -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3115
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3116
    Initial Letter Exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3117
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3118
    Initial kn-, gn-, pn-, ae- or wr- -> drop first letter
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3119
    Initial x- -> change to 's'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3120
    Initial wh- -> change to 'w'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3121
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3122
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3123
     self new encode:'a'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3124
     self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3125
     self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3126
     self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3127
     self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3128
     self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3129
     self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3130
     self new encode:'weber'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3131
     self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3132
     self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3133
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3134
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3135
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3136
javaCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3137
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3138
/*
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3139
 * Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3140
 * contributor license agreements.  See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3141
 * this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3142
 * The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3143
 * (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3144
 * the License.  You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3145
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3146
 *      http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3147
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3148
 * Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3149
 * distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3150
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3151
 * See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3152
 * limitations under the License.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3153
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3154
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3155
package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3156
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3157
import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3158
import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3159
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3160
/**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3161
 * Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3162
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3163
 * Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3164
 * Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3165
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3166
 * <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3167
 * p 39.</CITE>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3168
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3169
 * Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3170
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3171
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3172
 * <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3173
 *  (broken link 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3174
 * <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3175
 *  (link checked 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3176
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3177
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3178
 * They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3179
 * For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3180
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3181
 * This class is conditionally thread-safe.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3182
 * The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3183
 * but is not volatile, and accesses are not synchronized.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3184
 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3185
 * is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3186
 * after initial setup.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3187
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3188
 * @version $Id$
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3189
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3190
public class Metaphone implements StringEncoder {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3191
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3192
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3193
     * Five values in the English language
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3194
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3195
    private static final String VOWELS = "AEIOU";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3196
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3197
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3198
     * Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3199
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3200
    private static final String FRONTV = "EIY";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3201
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3202
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3203
     * Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3204
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3205
    private static final String VARSON = "CSPTG";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3206
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3207
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3208
     * The max code length for metaphone is 4
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3209
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3210
    private int maxCodeLen = 4;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3211
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3212
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3213
     * Creates an instance of the Metaphone encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3214
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3215
    public Metaphone() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3216
        super();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3217
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3218
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3219
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3220
     * Find the metaphone value of a String. This is similar to the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3221
     * soundex algorithm, but better at finding similar sounding words.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3222
     * All input is converted to upper case.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3223
     * Limitations: Input format is expected to be a single ASCII word
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3224
     * with only characters in the A - Z range, no punctuation or numbers.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3225
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3226
     * @param txt String to find the metaphone code for
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3227
     * @return A metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3228
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3229
    public String metaphone(final String txt) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3230
        boolean hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3231
        int txtLength;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3232
        if (txt == null || (txtLength = txt.length()) == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3233
            return "";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3234
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3235
        // single character is itself
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3236
        if (txtLength == 1) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3237
            return txt.toUpperCase(java.util.Locale.ENGLISH);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3238
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3239
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3240
        final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3241
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3242
        final StringBuilder local = new StringBuilder(40); // manipulate
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3243
        final StringBuilder code = new StringBuilder(10); //   output
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3244
        // handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3245
        switch(inwd[0]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3246
        case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3247
        case 'G':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3248
        case 'P': /* looking for KN, etc*/
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3249
            if (inwd[1] == 'N') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3250
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3251
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3252
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3253
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3254
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3255
        case 'A': /* looking for AE */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3256
            if (inwd[1] == 'E') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3257
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3258
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3259
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3260
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3261
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3262
        case 'W': /* looking for WR or WH */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3263
            if (inwd[1] == 'R') {   // WR -> R
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3264
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3265
                break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3266
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3267
            if (inwd[1] == 'H') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3268
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3269
                local.setCharAt(0, 'W'); // WH -> W
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3270
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3271
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3272
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3273
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3274
        case 'X': /* initial X becomes S */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3275
            inwd[0] = 'S';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3276
            local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3277
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3278
        default:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3279
            local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3280
        } // now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3281
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3282
        final int wdsz = local.length();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3283
        int n = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3284
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3285
        while (code.length() < this.getMaxCodeLen() &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3286
               n < wdsz ) { // max code size of 4 works well
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3287
            final char symb = local.charAt(n);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3288
            // remove duplicate letters except C
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3289
            if (symb !!= 'C' && isPreviousChar( local, n, symb ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3290
                n++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3291
            } else { // not dup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3292
                switch(symb) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3293
                case 'A':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3294
                case 'E':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3295
                case 'I':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3296
                case 'O':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3297
                case 'U':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3298
                    if (n == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3299
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3300
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3301
                    break; // only use vowel if leading char
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3302
                case 'B':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3303
                    if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3304
                         isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3305
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3306
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3307
                    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3308
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3309
                case 'C': // lots of C special cases
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3310
                    /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3311
                    if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3312
                         !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3313
                         FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3314
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3315
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3316
                    if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3317
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3318
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3319
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3320
                    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3321
                        FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3322
                        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3323
                        break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3324
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3325
                    if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3326
                        isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3327
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3328
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3329
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3330
                    if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3331
                        if (n == 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3332
                            wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3333
                            isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3334
                            code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3335
                        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3336
                            code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3337
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3338
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3339
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3340
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3341
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3342
                case 'D':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3343
                    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3344
                        isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3345
                        FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3346
                        code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3347
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3348
                        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3349
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3350
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3351
                case 'G': // GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3352
                    if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3353
                        isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3354
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3355
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3356
                    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3357
                        isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3358
                        !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3359
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3360
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3361
                    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3362
                        ( regionMatch(local, n, "GN") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3363
                          regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3364
                        break; // silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3365
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3366
                    if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3367
                        // NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3368
                        hard = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3369
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3370
                        hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3371
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3372
                    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3373
                        FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3374
                        !!hard) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3375
                        code.append('J');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3376
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3377
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3378
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3379
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3380
                case 'H':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3381
                    if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3382
                        break; // terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3383
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3384
                    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3385
                        VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3386
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3387
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3388
                    if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3389
                        code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3390
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3391
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3392
                case 'F':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3393
                case 'J':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3394
                case 'L':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3395
                case 'M':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3396
                case 'N':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3397
                case 'R':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3398
                    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3399
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3400
                case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3401
                    if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3402
                        if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3403
                            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3404
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3405
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3406
                        code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3407
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3408
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3409
                case 'P':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3410
                    if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3411
                        // PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3412
                        code.append('F');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3413
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3414
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3415
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3416
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3417
                case 'Q':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3418
                    code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3419
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3420
                case 'S':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3421
                    if (regionMatch(local,n,"SH") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3422
                        regionMatch(local,n,"SIO") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3423
                        regionMatch(local,n,"SIA")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3424
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3425
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3426
                        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3427
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3428
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3429
                case 'T':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3430
                    if (regionMatch(local,n,"TIA") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3431
                        regionMatch(local,n,"TIO")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3432
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3433
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3434
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3435
                    if (regionMatch(local,n,"TCH")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3436
                        // Silent if in "TCH"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3437
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3438
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3439
                    // substitute numeral 0 for TH (resembles theta after all)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3440
                    if (regionMatch(local,n,"TH")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3441
                        code.append('0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3442
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3443
                        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3444
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3445
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3446
                case 'V':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3447
                    code.append('F'); break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3448
                case 'W':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3449
                case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3450
                    if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3451
                        isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3452
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3453
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3454
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3455
                case 'X':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3456
                    code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3457
                    code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3458
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3459
                case 'Z':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3460
                    code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3461
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3462
                default:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3463
                    // do nothing
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3464
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3465
                } // end switch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3466
                n++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3467
            } // end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3468
            if (code.length() > this.getMaxCodeLen()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3469
                code.setLength(this.getMaxCodeLen());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3470
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3471
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3472
        return code.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3473
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3474
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3475
    private boolean isVowel(final StringBuilder string, final int index) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3476
        return VOWELS.indexOf(string.charAt(index)) >= 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3477
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3478
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3479
    private boolean isPreviousChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3480
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3481
        if( index > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3482
            index < string.length() ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3483
            matches = string.charAt(index - 1) == c;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3484
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3485
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3486
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3487
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3488
    private boolean isNextChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3489
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3490
        if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3491
            index < string.length() - 1 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3492
            matches = string.charAt(index + 1) == c;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3493
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3494
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3495
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3496
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3497
    private boolean regionMatch(final StringBuilder string, final int index, final String test) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3498
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3499
        if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3500
            index + test.length() - 1 < string.length() ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3501
            final String substring = string.substring( index, index + test.length());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3502
            matches = substring.equals( test );
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3503
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3504
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3505
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3506
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3507
    private boolean isLastChar(final int wdsz, final int n) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3508
        return n + 1 == wdsz;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3509
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3510
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3511
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3512
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3513
     * Encodes an Object using the metaphone algorithm.  This method
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3514
     * is provided in order to satisfy the requirements of the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3515
     * Encoder interface, and will throw an EncoderException if the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3516
     * supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3517
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3518
     * @param obj Object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3519
     * @return An object (of type java.lang.String) containing the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3520
     *         metaphone code which corresponds to the String supplied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3521
     * @throws EncoderException if the parameter supplied is not
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3522
     *                          of type java.lang.String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3523
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3524
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3525
    public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3526
        if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3527
            throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3528
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3529
        return metaphone((String) obj);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3530
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3531
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3532
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3533
     * Encodes a String using the Metaphone algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3534
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3535
     * @param str String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3536
     * @return The metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3537
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3538
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3539
    public String encode(final String str) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3540
        return metaphone(str);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3541
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3542
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3543
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3544
     * Tests if the metaphones of two strings are identical.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3545
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3546
     * @param str1 First of two strings to compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3547
     * @param str2 Second of two strings to compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3548
     * @return <code>true</code> if the metaphones of these strings are identical,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3549
     *        <code>false</code> otherwise.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3550
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3551
    public boolean isMetaphoneEqual(final String str1, final String str2) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3552
        return metaphone(str1).equals(metaphone(str2));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3553
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3554
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3555
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3556
     * Returns the maxCodeLen.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3557
     * @return int
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3558
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3559
    public int getMaxCodeLen() { return this.maxCodeLen; }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3560
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3561
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3562
     * Sets the maxCodeLen.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3563
     * @param maxCodeLen The maxCodeLen to set
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3564
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3565
    public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3566
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3567
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3568
END>>"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3569
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3570
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3571
!PhoneticStringUtilities::MetaphoneStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3572
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3573
encode:txt
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3574
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3575
     self new encode:'a'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3576
     self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3577
     self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3578
     self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3579
     self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3580
     self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3581
     self new encode:'weber'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3582
     self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3583
     self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3584
     self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3585
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3586
    
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3587
    |hard txtLength local code inwd ch ch2 wdsz n|
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3588
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3589
    inwd := txt.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3590
    hard := false.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3591
    txtLength := 0.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3592
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3593
    (txtLength := txt size) == 0 ifTrue:[^ ''].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3594
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3595
    inwd := txt asUppercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3596
    "/ single character is itself
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3597
    (txtLength == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3598
        ^ inwd        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3599
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3600
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3601
    code := '' writeStream.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3602
    local := inwd.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3603
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3604
    "/ handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3605
    ch := inwd at:(0+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3606
    ch2 := inwd at:(1+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3607
    ('KGP' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3608
        "/ looking for KN, etc
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3609
        "/ KNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3610
        "/ GNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3611
        "/ PNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3612
        (ch2 == $N) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3613
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3614
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3615
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3616
    ('A' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3617
        "/ looking for AE
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3618
        "/ AEx -> Ex 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3619
        (ch2 == $E) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3620
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3621
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3622
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3623
    ('W' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3624
        "/ looking for WR or WH 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3625
        (ch2 == $R) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3626
            "/ WRx -> Rx (the leading W is dropped)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3627
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3628
        ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3629
            (ch2 == $H) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3630
                "/ WHx -> Wx (the H is dropped)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3631
                local := 'W',(inwd from:2+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3632
            ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3633
        ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3634
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3635
    ('X' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3636
        "/ initial X becomes S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3637
        "/ Xx -> Sx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3638
        local := 'S',(inwd from:1+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3639
    ]]]].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3640
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3641
    "/ now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3642
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3643
    wdsz := local size.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3644
    n := 1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3645
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3646
    [ n <= wdsz ] whileTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3647
        "/ max code size of 4 works well
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3648
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3649
        |symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3650
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3651
        symb := local at:n.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3652
        (n > 1) ifTrue:[ prevChar := local at:(n-1) ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3653
        (isLastChar := (n == wdsz)) ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3654
            nextChar := local at:(n+1) 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3655
        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3656
        isPrevToLastChar := (n == (wdsz-1)).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3657
        (n+2) <= wdsz ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3658
            nextNextChar := local at:(n+2)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3659
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3660
        
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3661
        "/ remove duplicate letters except C and except first
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3662
        (symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3663
            "/ not dup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3664
            ('AEIOU' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3665
                "/ only use vowel if leading char
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3666
                (n == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3667
                    code nextPut:symb
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3668
                ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3669
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3670
            ('B' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3671
                "/    if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3672
                "/         isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3673
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3674
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3675
                "/    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3676
                "/    break;
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3677
                (isLastChar and:[ prevChar == $M]) ifTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3678
                    "/ B is silent if word ends in MB 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3679
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3680
                    code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3681
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3682
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3683
            ('C' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3684
                "/ lots of C special cases    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3685
                "/    /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3686
                "/    if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3687
                "/         !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3688
                "/         FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3689
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3690
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3691
                "/    if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3692
                "/        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3693
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3694
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3695
                "/    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3696
                "/        FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3697
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3698
                "/        break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3699
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3700
                "/    if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3701
                "/        isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3702
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3703
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3704
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3705
                "/    if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3706
                "/        if (n == 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3707
                "/            wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3708
                "/            isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3709
                "/            code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3710
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3711
                "/            code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3712
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3713
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3714
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3715
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3716
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3717
                (prevChar == $S and:[ 'EIY' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3718
                    "/ discard if SCI, SCE or SCY
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3719
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3720
                    ((nextChar == $I) and:[ nextNextChar == $A ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3721
                        "/  "CIA" -> X 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3722
                        code nextPut:$X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3723
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3724
                        ('IEY' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3725
                            "/ CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3726
                            code nextPut:$S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3727
                        ] ifFalse:[ 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3728
                           ((prevChar == $S) and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3729
                               "/ SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3730
                                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3731
                            ] ifFalse:[ 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3732
                                nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3733
                                    "/ CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3734
                                    ('AEIOU' includes:nextNextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3735
                                        code nextPut:$K "/ CH consonant -> K consonant 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3736
                                    ] ifFalse:[    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3737
                                        code nextPut:$X "/ CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3738
                                    ]    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3739
                                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3740
                                    code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3741
                                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3742
                            ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3743
                        ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3744
                    ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3745
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3746
                
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3747
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3748
            ('D' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3749
                "/    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3750
                "/        isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3751
                "/        FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3752
                "/        code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3753
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3754
                "/        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3755
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3756
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3757
                ((nextChar == $G)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3758
                and:[ (local from:n) startsWithAnyOf:#('DGE' 'DGI' 'DGY') ])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3759
                ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3760
                    code nextPut:$J.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3761
                    n := n + 2.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3762
                ] ifFalse:[    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3763
                    code nextPut:$T.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3764
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3765
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3766
            ('G' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3767
                "/    GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3768
                "/    if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3769
                "/        isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3770
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3771
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3772
                "/    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3773
                "/        isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3774
                "/        !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3775
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3776
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3777
                "/    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3778
                "/        ( regionMatch(local, n, "GN") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3779
                "/          regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3780
                "/        break; // silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3781
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3782
                "/    if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3783
                "/        // NOTE: Given that duplicated chars are removed, I dont see how this can ever be true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3784
                "/        hard = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3785
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3786
                "/        hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3787
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3788
                "/    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3789
                "/        FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3790
                "/        !!hard) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3791
                "/        code.append('J');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3792
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3793
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3794
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3795
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3796
                (isPrevToLastChar and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3797
                    "/ GH silent at end
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3798
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3799
                    (isPrevToLastChar not and:[ nextChar == $H 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3800
                      and:[ ('AEIOU' includes:nextNextChar) not ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3801
                        "/ GH silent before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3802
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3803
                        (n > 1 and:[ nextChar == $N ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3804
                            "/ GN -> silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3805
                        ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3806
                            hard := (prevChar == $G).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3807
                            (isLastChar not and:[ hard not and:[ ('EIY' includes:nextChar) ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3808
                                code nextPut:$J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3809
                            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3810
                                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3811
                            ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3812
                        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3813
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3814
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3815
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3816
            ('H' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3817
                "/    case 'H':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3818
                "/        if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3819
                "/            break; // terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3820
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3821
                "/        if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3822
                "/            VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3823
                "/            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3824
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3825
                "/        if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3826
                "/            code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3827
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3828
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3829
                isLastChar ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3830
                    "/ ignore terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3831
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3832
                    ('CSPTG' includes:prevChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3833
                        "/ ignore CH, SH, PH, TH, GH (H treated there)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3834
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3835
                        ('AEIOU' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3836
                            "/ Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3837
                            code nextPut:$H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3838
                        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3839
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3840
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3841
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3842
            ('FJLMNR' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3843
                "/    case 'F':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3844
                "/    case 'J':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3845
                "/    case 'L':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3846
                "/    case 'M':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3847
                "/    case 'N':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3848
                "/    case 'R':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3849
                "/        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3850
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3851
                code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3852
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3853
            ('K' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3854
                "/    case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3855
                "/        if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3856
                "/            if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3857
                "/                code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3858
                "/            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3859
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3860
                "/            code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3861
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3862
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3863
                n > 1 ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3864
                    "/ not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3865
                    prevChar ~~ $C ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3866
                        code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3867
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3868
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3869
                    code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3870
                ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3871
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3872
            ('P' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3873
                "/    case 'P':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3874
                "/        if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3875
                "/            // PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3876
                "/            code.append('F');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3877
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3878
                "/            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3879
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3880
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3881
                nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3882
                    "/ PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3883
                    code nextPut:$F.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3884
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3885
                    code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3886
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3887
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3888
            ('Q' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3889
                "/    case 'Q':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3890
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3891
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3892
                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3893
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3894
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3895
            ('S' includes:symb) ifTrue:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3896
                "/    case 'S':
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3897
                "/        if (regionMatch(local,n,"SH") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3898
                "/            regionMatch(local,n,"SIO") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3899
                "/            regionMatch(local,n,"SIA")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3900
                "/            code.append('X');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3901
                "/        } else {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3902
                "/            code.append('S');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3903
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3904
                "/        break;
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3905
                "/ SH -> X  (as in shave or ashton)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3906
                "/ SIO -> X 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3907
                "/ SIA -> X (as in ASIA)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3908
                ((nextChar == $H) 
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3909
                  or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3910
                ) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3911
                    code nextPut:$X
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3912
                ] ifFalse:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3913
                    code nextPut:$S
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3914
                ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3915
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3916
            ('T' includes:symb) ifTrue:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3917
                "/    case 'T':
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3918
                "/        if (regionMatch(local,n,"TIA") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3919
                "/            regionMatch(local,n,"TIO")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3920
                "/            code.append('X');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3921
                "/            break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3922
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3923
                "/        if (regionMatch(local,n,"TCH")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3924
                "/            // Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3925
                "/            break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3926
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3927
                "/        // substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3928
                "/        if (regionMatch(local,n,"TH")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3929
                "/            code.append('0');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3930
                "/        } else {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3931
                "/            code.append('T');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3932
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3933
                "/        break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3934
                (nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3935
                    code nextPut:$X.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3936
                ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3937
                    (nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3938
                        "/ Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3939
                        "/ cg - huh; hutch - methinksthereisat
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3940
                    ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3941
                        "/ substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3942
                        nextChar == $H ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3943
                            code nextPut:$0.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3944
                        ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3945
                            code nextPut:$T.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3946
                        ].    
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3947
                    ].    
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3948
                ].    
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3949
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3950
            ('V' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3951
                "/    case 'V':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3952
                "/        code.append('F'); break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3953
                code nextPut:$F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3954
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3955
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3956
            ('WY' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3957
                "/    case 'W':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3958
                "/    case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3959
                "/        if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3960
                "/            isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3961
                "/            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3962
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3963
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3964
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3965
                "/ silent if not followed by vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3966
                (isLastChar not and:[ 'AEIOU' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3967
                    code nextPut:symb
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3968
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3969
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3970
            ('X' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3971
                "/    case 'X':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3972
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3973
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3974
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3975
                code nextPutAll:'KS'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3976
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3977
            ('Z' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3978
                "/    case 'Z':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3979
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3980
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3981
                code nextPut:$S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3982
            ] ifFalse:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3983
                "/    default:
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3984
                "/        // do nothing
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3985
                "/        break;
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3986
            ]]]]]]]]]]]]]]]]. "/ end switch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3987
        ]. "/ end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3988
        n := n + 1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3989
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3990
    ^ code contents
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3991
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3992
    "Created: / 02-08-2017 / 09:51:31 / cg"
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3993
    "Modified: / 03-08-2017 / 14:55:22 / cg"
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3994
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3995
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3996
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3997
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3998
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3999
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4000
    WARNING: this is the so-called 'simplified soundex' algorithm;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4001
      there are more variants like miracode (american soundex) or
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4002
      mysqlSoundex around.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4003
      
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4004
      Be sure to use the correct algorithm, if the generated strings must be compatible
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4005
      (otherwise, the differences are probably too small to have a noticeable effect, but
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4006
      your search results will differ)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4007
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4008
    The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4009
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4010
    SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4011
    components of names, but by doing so reports more matches. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4012
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4013
    There are some variations around in the literature; 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4014
    the following is called 'simplified soundex', and the rules for coding a name are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4015
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4016
    1. The first letter of the name is used in its un-coded form to serve as the prefix
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4017
       character of the code. (The rest of the code is numerical).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4018
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4019
    2. Thereafter, W and H are ignored entirely.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4020
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4021
    3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4022
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4023
    4. Other letters of the name are converted to a numerical equivalent:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4024
                 B, P, F, V              1 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4025
                 C, G, J, K, Q, S, X, Z  2 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4026
                 D, T                    3 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4027
                 L                       4 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4028
                 M, N                    5 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4029
                 R                       6 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4030
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4031
    5. There are two exceptions: 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4032
        1. Letters that follow prefix letters which would, if coded, have the same
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4033
           numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4034
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4035
        2. The second letter of any pair of consonants having the same code number is likewise ignored, 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4036
           i.e. unless there is a ''separator'' between them in the name.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4037
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4038
    6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4039
       Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4040
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4041
    Notice, that in another variant, w and h are treated slightly differently.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4042
    This is only of relevance, if you need to reconstruct original soundex codes of other programs
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4043
    or for the original 1880 US census data.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4044
     SoundexStringComparator  new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4045
    vs.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4046
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4047
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4048
    Also notice, that soundex deals better with English.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4049
    For German and other languages, other algorithms may provide better results.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4050
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4051
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4052
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4053
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4054
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4055
encode:word 
    "Answer the 4-character soundex code of word:
     its first character (uppercased), followed by up to three digit codes
     as answered by #translate:, filled up with '0' if fewer than three digits were found.
     A character contributes no digit when #translate: answers nil or '0' for it,
     or when its digit equals the digit of the immediately preceding character.
     Every character (also a skipped one) becomes the new 'previous' code,
     so any character between two equally coded ones lets the second one be emitted again
     (see the 'Ashcraft' example below).
     An empty word answers an empty string (instead of raising an error in #first)."

    |upper code digit previous|

    word isEmpty ifTrue:[^ String new].

    upper := word asUppercase.
    code := upper first asString.
    previous := self translate:upper first.
    upper from:2 to:upper size do:[:eachChar | 
        digit := self translate:eachChar.
        (digit notNil and:[ digit ~= '0' and:[ digit ~= previous ]]) ifTrue:[
            code := code , digit.
            "three digits collected - the rest of the word cannot change the result"
            code size == 4 ifTrue:[^ code ].
        ].
        previous := digit
    ].
    "fewer than three digits: pad with zeros; the code is exactly 4 characters long afterwards"
    [ code size < 4 ] whileTrue:[
        code := code , '0'
    ].
    ^ code

    "
     self new encode:'washington' -> 'W252'
     self new encode:'lee'        -> 'L000'
     self new encode:'Gutierrez'  -> 'G362'
     self new encode:'Pfister'    -> 'P236'
     self new encode:'Jackson'    -> 'J250'
     self new encode:'Tymczak'    -> 'T522'
     self new encode:''           -> ''
    "
    
    "notice:
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
     self new encode:'Ashcraft'   -> 'A226'
    "

    "Created: / 28-07-2017 / 15:21:23 / cg"
    "Modified (comment): / 01-08-2017 / 19:01:43 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4090
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4091
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4092
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4093
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4094
translate:aCharacter
    "Answer the soundex digit for aCharacter as a one-character string, or nil.
     Only the uppercase letters tested below are recognized; anything else
     (H, W, lowercase letters, digits, punctuation, ...) answers nil.
     Deliberately written as plain identity tests against character constants,
     for more speed when compiled."

    "0: vowels (and Y) serve as separators"
    (aCharacter == $A
     or:[ aCharacter == $E
     or:[ aCharacter == $I
     or:[ aCharacter == $O
     or:[ aCharacter == $U
     or:[ aCharacter == $Y ]]]]]) ifTrue:[^ '0' ].

    "1: B P F V"
    (aCharacter == $B
     or:[ aCharacter == $P
     or:[ aCharacter == $F
     or:[ aCharacter == $V ]]]) ifTrue:[^ '1' ].

    "2: C S K G J Q X Z"
    (aCharacter == $C
     or:[ aCharacter == $S
     or:[ aCharacter == $K
     or:[ aCharacter == $G
     or:[ aCharacter == $J
     or:[ aCharacter == $Q
     or:[ aCharacter == $X
     or:[ aCharacter == $Z ]]]]]]]) ifTrue:[^ '2' ].

    "3: D T"
    (aCharacter == $D or:[ aCharacter == $T ]) ifTrue:[^ '3' ].

    "4: L"
    aCharacter == $L ifTrue:[^ '4' ].

    "5: M N"
    (aCharacter == $M or:[ aCharacter == $N ]) ifTrue:[^ '5' ].

    "6: R"
    aCharacter == $R ifTrue:[^ '6' ].

    "everything else carries no code"
    ^ nil

    "Modified: / 02-08-2017 / 01:35:40 / cg"
    "Modified (comment): / 02-08-2017 / 14:30:11 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4132
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4133
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4134
!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4135
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4136
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4137
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4138
    MySQL soundex is like American Soundex (i.e. miracode) without the 4 character limitation,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4139
    and also removing vowels first, then removing duplicate codes
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4140
    (whereas the soundex code does this in reverse order).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4141
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4142
    These variations are important, if you need the miracode soundex codes to be generated.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4143
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4144
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4145
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4146
!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4147
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4148
encode:word 
    "same as inherited, but cares for 0, W and H:
     - the code is never cut off; it is only filled up with '0' to a minimum length of 4.
     - a character for which #translate: answers '0', as well as W and H,
       does not change the remembered previous code. Equally coded characters
       which are separated only by such characters are therefore emitted once.
     As in the inherited version, a character contributes no digit when #translate:
     answers nil or '0' for it, or when its digit equals the remembered previous code.
     An empty word answers an empty string (instead of raising an error in #first)."

    |upper code digit previous|

    word isEmpty ifTrue:[^ String new].

    upper := word asUppercase.
    code := upper first asString.
    previous := self translate:upper first.
    upper from:2 to:upper size do:[:eachChar |
        digit := self translate:eachChar.
        (digit notNil and:[ digit ~= '0' and:[ digit ~= previous ]]) ifTrue:[
            code := code , digit.
        ].
        "characters coded '0', W and H are transparent: keep the previous code across them"
        (digit ~= '0' and:[ eachChar ~= $W and:[eachChar ~= $H]]) ifTrue:[
            previous := digit.
        ].
    ].
    "short codes are padded with zeros; longer codes are answered unchanged"
    [ code size < 4 ] whileTrue:[
        code := code , '0'
    ].
    ^ code

    "
     self new encode:'' -> ''
    "

    "Created: / 28-07-2017 / 15:23:41 / cg"
    "Modified: / 31-07-2017 / 17:53:51 / cg"
    "Modified (comment): / 02-08-2017 / 14:31:15 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4173
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4174
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4175
!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4176
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4177
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4178
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4179
    NYSIIS Algorithm:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4180
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4181
    1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4182
        remove all ''S'' and ''Z'' chars from the end of the surname 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4183
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4184
    2.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4185
        transcode initial strings
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4186
            MAC => MC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4187
            PF => F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4188
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4189
    3.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4190
        Transcode trailing strings as follows,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4191
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4192
            IX => IC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4193
            EX => EC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4194
            YE,EE,IE => Y
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4195
            NT,ND => D 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4196
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4197
    4.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4198
        transcode ''EV'' to ''EF'' if not at start of name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4199
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4200
    5.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4201
        use first character of name as first character of key 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4202
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4203
    6.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4204
        remove any ''W'' that follows a vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4205
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4206
    7.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4207
        replace all vowels with ''A'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4208
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4209
    8.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4210
        transcode ''GHT'' to ''GT'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4211
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4212
    9.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4213
        transcode ''DG'' to ''G'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4214
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4215
    10.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4216
        transcode ''PH'' to ''F'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4217
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4218
    11.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4219
        if not first character, eliminate all ''H'' preceded or followed by a vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4220
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4221
    12.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4222
        change ''KN'' to ''N'', else ''K'' to ''C'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4223
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4224
    13.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4225
        if not first character, change ''M'' to ''N'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4226
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4227
    14.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4228
        if not first character, change ''Q'' to ''G'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4229
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4230
    15.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4231
        transcode ''SH'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4232
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4233
    16.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4234
        transcode ''SCH'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4235
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4236
    17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4237
        transcode ''YW'' to ''Y'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4238
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4239
    18.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4240
        if not first or last character, change ''Y'' to ''A'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4241
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4242
    19.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4243
        transcode ''WR'' to ''R'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4244
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4245
    20.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4246
        if not first character, change ''Z'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4247
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4248
    21.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4249
        transcode terminal ''AY'' to ''Y'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4250
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4251
    22.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4252
        remove trailing vowels 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4253
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4254
    23.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4255
        collapse all strings of repeated characters 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4256
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4257
    24.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4258
        if first char of original surname was a vowel, append it to the code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4259
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4260
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4261
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4262
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4263
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4264
encode:aString 
    "Answer the NYSIIS key for aString.
     The uppercased argument is passed through #rule1: .. #rule23: in ascending order,
     each step receiving the result of the previous one. Finally #rule24:originalKey:
     gets that result together with the unmodified argument, and its answer is returned.
     The meaning of the numbered steps is listed in the class documentation."

    |code|

    "1. .. 4. treatment of name endings and beginnings
     (2. Transcode initial strings:  MAC => MC   PF => F)"
    code := self rule1:(aString asUppercase).
    code := self rule2:code.
    code := self rule3:code.
    code := self rule4:code.

    "5. .. 21. the individual transcoding steps"
    code := self rule5:code.
    code := self rule6:code.
    code := self rule7:code.
    code := self rule8:code.
    code := self rule9:code.
    code := self rule10:code.
    code := self rule11:code.
    code := self rule12:code.
    code := self rule13:code.
    code := self rule14:code.
    code := self rule15:code.
    code := self rule16:code.
    code := self rule17:code.
    code := self rule18:code.
    code := self rule19:code.
    code := self rule20:code.
    code := self rule21:code.

    "22. .. 24. final cleanup; the last step also looks at the original argument"
    code := self rule22:code.
    code := self rule23:code.
    ^ self rule24:code originalKey:aString

    "
     self new encode:'hello'
     self new encode:'bliss'
    "
    "
     self new phoneticStringsFor:'hello'
     self new phoneticStringsFor:'bliss'
    "

    "Created: / 28-07-2017 / 15:34:52 / cg"
    "Modified (comment): / 02-08-2017 / 14:31:47 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4305
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4306
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4307
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4308
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4309
rule10:key 
    "NYSIIS step 10: transcode 'PH' to 'F'.
     Delegates to #transcodeAll:of:to:startingAt: with a start index of 1
     and answers whatever that helper answers."

    |transcoded|

    transcoded := self transcodeAll:'PH' of:key to:'F' startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:34:27 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4315
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4316
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4317
rule11:key 
    "NYSIIS step 11: if not first character, eliminate all 'H' preceded or followed by a vowel.
     Answers a new string; key itself is not modified.
     Which characters get removed is decided on the unmodified key only."

    |result|

    result := key copy.
    "walk from the end towards index 2: removing at idx only shifts the characters after idx,
     so all indices still to be visited are the same in result and in key"
    key size to:2 by:-1 do:[:idx |
        ((key at:idx) = $H
         and:[ (key at:idx - 1) isVowel
               or:[ (idx < key size) and:[ (key at:idx + 1) isVowel ] ] ]) ifTrue:[
            result := (result copyFrom:1 to:idx - 1) , (result copyFrom:idx + 1 to:result size)
        ]
    ].
    ^ result
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4333
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4334
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4335
rule12:key
    "Rule 12: change 'KN' to 'N'; every remaining 'K' becomes 'C'.
     Answers a new string.

     The 'KN' pass must run first and must produce 'N'. It used to produce 'K',
     which the second pass then turned into 'C', so 'KN' ended up as 'C'
     instead of 'N'."

    |k|

    k := self transcodeAll:'KN' of:key to:'N' startingAt:1.
    ^ self transcodeAll:'K' of:k to:'C' startingAt:1

    "Modified (format): / 02-08-2017 / 14:34:48 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4345
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4346
rule13:key
    "Rule 13: every 'M' except one in the first position becomes 'N'.
     The search starts at index 2, which is what protects the first character.
     Answers a new string."

    ^ self
        transcodeAll:'M'
        of:key
        to:'N'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:35:00 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4353
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4354
rule14:key
    "Rule 14: every 'Q' except one in the first position becomes 'G'.
     The search starts at index 2, which is what protects the first character.
     Answers a new string."

    ^ self
        transcodeAll:'Q'
        of:key
        to:'G'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:35:08 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4361
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4362
rule15:key
    "Rule 15: every 'SH' found anywhere in key becomes 'S'.
     Answers a new string."

    ^ self
        transcodeAll:'SH'
        of:key
        to:'S'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:18 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4369
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4370
rule16:key
    "Rule 16: every 'SCH' found anywhere in key becomes 'S'.
     Answers a new string."

    ^ self
        transcodeAll:'SCH'
        of:key
        to:'S'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:25 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4377
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4378
rule17:key
    "Rule 17: every 'YW' found anywhere in key becomes 'Y'.
     Answers a new string."

    ^ self
        transcodeAll:'YW'
        of:key
        to:'Y'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:33 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4385
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4386
rule18:key
    "Rule 18: change 'Y' to 'A', except in the first and in the last position.
     Answers a new string. An empty key is answered as an empty copy;
     previously it raised an error, because #last was sent to the empty key."

    |k|

    "starting at index 2 protects the first character, but not the last one ..."
    k := self transcodeAll:'Y' of:key to:'A' startingAt:2.
    "... so restore a trailing 'Y' (the replacement keeps the length, so the last positions match)"
    (key notEmpty and:[ key last = $Y ]) ifTrue:[
        k at:k size put:$Y
    ].
    ^ k

    "Modified (format): / 02-08-2017 / 14:35:44 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4398
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4399
rule19:key
    "Rule 19: every 'WR' found anywhere in key becomes 'R'.
     Answers a new string."

    ^ self
        transcodeAll:'WR'
        of:key
        to:'R'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:52 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4406
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4407
rule1:key
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     Answers a new string, which is empty if key is empty or consists
     of 'S' and 'Z' characters only."

    |k|

    k := key copy.
    "the emptiness check must come first: the loop used to send #last to the
     empty string once everything had been stripped (keys like 'S' or 'SZ')"
    [
        k notEmpty and:[ 'SZ' includes:k last ]
    ] whileTrue:[ k := k copyFrom:1 to:(k size - 1) ].
    ^ k
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4417
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4418
rule20:key
    "Rule 20: every 'Z' except one in the first position becomes 'S'.
     The search starts at index 2, which is what protects the first character.
     Answers a new string."

    ^ self
        transcodeAll:'Z'
        of:key
        to:'S'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:36:00 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4425
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4426
rule21:key
    "Rule 21: a key ending in 'AY' gets that ending replaced by 'Y'.
     Delegates to the trailing-suffix helper, which searches from index
     (key size - 1), i.e. the only place where a two character suffix can start.
     Answers a new string."

    ^ self
        transcodeTrailing:#( 'AY' )
        of:key
        to:'Y'

    "Modified (format): / 02-08-2017 / 14:36:08 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4433
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4434
rule22:key
    "Rule 22: remove trailing vowels.
     Answers a new string, which is empty if key consists of vowels only."

    |k|

    k := key copy.
    "the emptiness check must come first: an all-vowel key (e.g. 'AA') is stripped
     down to the empty string, and #last used to be sent to it"
    [ k notEmpty and:[ k last isVowel ] ] whileTrue:[
        k := k copyButLast
    ].
    ^ k

    "Modified: / 02-08-2017 / 14:36:42 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4446
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4447
rule23:key
    "Rule 23: collapse every run of identical adjacent characters into a single character.
     Answers a new string; keys shorter than two characters are answered as a plain copy.

     The loop used to be written as 'k size to:2 do:', which counts upwards:
     it never ran for keys longer than two characters (nothing was collapsed)
     and it accessed index 0 for keys shorter than two characters."

    |k|

    k := key copy.
    "compare against the unmodified key and go back to front, so that a removal
     never moves a position that still has to be visited"
    key size to:2 by:-1 do:[:i |
        (key at:i) = (key at:i - 1) ifTrue:[
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ]
    ].
    ^ k
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4463
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4464
rule24:key originalKey:originalKey
    "Rule 24: if the first character of the original surname was a vowel,
     append it (in uppercase) to the code.
     Answers a new string in either case. An empty originalKey has no first
     character and therefore appends nothing; previously it raised an error
     from #first."

    (originalKey notEmpty and:[ originalKey first isVowel ]) ifTrue:[
        ^ key , originalKey first asString asUppercase
    ].
    ^ key copy
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4474
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4475
rule2:key
    "Rule 2: rewrite the beginning of the name: a leading 'MAC' becomes 'MC',
     a leading 'PF' becomes 'F'. Answers a new string.
     The two cases exclude each other (a key rewritten to 'MC...' cannot start
     with 'PF'), so each one can answer right away."

    (key startsWith:'MAC') ifTrue:[
        ^ 'MC' , (key copyFrom:4)
    ].
    (key startsWith:'PF') ifTrue:[
        ^ 'F' , (key copyFrom:3)
    ].
    ^ key copy

    "Modified (format): / 02-08-2017 / 14:31:40 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4492
rule3:key
    "Rule 3: rewrite the end of the name; the groups are tried in this order,
     each one working on the result of the previous one:
        IX          => IC
        EX          => EC
        YE, EE, IE  => Y
        NT, ND      => D
     Answers a new string."

    |result|

    result := key.
    "each row is: (suffixes to look for) (replacement)"
    #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
    ) do:[:row |
        result := self transcodeTrailing:(row at:1) of:result to:(row at:2)
    ].
    ^ result

    "Modified (format): / 02-08-2017 / 14:32:24 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4510
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4511
rule4:key
    "Rule 4: every 'EV' becomes 'EF', unless it is at the very start of the name.
     The search starts at index 2, which is what protects a leading 'EV'.
     Answers a new string."

    ^ self
        transcodeAll:'EV'
        of:key
        to:'EF'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:32:35 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4518
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4519
rule5:key
    "Rule 5: the first character of the name is used as the first character of the key.
     Nothing to do here, because the conversion is done in place on the name;
     the argument itself (not a copy) is answered."

    ^ key

    "Modified (comment): / 02-08-2017 / 14:32:45 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4527
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4528
rule6:key
    "Rule 6: remove any 'W' that follows a vowel.
     Answers a new string. A 'W' in the first position is never touched,
     because the search starts at index 2."

    |k i|

    k := key copy.
    i := 2.
    [
        (i := k indexOf:$W startingAt:i) > 0
    ] whileTrue:[
        (k at:i - 1) isVowel ifTrue:[
            "cut out the 'W'; the following character moves to position i,
             so the next search has to look at i again"
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ] ifFalse:[
            "keep this 'W' and step over it. Without this step the search found
             the very same 'W' again and again (endless loop, e.g. for 'SWAN')"
            i := i + 1
        ]
    ].
    ^ k
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4544
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4545
rule7:key
    "Rule 7: every vowel in key is replaced by 'A'; all other characters are kept.
     Answers the collection produced by #collect:, key itself is not modified."

    ^ key collect:[:eachChar |
        eachChar isVowel
            ifTrue:[ $A ]
            ifFalse:[ eachChar ]
    ]

    "Modified: / 02-08-2017 / 14:33:56 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4551
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4552
rule8:key
    "Rule 8: every 'GHT' found anywhere in key becomes 'GT'.
     Answers a new string."

    ^ self
        transcodeAll:'GHT'
        of:key
        to:'GT'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:34:05 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4559
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4560
rule9:key
    "Rule 9: every 'DG' found anywhere in key becomes 'G'.
     Answers a new string."

    ^ self
        transcodeAll:'DG'
        of:key
        to:'G'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:34:15 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4567
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4568
transcodeAll:aString of:key to:replacementString startingAt:start
    "Answer a copy of key in which every occurrence of aString at or after
     index start has been replaced by replacementString.

     After each replacement the search is repeated from the same start index,
     so an occurrence which is only formed by a replacement is rewritten too
     (e.g. 'SSHH' with 'SH' => 'S' ends up as 'SS').
     Consequently replacementString must not contain aString, or the loop
     would never terminate.

     A start index below 1 is treated as 1. Callers compute it from the key size
     (key size - 1, or key size - suffix size + 1), which gives 0 or a negative
     number for very short keys; such an index is no longer passed on to
     #indexOfSubCollection:startingAt:."

    |k i firstIndex|

    firstIndex := start max:1.
    k := key copy.
    [
        (i := k indexOfSubCollection:aString startingAt:firstIndex) > 0
    ] whileTrue:[
        k := (k copyFrom:1 to:i - 1) , replacementString
                    , (k copyFrom:i + aString size to:k size)
    ].
    ^ k
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4580
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4581
transcodeTrailing:anArrayOfStrings of:key to:replacementString
    "Answer a copy of key in which a trailing occurrence of any of the strings
     in anArrayOfStrings has been replaced by replacementString.
     The candidates are tried in the given order, each one on the result of the
     previous one. The search for a candidate starts at the only index where it
     can still fit as a suffix: (current size - candidate size) + 1."

    ^ anArrayOfStrings
        inject:key copy
        into:[:soFar :suffix |
            self
                transcodeAll:suffix
                of:soFar
                to:replacementString
                startingAt:(soFar size - suffix size) + 1
        ]
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4594
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4595
!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4596
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4597
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4598
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4599
    Implementation of the PHONEM algorithm, as described in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4600
    'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4601
    Ein Programm fuer kontextsensitive phonetische Textumwandlung
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4602
    ct Magazin fuer Computer & Technik 25/1998'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4603
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4604
    This algorithm deals better with the German language (it cares for umlauts)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4605
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4606
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4607
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4608
!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4609
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4610
encode:aString 
    "answer the PHONEM code for aString.

     Processing steps:
       1. the string is converted to uppercase
       2. two-letter groups are rewritten, scanning left to right over the
          whole string (including its last two characters)
       3. single characters are mapped (tr-like)
       4. characters outside of 'ABCDLMNORSUVWXY' are dropped
       5. runs of the same character are collapsed into one"

    |s idx t t2 digraphs|

    "/ two-letter group -> replacement, stored as consecutive pairs.
    "/ All keys are distinct, so the lookup order is irrelevant.
    digraphs := #(
        'SC' 'C'
        'SZ' 'C'
        'CZ' 'C'
        'TZ' 'C'
        'TS' 'C'
        'KS' 'X'
        'PF' 'V'
        'QU' 'KW'
        'PH' 'V'
        'UE' 'Y'
        'AE' 'E'
        'OE' 'Ö'
        'EI' 'AY'
        'EY' 'AY'
        'EU' 'OY'
        'AU' 'A§'
        'OU' '§ '
    ).

    s := aString asUppercase.

    idx := 1.
    "/ idx must be allowed to reach (size - 1), otherwise the final
    "/ two-letter group (e.g. the TZ in LUTZ or the KS in MARKS) is never rewritten
    [idx < s size] whileTrue:[
        t := s copyFrom:idx to:idx+1.
        t2 := nil.
        1 to:digraphs size by:2 do:[:i |
            (t2 isNil and:[t = (digraphs at:i)]) ifTrue:[
                t2 := digraphs at:i+1
            ].
        ].
        t2 notNil ifTrue:[
            "/ idx is intentionally not advanced here: the replacement
            "/ is looked at again together with the character following it
            s := (s copyTo:idx-1),t2,(s copyFrom:idx+2 to:s size)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "/ (both strings have the same length; P maps to B, T maps to D)
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVBDU'.
    "/ drop everything which is not in the PHONEM alphabet
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    "/ collapse runs of the same character
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ s

    "
     self basicNew encode:'müller'  -> 'MYLR'    
     self basicNew encode:'mueller' -> 'MYLR'    
     self basicNew encode:'möller'  -> 'MYLR'
     self basicNew encode:'miller'  -> 'MYLR'     
     self basicNew encode:'muller'  -> 'MULR' 
     self basicNew encode:'muler'   -> 'MULR' 

     self basicNew encode:'lutz'    -> 'LUC'
     self basicNew encode:'luz'     -> 'LUC'
     self basicNew encode:'marks'   -> 'MARX'
     self basicNew encode:'marx'    -> 'MARX'

     self basicNew phoneticStringsFor:'müller'  #('MYLR')    
     self basicNew phoneticStringsFor:'mueller' #('MYLR')    
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')     
     self basicNew phoneticStringsFor:'muller'  #('MULR') 
     self basicNew phoneticStringsFor:'muler'   #('MULR') 
     
     self basicNew phoneticStringsFor:'schmidt'     #('CMYD')
     self basicNew phoneticStringsFor:'schneider'   #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'     #('VYCR')
     self basicNew phoneticStringsFor:'weber'       #('VBR')
     self basicNew phoneticStringsFor:'weeber'      #('VBR')
     self basicNew phoneticStringsFor:'webber'      #('VBR')
     self basicNew phoneticStringsFor:'wepper'      #('VBR')
     
     self basicNew phoneticStringsFor:'meyer'       #('MAYR')
     self basicNew phoneticStringsFor:'maier'       #('MAYR')
     self basicNew phoneticStringsFor:'mayer'       #('MAYR')
     self basicNew phoneticStringsFor:'mayr'        #('MAYR')
     self basicNew phoneticStringsFor:'meir'        #('MAYR')
     
     self basicNew phoneticStringsFor:'wagner'      #('VACNR')
     self basicNew phoneticStringsFor:'schulz'      #('CULC')
     self basicNew phoneticStringsFor:'becker'      #('BCR')
     self basicNew phoneticStringsFor:'hoffmann'    #('OVMAN')
     self basicNew phoneticStringsFor:'haus'        #('AUS')
     
     self basicNew phoneticStringsFor:'schäfer'     #('CVR')
     self basicNew phoneticStringsFor:'scheffer'    #('CVR')
     self basicNew phoneticStringsFor:'schaeffer'   #('CVR')
     self basicNew phoneticStringsFor:'schaefer'    #('CVR')
    "

    "Created: / 28-07-2017 / 15:38:08 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4691
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4692
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4693
!PhoneticStringUtilities::Caverphone2StringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4694
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4695
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4696
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4697
    Caverphone (2) Algorithm:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4698
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4699
    see http://caversham.otago.ac.nz/files/working/ctp150804.pdf
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4700
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4701
    Caverphone 2.0 is being made available for free use for the benefit of anyone who has a use for it,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4702
    with the proviso that the Caversham Project at the University of Otago should be acknowledged as the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4703
    original source (which is hereby done ;-).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4704
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4705
    •  Start with a Surname or Firstname
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4706
    •  Convert to lowercase
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4707
        This coding system is case sensitive, implementations should acknowledge that a is not the same as A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4708
    •  Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4709
        The main intention of this is to remove spaces, hyphens, and apostrophes.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4710
        example:  o'brian becomes obrian
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4711
    •  If the name starts with cough make it cou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4712
        2 is being used as a temporary placeholder to indicate a consonant which we are no longer interested in.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4713
    •  If the name starts with rough make it rou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4714
    •  If the name starts with tough make it tou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4715
    •  If the name starts with enough make it enou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4716
    •  If the name starts with gn make it 2n
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4717
    •  If the name ends with mb make it m2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4718
    •  replace cq with 2q
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4719
    •  replace ci with si
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4720
    •  replace ce with se
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4721
    •  replace cy with sy
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4722
    •  replace tch with 2ch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4723
    •  replace c with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4724
    •  replace q with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4725
    •  replace x with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4726
    •  replace v with f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4727
    •  replace dg with 2g
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4728
    •  replace tio with sio
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4729
    •  replace tia with sia
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4730
    •  replace d with t
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4731
    •  replace ph with fh
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4732
    •  replace b with p
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4733
    •  replace sh with s2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4734
    •  replace z with s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4735
    •  replace an initial vowel with an A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4736
    •  replace all other vowels with a 3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4737
        3 is a temporary placeholder marking a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4738
    •  replace 3gh3 with 3kh3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4739
        Exceptions are dealt with before the general case. gh between vowels is an exception to the more general gh rule.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4740
    •  replace gh with 22
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4741
    •  replace g with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4742
    •  replace groups of the letter s with a S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4743
        Continuous strings of s are replaced by a single S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4744
    •  replace groups of the letter t with a T
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4745
    •  replace groups of the letter p with a P
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4746
    •  replace groups of the letter k with a K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4747
    •  replace groups of the letter f with a F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4748
    •  replace groups of the letter m with a M
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4749
    •  replace groups of the letter n with a N
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4750
    •  replace w3 with W3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4751
    •  replace wy with Wy
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4752
    •  replace wh3 with Wh3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4753
    •  replace why with Why
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4754
    •  replace w with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4755
    •  replace an initial h with an A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4756
    •  replace all other occurrences of h with a 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4757
    •  replace r3 with R3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4758
    •  replace ry with Ry
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4759
    •  replace r with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4760
    •  replace l3 with L3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4761
    •  replace ly with Ly
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4762
    •  replace l with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4763
    •  replace j with y
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4764
    •  replace y3 with Y3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4765
    •  replace y with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4766
    •  remove all 2s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4767
    •  remove all 3s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4768
    •  put six (v1) / ten (v2) 1s on the end
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4769
    •  take the first six characters as the code (caverphone 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4770
       / take the first ten characters as the code (caverphone 2);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4771
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4772
     self new encode:'david'      -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4773
     self new encode:'whittle'    -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4774
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4775
     self new encode:'Stevenson'  -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4776
     self new encode:'Peter'      -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4777
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4778
     self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4779
     self new encode:'lee'        -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4780
     self new encode:'Gutierrez'  -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4781
     self new encode:'Pfister'    -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4782
     self new encode:'Jackson'    -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4783
     self new encode:'Tymczak'    -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4784
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4785
     self new encode:'add'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4786
     self new encode:'aid'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4787
     self new encode:'at'         -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4788
     self new encode:'art'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4789
     self new encode:'earth'      -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4790
     self new encode:'head'       -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4791
     self new encode:'old'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4792
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4793
     self new encode:'ready'      -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4794
     self new encode:'rather'     -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4795
     self new encode:'able'       -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4796
     self new encode:'appear'     -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4797
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4798
     self new encode:'Deedee'     -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4799
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4800
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4801
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4802
!PhoneticStringUtilities::Caverphone2StringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4803
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4804
encode:word 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4805
    |txt|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4806
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4807
    word size == 0 ifTrue:[^ '1111111111' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4808
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4809
    "/ 1. Convert to lowercase
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4810
    txt := word asLowercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4811
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4812
    "/ 2. Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4813
    txt := txt select:#isLetter.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4814
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4815
    #(
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4816
    "/  oldSeq newSeq repeat
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4817
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4818
    "/ 2.5. Remove final e
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4819
        'e$' '' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4820
    "/ 3. Handle various start options
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4821
        '^cough' 'cou2f' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4822
        '^rough' 'rou2f' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4823
        '^tough' 'tou2f' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4824
        '^enough' 'enou2f' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4825
        '^trough' 'trou2f' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4826
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4827
        '^gn' '2n' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4828
        'mb$' 'm2' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4829
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4830
    "/ 4. Handle replacements
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4831
        'cq' '2q' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4832
        'ci' 'si' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4833
        'ce' 'se' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4834
        'cy' 'sy' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4835
        'tch' '2ch' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4836
        'c' 'k' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4837
        'q' 'k' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4838
        'x' 'k' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4839
        'v' 'f' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4840
        'dg' '2g' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4841
        'tio' 'sio' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4842
        'tia' 'sia' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4843
        'd' 't' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4844
        'ph' 'fh' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4845
        'b' 'p' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4846
        'sh' 's2' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4847
        'z' 's' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4848
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4849
        '^a' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4850
        '^e' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4851
        '^i' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4852
        '^o' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4853
        '^u' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4854
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4855
        'a' '3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4856
        'e' '3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4857
        'i' '3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4858
        'o' '3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4859
        'u' '3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4860
        'j' 'y' true 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4861
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4862
        '^y3' 'Y3' false 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4863
        '^y' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4864
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4865
        'y' '3'  true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4866
        '3gh3' '3kh3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4867
        'gh' '22' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4868
        'g' 'k' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4869
        's'  'S' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4870
        'SS' 'S' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4871
        't'  'T' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4872
        'TT' 'T' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4873
        'p'  'P' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4874
        'PP' 'P' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4875
        'k'  'K' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4876
        'KK' 'K' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4877
        'f'  'F' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4878
        'FF' 'F' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4879
        'm'  'M' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4880
        'MM' 'M' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4881
        'n'  'N' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4882
        'NN' 'N' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4883
        'w3' 'W3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4884
        'wh3' 'Wh3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4885
        'w$' '3'  false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4886
        'w' '2' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4887
        '^h' 'A' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4888
        'h' '2' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4889
        'r3' 'R3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4890
        'r$' '3'  false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4891
        'r' '2' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4892
        'l3' 'L3' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4893
        'l$' '3' false
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4894
        'l' '2' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4895
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4896
    "/ 5. removals
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4897
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4898
        '2' '' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4899
        '3$' 'A' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4900
        '3' '' true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4901
    ) inGroupsOf:3 do:[:pat :repl :repeat|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4902
        |s txtBefore|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4903
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4904
        txtBefore := txt.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4905
        (pat startsWith:$^) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4906
            s := pat copyButFirst.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4907
            repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4908
                [txt startsWith:s] whileTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4909
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4910
                (txt startsWith:s) ifTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4911
            ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4912
        ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4913
            (pat endsWith:$$) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4914
                s := pat copyButLast.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4915
                repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4916
                    [txt endsWith:s] whileTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4917
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4918
                    (txt endsWith:s) ifTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4919
                ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4920
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4921
                repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4922
                    txt := txt copyReplaceAllSubcollections:pat with:repl
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4923
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4924
                    txt := txt copyReplaceSubcollection:pat with:repl
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4925
                ]    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4926
            ]    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4927
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4928
        "/ txt ~= txtBefore ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4929
        "/     Transcript showCR:(pat,' | ',repl,' -> ',txt).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4930
        "/ ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4931
    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4932
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4933
    "/ 6. put ten 1s on the end
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4934
    txt := txt,'1111111111'.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4935
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4936
    "/ 7. take the first ten characters as the code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4937
    ^ txt copyTo:10
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4938
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4939
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4940
     self new encode:'david'      -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4941
     self new encode:'whittle'    -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4942
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4943
     self new encode:'Stevenson'  -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4944
     self new encode:'Peter'      -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4945
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4946
     self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4947
     self new encode:'lee'        -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4948
     self new encode:'Gutierrez'  -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4949
     self new encode:'Pfister'    -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4950
     self new encode:'Jackson'    -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4951
     self new encode:'Tymczak'    -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4952
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4953
     self new encode:'add'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4954
     self new encode:'aid'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4955
     self new encode:'at'         -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4956
     self new encode:'art'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4957
     self new encode:'earth'      -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4958
     self new encode:'head'       -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4959
     self new encode:'old'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4960
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4961
     self new encode:'ready'      -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4962
     self new encode:'rather'     -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4963
     self new encode:'able'       -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4964
     self new encode:'appear'     -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4965
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4966
     self new encode:'Deedee'     -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4967
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4968
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4969
    "Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4970
    "Modified: / 02-08-2017 / 01:42:35 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4971
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4972
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4973
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4974
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4975
documentation
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4976
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4977
     The 'Kölner Phonetik' (Cologne phonetics) code is for the German language
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4978
     what the Soundex code is for English:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4979
        it returns similar strings for similar sounding words 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4980
     (but is specifically aware of the pronunciation of German and eastern languages).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4981
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4982
     There are some other differences to soundex, though: 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4983
        its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4984
        it does not start with the first character of the input, but returns a purely numeric string.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4985
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4986
     This algorithm was described by Postel 1969,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4987
     See  http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4988
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4989
    self new phoneticStringsFor:'Müller-Lüdenscheidt' -> #('65752682')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4990
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4991
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4992
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4993
examples
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4994
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4995
   Words that sound similar (in German pronunciation) will yield similar codes:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4996
   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4997
     #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4998
        'Müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  4999
        'Miller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5000
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5001
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5002
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5003
        'Mülherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5004
        'Myler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5005
        'Millar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5006
        'Myller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5007
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5008
        'Müler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5009
        'Muehler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5010
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5011
        'Müllerr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5012
        'Muehlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5013
        'Muellar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5014
        'Mueler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5015
        'Mülleer'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5016
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5017
        'Nüller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5018
        'Nyller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5019
        'Niler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5020
        'Czerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5021
        'Tscherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5022
        'Czernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5023
        'Tschernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5024
        'Schernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5025
        'Scherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5026
        'Scherno'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5027
        'Czerne'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5028
        'Zerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5029
        'Tzernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5030
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5031
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5032
        'Breschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5033
        'Breschnjeff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5034
        'Braeschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5035
        'Braessneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5036
        'Pressneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5037
        'Presznäph'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5038
        'Präschnäf' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5039
        'Breschnjeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5040
        'Breschnijeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5041
        'Breschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5042
        'Bräschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5043
        'Braschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5044
        'Broschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5045
     ) do:[:w |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5046
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5047
     ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5048
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5049
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5050
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5051
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5052
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5053
encode: aString
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5054
    "return a koelner phonetic code.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5055
     The koelnerPhonetic code is for the german language what the soundex code is for english;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5056
     it returns similar strings for similar sounding words.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5057
     There are some differences to soundex, though: 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5058
        its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5059
        it does not start with the first character of the input.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5060
     This algorithm is described by Postel 1969"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5061
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5062
    |in ret val rslt|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5063
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5064
    in := aString withoutSeparators asLowercase.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5065
    in := in copyReplaceString:'ph' withString:'f'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5066
    (in includesAny:'öäüß') ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5067
        in := in copyReplaceAll:$ü withAll:'u'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5068
        in := in copyReplaceAll:$ä withAll:'a'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5069
        in := in copyReplaceAll:$ö withAll:'o'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5070
        in := in copyReplaceAll:$ß withAll:'ss'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5071
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5072
    in := in select:[:ch | ch isLetter].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5073
    in := '#',in,'#'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5074
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5075
    ret := ''.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5076
    1 to:in size-2 do:[:i |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5077
        |sub|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5078
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5079
        sub := in copyFrom:i to:i+2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5080
        val := (i==1) 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5081
                    ifTrue:[ self convertFirst:sub ] 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5082
                    ifFalse:[ self convertRest:sub ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5083
        ret := ret,val
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5084
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5085
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5086
    ret := ret select:[:ch | ch ~= $-].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5087
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5088
    (ret startsWith:'0') ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5089
        ret := '0',(ret select:[:ch | ch ~= $0]).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5090
    ] ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5091
        ret := ret select:[:ch | ch ~= $0].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5092
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5093
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5094
    rslt := String streamContents:[:s |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5095
        |prev|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5096
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5097
        ret do:[:ch |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5098
            ch ~= prev ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5099
                s nextPut:ch
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5100
            ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5101
            prev := ch.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5102
        ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5103
      ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5104
    ^ rslt.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5105
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5106
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5107
     #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5108
        'Müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5109
        'Miller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5110
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5111
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5112
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5113
        'Mülherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5114
        'Myler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5115
        'Millar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5116
        'Myller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5117
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5118
        'Müler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5119
        'Muehler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5120
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5121
        'Müllerr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5122
        'Muehlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5123
        'Muellar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5124
        'Mueler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5125
        'Mülleer'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5126
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5127
        'Nüller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5128
        'Nyller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5129
        'Niler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5130
        'Czerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5131
        'Tscherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5132
        'Czernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5133
        'Tschernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5134
        'Schernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5135
        'Scherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5136
        'Scherno'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5137
        'Czerne'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5138
        'Zerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5139
        'Tzernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5140
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5141
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5142
        'Breschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5143
        'Breschnjeff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5144
        'Braeschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5145
        'Braessneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5146
        'Pressneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5147
        'Presznäph'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5148
        'Präschnäf' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5149
        'Breschnjeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5150
        'Breschnijeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5151
        'Breschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5152
     ) do:[:w |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5153
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5154
     ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5155
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5156
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5157
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5158
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnew' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5159
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5160
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braeschneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5161
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braessneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5162
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Pressneff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5163
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Presznäph' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5164
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Präschnäf' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5165
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnjeff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5166
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnijeff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5167
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnieff' -> '17863'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5168
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5169
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5170
     self basicNew encode:'müller'      -> '657'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5171
     self basicNew encode:'möller'      -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5172
     self basicNew encode:'miller'      -> '657'     
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5173
     self basicNew encode:'muller'      -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5174
     self basicNew encode:'muler'       -> '657'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5175
     self basicNew encode:'schmidt'     -> '862'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5176
     self basicNew encode:'schneider'   -> '8627' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5177
     self basicNew encode:'fischer'     -> '387' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5178
     self basicNew encode:'weber'       -> '317' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5179
     self basicNew encode:'meyer'       -> '67' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5180
     self basicNew encode:'wagner'      -> '3467' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5181
     self basicNew encode:'schulz'      -> '858'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5182
     self basicNew encode:'becker'      -> '147'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5183
     self basicNew encode:'hoffmann'    -> '036'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5184
     self basicNew encode:'schäfer'     -> '837' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5185
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5186
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5187
    "Created: / 28-07-2017 / 15:24:33 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5188
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5189
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5190
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5191
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5192
convertFirst:chars
    "Answer the code string for a word's first letter.
     Only the characters at index 2 and 3 of chars are looked at here
     (presumably chars is a previous/current/next window - to be confirmed
     against the sender).
     - a, e, i, j, y, o, u at index 2 answer '0'
     - c at index 2 answers '4' if index 3 holds one of a, h, k, l, o, q, r, u, x
       and '8' otherwise
     Everything else, including a chars whose size is not 3,
     is handed over to convertRest:"

    |current following|

    chars size == 3 ifFalse:[
        ^ self convertRest:chars
    ].

    current := chars at:2.
    ('aeijyou' includes:current) ifTrue:[
        ^ '0'
    ].
    current == $c ifTrue:[
        following := chars at:3.
        ^ ('ahkloqrux' includes:following) ifTrue:['4'] ifFalse:['8']
    ].

    ^ self convertRest:chars

    "Modified: / 29-07-2017 / 14:22:20 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5249
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5250
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5251
convertRest:chars
    "Answer the code string for the character at index 2 of chars,
     which must be a string of exactly 3 characters.
     The pattern comments in the table below read chars as
     previous / current / next character, with # standing for any character.

     Each table entry has the form (prefix suffixOrChar code):
        (aString nil code)     - matches if chars starts with aString
        (nil aString code)     - matches if chars ends with aString
        (nil aCharacter code)  - matches if the character at index 2 is aCharacter
        (nil nil code)         - catch-all
     Entries are tried in order and the first match wins; therefore the
     context dependent rules must stay in front of the single character rules.

     Raises an error if chars is not 3 characters long
     (and answers '?' if that error is proceeded)."

    chars size == 3 ifFalse:[
        self error:'cannot happen'.
        ^ '?'
    ].

    #(
        "/ used to be matchpattern code,
        "/ but doing these glob-matches is too slow.
        "/ changed to:
        "/    start nil  code
        "/    nil   end  code
        "/    nil   char code
        "/
        (nil 'ds' " '#ds' " '8')
        (nil 'dc' " '#dc' " '8')
        (nil 'dz' " '#dz' " '8')
        (nil 'ts' " '#ts' " '8')
        (nil 'tc' " '#tc' " '8')
        (nil 'tz' " '#tz' " '8')
        (nil $d   " '#d#' " '2')
        (nil $t   " '#t#' " '2')
        ('cx' nil " 'cx#' " '8')
        ('kx' nil " 'kx#' " '8')
        ('qx' nil " 'qx#' " '8')
        (nil $x   " '#x#' " '48')
        ('sc' nil " 'sc#' " '8')
        "/ c after z is coded like c after s (was missing, so that
        "/ zca, zco, zcu, zch, zck, zcx and zcq wrongly answered 4)
        ('zc' nil " 'zc#' " '8')
        ('sz' nil " 'sz#' " '8')
        (nil 'ca' " '#ca' " '4')
        (nil 'co' " '#co' " '4')
        (nil 'cu' " '#cu' " '4')
        (nil 'ch' " '#ch' " '4')
        (nil 'ck' " '#ck' " '4')
        (nil 'cx' " '#cx' " '4')
        (nil 'cq' " '#cq' " '4')
        (nil $c   " '#c#' " '8')
        (nil $a   " '#a#' " '0')
        (nil $e   " '#e#' " '0')
        (nil $i   " '#i#' " '0')
        (nil $j   " '#j#' " '0')
        (nil $y   " '#y#' " '0')
        (nil $o   " '#o#' " '0')
        (nil $u   " '#u#' " '0')
        (nil $h   " '#h#' " '-')
        (nil $l   " '#l#' " '5')
        (nil $r   " '#r#' " '7')
        (nil $m   " '#m#' " '6')
        (nil $n   " '#n#' " '6')
        (nil $s   " '#s#' " '8')
        (nil $z   " '#z#' " '8')
        (nil $b   " '#b#' " '1')
        (nil $p   " '#p#' " '1')
        (nil $f   " '#f#' " '3')
        (nil $v   " '#v#' " '3')
        (nil $w   " '#w#' " '3')
        (nil $g   " '#g#' " '4')
        (nil $k   " '#k#' " '4')
        (nil $q   " '#q#' " '4')
        (nil nil  " '###' " '?')
    ) do:[:vector |
        |prefix pattern|

        (prefix := vector at:1) notNil ifTrue:[
            "/ prefix
            (chars startsWith:prefix) ifTrue:[^ (vector at:3) ].
        ] ifFalse:[
            pattern := vector at:2.
            pattern isCharacter ifTrue:[
                "/ middle character compare
                (chars at:2) == pattern ifTrue:[^ (vector at:3) ].
            ] ifFalse:[
                pattern isString ifTrue:[
                    "/ suffix
                    (chars endsWith:pattern) ifTrue:[^ (vector at:3) ].
                ] ifFalse:[
                    "/ catch-all entry; its code is '?'
                    ^ (vector at:3)
                ]
            ]
        ].
    ].

    "/ not reached, because the catch-all entry above always answers;
    "/ answer '?' like the size check does, in case the error is proceeded
    self error:'cannot happen'.
    ^ '?'

    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5338
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5339
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5340
!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5341
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5342
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5343
"
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5344
    Miracode (also called << American Soundex >>) is like Soundex, with the
    addition that h and w are discarded if they separate consonants.

    These variants are of particular importance because they were used by the
    U.S. National Archives.
    Most archive data was encoded with Miracode,
    but there are some (older) entries encoded with Simplified Soundex.

    The HW-rule was documented as a standard in 1910,
    but in practice the data of the 1880, 1900 and 1910
    censuses was encoded with mixed methods.

     self new encode:'washington' -> 'W252'
     self new encode:'lee'        -> 'L000'
     self new encode:'Gutierrez'  -> 'G362'
     self new encode:'Pfister'    -> 'P236'
     self new encode:'Jackson'    -> 'J250'
     self new encode:'Tymczak'    -> 'T522'

    notice:
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
     SoundexStringComparator new encode:'Ashcraft'  -> 'A226'

    see also:
        https://www.archives.gov/research/census/soundex.html
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5369
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5370
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5371
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5372
!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'private'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5373
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5374
encode:word 
    "same as inherited, but cares for W and H:
     a consonant which follows an H or W is compared against the code
     of the last letter before that H/W. It is therefore dropped if both
     have the same code (eg. 'Ashcraft' -> 'A261', not 'A226').
     All other uncoded letters (eg. vowels) still separate equal codes.

     Returns a 4-character string: the uppercased first character of word,
     followed by up to 3 code characters, right-padded with '0'.
     Returns an empty string for an empty word (there is no first letter
     to start the code with).

     NOTE(review): relies on #translate:, which is defined elsewhere;
     presumably it answers a one-character code string, '0' or nil for
     letters without a code - confirm against the superclass."

    |u p t prevCode|

    "/ guard: 'u first' below would raise an error on an empty collection
    word isEmpty ifTrue:[^ ''].

    u := word asUppercase.
    p := u first asString.
    prevCode := self translate:u first.
    u from:2 to:u size do:[:c | 
        t := self translate:c.
        (t notNil 
        and:[ t ~= '0' 
        and:[ t ~= prevCode ]]) ifTrue:[
            p := p , t.
            "/ early exit as soon as the code is complete
            p size >= 4 ifTrue:[^ p copyFrom:1 to:4 ].
        ].
        "/ W and H are transparent: they do not change the remembered code,
        "/ so equal codes on both sides of them collapse into one
        (c ~= $W and:[c ~= $H]) ifTrue:[
            prevCode := t.
        ].
    ].
    "/ pad short codes with zeros
    [ p size < 4 ] whileTrue:[
        p := p , '0'
    ].
    ^ (p copyFrom:1 to:4)

    "
     self new encode:'Ashcraft'   -> 'A261'
     self new encode:'lee'        -> 'L000'
     self new encode:''           -> ''
    "

    "Created: / 02-08-2017 / 00:19:47 / cg"
    "Modified (comment): / 02-08-2017 / 14:30:47 / cg"
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5401
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5402
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5403
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator class methodsFor:'documentation'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5404
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5405
documentation
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5406
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5407
     The 'Spanish Phonetic' code is for the Spanish language 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5408
     what the soundex code is for english:
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5409
        it returns similar strings for similar sounding words 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5410
     (but is specifically aware of the pronunciation of spanish) . 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5411
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5412
     There are some other differences to soundex, though: 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5413
        its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5414
        it does not start with the first character of the input, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5415
        but returns a pure numeric string,
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5416
        it uses different character groups
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5417
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5418
     This algorithm was described by María del Pilar Angeles, Adrian Espino-Gamez, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5419
     and Jonathan Gil-Moncada, in 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5420
        'Comparison of a Modified Spanish phonetic,
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5421
         Soundex, and Phonex coding functions during data matching process'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5422
     See  https://www.researchgate.net/publication/285589803_Comparison_of_a_Modified_Spanish_Phonetic_Soundex_and_Phonex_coding_functions_during_data_matching_process
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5423
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5424
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5425
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5426
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5427
examples
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5428
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5429
   words sounding similar (spanish pronunciation) will deliver a similar code:
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5430
   
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5431
     #(
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5432
        'Marıa'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5433
        'Pilar'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5434
        'Angeles'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5435
        'Adrian'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5436
        'Gamez'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5437
     ) do:[:w |
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5438
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::SpanishPhoneticCodeStringComparator new encode:w)
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5439
     ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5440
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5441
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5442
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5443
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'api'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5444
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5445
encode: aString
    "return a spanish phonetic code.
     The spanishPhonetic code is for the spanish language what the soundex code is for english;
     it returns similar strings for similar sounding words. 
     There are some differences to soundex, though: 
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input,
        it uses different character groups.
     This algorithm is described by María del Pilar Angeles, Adrian Espino-Gamez, 
     Jonathan Gil-Moncada.

     The input is uppercased (after #withoutSeparators); then each character
     which differs from its immediate predecessor is mapped to the digit of
     its letter group. Characters which belong to no group (eg. vowels)
     produce no output, but still count as predecessor.
     Returns a string of digits; empty if no character is in any group."

    |in groups|

    "/ letter group -> code digit.
    "/ The nasal group used to contain only the N-acute (U+0143), so the
    "/ spanish N-tilde (U+00D1) was never encoded; N-acute is kept so that
    "/ codes generated for it before do not change.
    groups := #(
        ('P'     $0)
        ('BV'    $1)
        ('FH'    $2)
        ('DT'    $3)
        ('SZCX'  $4)
        ('YL'    $5)
        ('NÑŃM'  $6)
        ('QK'    $7)
        ('GJ'    $8)
        ('R'     $9)
    ).

    in := aString withoutSeparators asUppercase.

    ^ String streamContents:[:out |
        |prev|

        in do:[:ch |
            "/ compare by value, not identity: NOTE(review) characters
            "/ outside the single-byte range may not be unique objects
            ch = prev ifFalse:[
                |group|

                group := groups 
                            detect:[:each | (each at:1) includes:ch] 
                            ifNone:[nil].
                group notNil ifTrue:[
                    out nextPut:(group at:2).
                ].
                prev := ch.
            ].
        ].
    ].

    "
     self new encode:'Jose'     -> '84'
     self new encode:'perro'    -> '09'
     self new encode:'Muñoz'    -> '664'
    "

    "Created: / 28-07-2017 / 15:24:33 / cg"
    "Modified: / 01-08-2017 / 18:48:50 / cg"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5497
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5498
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5499
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'private'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5500
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5501
convertFirst:chars
    "answer the code string for a 3-element character window, using the
     rules for the first position; anything not covered here is passed
     on to #convertRest:.
     NOTE(review): presumably the element at index 2 is the character being
     encoded and index 3 is its successor - confirm against the caller.

     Written as glob patterns (# matching any single character), the
     rules handled here are:
        #a# #e# #i# #j# #y# #o# #u#                     -> '0'
        #ca #ch #ck #cl #co #cq #cr #cu #cx             -> '4'
        #c# (any other successor)                       -> '8'
     (explicit character tests are used instead of glob matching)"

    |middle successor|

    chars size == 3 ifFalse:[
        ^ self convertRest:chars
    ].

    middle := chars at:2.
    ('aeijyou' includes:middle) ifTrue:[
        ^ '0'
    ].

    middle == $c ifTrue:[
        successor := chars at:3.
        ^ ('ahkloqrux' includes:successor) 
            ifTrue:['4'] 
            ifFalse:['8']
    ].

    ^ self convertRest:chars

    "Modified: / 29-07-2017 / 14:22:20 / cg"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5558
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5559
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5560
convertRest:chars
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5561
    chars size == 3 ifFalse:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5562
        self error:'cannot happen'.
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5563
        ^ '?' 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5564
    ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5565
    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5566
    #(
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5567
        "/ used to be matchpattern code,
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5568
        "/ but doing these glob-matches is too slow.
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5569
        "/ changed to:
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5570
        "/    start nil  code
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5571
        "/    nil   end  code
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5572
        "/    nil   char code
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5573
        "/    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5574
        (nil 'ds' " '#ds' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5575
        (nil 'dc' " '#dc' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5576
        (nil 'dz' " '#dz' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5577
        (nil 'ts' " '#ts' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5578
        (nil 'tc' " '#tc' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5579
        (nil 'tz' " '#tz' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5580
        (nil $d   " '#d#' " '2')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5581
        (nil $t   " '#t#' " '2')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5582
        ('cx' nil " 'cx#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5583
        ('kx' nil " 'kx#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5584
        ('qx' nil " 'qx#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5585
        (nil $x   " '#x#' " '48')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5586
        ('sc' nil " 'sc#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5587
        ('sz' nil " 'sz#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5588
        (nil 'ca' " '#ca' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5589
        (nil 'co' " '#co' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5590
        (nil 'cu' " '#cu' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5591
        (nil 'ch' " '#ch' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5592
        (nil 'ck' " '#ck' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5593
        (nil 'cx' " '#cx' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5594
        (nil 'cq' " '#cq' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5595
        (nil $c   " '#c#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5596
        (nil $a   " '#a#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5597
        (nil $e   " '#e#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5598
        (nil $i   " '#i#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5599
        (nil $j   " '#j#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5600
        (nil $y   " '#y#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5601
        (nil $o   " '#o#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5602
        (nil $u   " '#u#' " '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5603
        (nil $h   " '#h#' " '-')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5604
        (nil $l   " '#l#' " '5')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5605
        (nil $r   " '#r#' " '7')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5606
        (nil $m   " '#m#' " '6')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5607
        (nil $n   " '#n#' " '6')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5608
        (nil $s   " '#s#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5609
        (nil $z   " '#z#' " '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5610
        (nil $b   " '#b#' " '1')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5611
        (nil $p   " '#p#' " '1')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5612
        (nil $f   " '#f#' " '3')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5613
        (nil $v   " '#v#' " '3')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5614
        (nil $w   " '#w#' " '3')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5615
        (nil $g   " '#g#' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5616
        (nil $k   " '#k#' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5617
        (nil $q   " '#q#' " '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5618
        (nil nil  " '###' " '?')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5619
    ) do:[:vector |
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5620
        |v1 v2|
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5621
        
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5622
        (v1 := vector at:1) notNil ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5623
            "/ prefix
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5624
            (chars startsWith:v1) ifTrue:[^ (vector at:3) ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5625
        ] ifFalse:[                       
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5626
            (v2 := vector at:2) isCharacter ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5627
                "/ middle character compare
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5628
                (chars at:2) == v2 ifTrue:[^ (vector at:3) ]. 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5629
            ] ifFalse:[    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5630
                v2 isString ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5631
                    "/ suffix
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5632
                    (chars endsWith:v2) ifTrue:[^ (vector at:3) ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5633
                ] ifFalse:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5634
                   ^ '?' 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5635
                ]
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5636
            ]
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5637
        ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5638
        
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5639
        "/ (vector first match:chars) ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5640
        "/     ^ vector second
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5641
        "/ ]
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5642
    ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5643
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5644
    self error:'cannot happen'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5645
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5646
    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5647
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5648
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5649
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5650
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5651
version
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  5652
    "Answer the version identification string of this class.
     NOTE(review): the literal is the unexpanded Header keyword - presumably substituted by the source code management system; confirm."
    ^ '$Header$'
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5653
!
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5654
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5655
version_CVS
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  5656
    "Answer the version identification string of this class; the selector suggests it is the CVS-specific variant.
     NOTE(review): the literal is the unexpanded Header keyword - presumably substituted by CVS on checkout; confirm."
    ^ '$Header$'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5657
! !
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  5658