PhoneticStringUtilities.st
author Claus Gittinger <cg@exept.de>
Sat, 02 May 2020 21:40:13 +0200
changeset 5476 7355a4b11cb6
parent 5456 3040ec2b4531
permissions -rw-r--r--
#FEATURE by cg class: Socket class added: #newTCPclientToHost:port:domain:domainOrder:withTimeout: changed: #newTCPclientToHost:port:domain:withTimeout:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     1
"{ Encoding: utf8 }"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
     2
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    17
"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
    18
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	instanceVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	classVariableNames:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	poolDictionaries:''
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    26
Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    27
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    28
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    29
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    30
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    31
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    32
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    33
PhoneticStringUtilities::PhoneticStringComparator subclass:#DaitchMokotoffStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    34
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    35
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    36
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    37
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    38
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    39
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    40
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    41
PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    42
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    43
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    44
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    45
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    46
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    47
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    48
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    49
PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    50
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    51
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    52
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    53
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    54
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
    55
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    56
PhoneticStringUtilities::PhoneticStringComparator subclass:#SingleResultPhoneticStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    57
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    58
	classVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    59
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    60
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    61
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    62
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    63
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MRAStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    64
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    65
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    66
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    67
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    68
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    69
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    70
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    71
	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    72
		currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    73
	classVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    74
	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    75
	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    76
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
    77
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    78
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SoundexStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    79
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    80
	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    81
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    82
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    83
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    84
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    85
PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    86
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    87
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    88
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    89
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    90
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    91
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    92
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#NYSIISStringComparator
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    93
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    94
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    95
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    96
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    97
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
    98
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
    99
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#PhonemStringComparator
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   100
	instanceVariableNames:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   101
	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   102
	poolDictionaries:''
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   103
	privateIn:PhoneticStringUtilities
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   104
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   105
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   106
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#Caverphone2StringComparator
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   107
	instanceVariableNames:''
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   108
	classVariableNames:'CharacterTranslationDict'
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   109
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   110
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   111
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   112
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   113
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   114
	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   115
	classVariableNames:'CharacterTranslationDict'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   116
	poolDictionaries:''
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   117
	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   118
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   119
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   120
PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   121
	instanceVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   122
	classVariableNames:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   123
	poolDictionaries:''
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   124
	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   125
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   126
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   127
PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SpanishPhoneticCodeStringComparator
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   128
	instanceVariableNames:''
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   129
	classVariableNames:'CharacterTranslationDict'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   130
	poolDictionaries:''
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   131
	privateIn:PhoneticStringUtilities
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   132
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
   133
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
copyright
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   138
 COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   139
 COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   140
              All Rights Reserved
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   141
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   142
 This software is furnished under a license and may be used
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   143
 only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144
 inclusion of the above copyright notice.   This software may not
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   145
 be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   146
 other person.  No title to or ownership of the software is
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   147
 hereby transferred.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   148
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   149
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   150
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   151
documentation
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   152
"
2445
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   153
    Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   154
    These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2285
diff changeset
   155
    to provide good results for non-english languages.
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
   156
    
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   157
    soundexCode
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   158
        this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   159
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   160
    nysiis
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   161
        a modified soundex algorithm
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   162
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   163
    miracode
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   164
        another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   165
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   166
    mySQLSoundex
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   167
        another modified soundex algorithm used in mySQL.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   168
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   169
    koelner phoneticCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   170
        provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   171
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   172
    Double metaphone 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   173
        works with most european languages.
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   174
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   175
    phonem
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   176
        described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   177
        from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   178
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   179
    mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   180
        Match Rating Approach Phonetic Algorithm Developed by Western Airlines in 1977.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   181
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   182
    caverphone2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   183
        better than soundex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   184
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   185
    spanish phonetic code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   186
        an algorithm slightly adjusted to spanish names
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   187
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   188
    More info for german readers is found in:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   189
        http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   190
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   191
!
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   192
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   193
sampleData
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   194
"
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   195
    for some of the 50 most common german names, we get:
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   196
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
   197
                            ext. 
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   198
    name        soundex   soundex   metaphone   phonet  phonet2     phonix      daitch  phonem      koeln  caverphone2  mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   199
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   200
    müller      M460    54600000    MLR         MÜLA    NILA        M4000000    689000  MYLR        657    MLA1111111   MLR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   201
    schmidt     S530    25300000    SKMTT       SHMIT   ZNIT        S5300000    463000  CMYD        862    SKMT111111   SCHMDT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   202
    schneider   S536    25360000    SKNTR       SHNEIDA ZNEITA      S5300000    463900  CNAYDR      8627   SKNTA11111   SCHNDR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   203
    fischer     F260    12600000    FSKR        FISHA   FIZA        F8000000    749000  VYCR        387    FSKA111111   FSCHR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   204
    weber       W160    16000000    WBR         WEBA    FEBA        $1000000    779000  VBR         317    WPA1111111   WBR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   205
    meyer       M600    56000000    MYR         MEIA    NEIA        M0000000    619000  MAYR        67     MA11111111   MYR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   206
    wagner      W256    25600000    WKNR        WAKNA   FAKNA       $2500000    756900  VACNR       3467   WKNA111111   WGNR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   207
    schulz      S420    24200000    SKLS        SHULS   ZULZ        S4800000    484000  CULC        858    SKS1111111   SCHLZ
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   208
    becker      B260    12600000    BKR         BEKA    BEKA        B2000000    759000  BCR         147    PKA1111111   BCKR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   209
    hoffmann    H155    15500000    HFMN        HOFMAN  UFNAN       $7550000    576600  OVMAN       036    AFMN111111   HFMN
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   210
    schäfer     S160    21600000    SKFR        SHEFA   ZEFA        S7000000    479000  CVR         837    SKFA111111   SCHFR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   211
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   212
    |cls|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   213
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   214
    cls := MRAStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   215
    cls := SoundexStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   216
    cls := KoelnerPhoneticCodeStringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   217
    cls := Caverphone2StringComparator.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   218
    #('müller' 'schmidt' 'schneider' 'fischer' 'weber' 'meyer' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   219
      'wagner' 'schulz'  'becker'    'hoffmann' 'schäfer')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   220
    do:[:name |
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   221
        Transcript show:''''; show:name; show:''' -> '''; show:(cls encode:name); showCR:''''.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   222
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   223
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   224
    KoelnerPhoneticCodeStringComparator encode:'Müller-Lüdenscheidt'  -> '65752682'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   225
"
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   226
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   227
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   228
!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   229
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   230
koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   231
    "return a koelner phonetic code.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   232
     The koelnerPhonetic code is for the german language what the soundex code is for english;
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   233
     it returns similar strings for similar sounding words.
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   234
     There are some differences to soundex, though: 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   235
        its length is not limited to 4, but depends on the length of the original string;
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   236
        it does not start with the first character of the input.
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   237
     This algorithm is described by Postel 1969"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   238
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   239
    ^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   240
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   241
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   242
     #(
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   243
        'Müller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   244
        'Miller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   245
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   246
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   247
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   248
        'Mülherr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   249
        'Myler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   250
        'Millar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   251
        'Myller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   252
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   253
        'Müler'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   254
        'Muehler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   255
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   256
        'Müllerr'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   257
        'Muehlherr'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   258
        'Muellar'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   259
        'Mueler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   260
        'Mülleer'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   261
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   262
        'Nüller'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   263
        'Nyller'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   264
        'Niler'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   265
        'Czerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   266
        'Tscherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   267
        'Czernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   268
        'Tschernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   269
        'Schernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   270
        'Scherny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   271
        'Scherno'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   272
        'Czerne'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
        'Zerny'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
        'Tzernie'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
        'Breschnew'
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
     ) do:[:w |
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
     ].
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
    "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   282
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   283
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   284
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   285
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   286
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   287
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   288
     PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   289
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   290
!
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   291
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   292
miracodeCodeOf:aString
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   293
    "return a miracode soundex phonetic code or nil.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   294
     Miracode is a slightly modified soundex algorithm.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   295
     Notice that there are better algorithms around (doubleMetaphone) "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   296
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   297
    ^ (MiracodeStringComparator new phoneticStringsFor:aString) first
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   298
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   299
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   300
     PhoneticStringUtilities miracodeCodeOf:'claus'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   301
     PhoneticStringUtilities miracodeCodeOf:'clause'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   302
     PhoneticStringUtilities miracodeCodeOf:'close'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   303
     PhoneticStringUtilities miracodeCodeOf:'smalltalk' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   304
     PhoneticStringUtilities miracodeCodeOf:'smaltalk'  
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   305
     PhoneticStringUtilities miracodeCodeOf:'smaltak'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   306
     PhoneticStringUtilities miracodeCodeOf:'smaltok'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   307
     PhoneticStringUtilities miracodeCodeOf:'smoltok'   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   308
     PhoneticStringUtilities miracodeCodeOf:'aa'        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   309
     PhoneticStringUtilities miracodeCodeOf:'by'        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   310
     PhoneticStringUtilities miracodeCodeOf:'bab'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   311
     PhoneticStringUtilities miracodeCodeOf:'bob'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   312
     PhoneticStringUtilities miracodeCodeOf:'bop'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   313
     PhoneticStringUtilities miracodeCodeOf:'pub'       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   314
    "
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   315
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   316
    "Created: / 28-07-2017 / 15:32:41 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   317
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   318
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   319
mySQLSoundexCodeOf:aString
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   320
    "return the mySQL soundex code. The mysql soundex code is different from the miracode 'american' soundex
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   321
     (no 4char limitation; different order of duplicate vowel vs. duplicate code elimination).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   322
     Notice that there are better algorithms around (doubleMetaphone) "
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   323
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   324
    ^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   325
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   326
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   327
     #(
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   328
        'Müller'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   329
        'Miller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   330
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   331
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   332
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   333
        'Mülherr'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   334
        'Myler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   335
        'Millar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   336
        'Myller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   337
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   338
        'Müler'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   339
        'Muehler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   340
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   341
        'Müllerr'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   342
        'Muehlherr'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   343
        'Muellar'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   344
        'Mueler'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   345
        'Mülleer'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   346
        'Mueller'
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   347
        'Nüller'
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   348
        'Nyller'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   349
        'Niler'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   350
        'Czerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   351
        'Tscherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   352
        'Czernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   353
        'Tschernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   354
        'Schernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   355
        'Scherny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   356
        'Scherno'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   357
        'Czerne'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   358
        'Zerny'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   359
        'Tzernie'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   360
        'Breschnew'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   361
     ) do:[:w |
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   362
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   363
     ].
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   364
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   365
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   366
    "
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   367
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   368
     PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   369
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   370
     PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   371
     PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'. 
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   372
     PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'. 
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
   373
     PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   374
    "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   375
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   376
    "Modified (comment): / 28-07-2017 / 15:34:03 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   377
!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   378
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   379
soundexCodeOf:aString
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   380
    "return a soundex phonetic code or nil.
2207
6a98ae779773 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2197
diff changeset
   381
     Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   382
     tool when searching for words where the correct spelling is unknown.
4194
12b5e3e2219b #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4184
diff changeset
   383
     (read Knuth or search the web if you don't know what a soundex code is).
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   384
     Caveat: 'similar sounding words' means: 'similar sounding in English'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   385
     Notice that there are better algorithms around (doubleMetaphone) "
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   386
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   387
    ^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   388
2210
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   389
"/ old code - now use code in private class...
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   390
"/    |inStream codeStream ch last lch codeLength codes code lastCode|
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   391
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   392
"/    inStream := aString readStream.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   393
"/    inStream skipSeparators.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   394
"/    inStream atEnd ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   395
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   396
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   397
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   398
"/    ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   399
"/    ch isLetter ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   400
"/        ^ nil
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   401
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   402
"/    codeLength := 0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   403
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   404
"/    codes := Dictionary new.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   405
"/    codes atAll:'bpfv'     put:$1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   406
"/    codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   407
"/    codes atAll:'dt'       put:$3.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   408
"/    codes atAll:'l'        put:$4.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   409
"/    codes atAll:'nm'       put:$5.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   410
"/    codes atAll:'r'        put:$6.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   411
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   412
"/    codeStream := WriteStream on:(String new:4).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   413
"/    codeStream nextPut:(ch asUppercase).
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   414
"/    last := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   415
"/    lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   416
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   417
"/    [inStream atEnd] whileFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   418
"/        ch := inStream next.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   419
"/        lch := ch asLowercase.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   420
"/        lch = last ifFalse:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   421
"/            last := lch.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   422
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   423
"/            code := codes at:lch ifAbsent:nil.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   424
"/            (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   425
"/                codeLength < 3 ifTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   426
"/                    codeStream nextPut:code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   427
"/                    codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   428
"/                    codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   429
"/                ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   430
"/            ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   431
"/            lastCode := code.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   432
"/        ]
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   433
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   434
"/    [ codeLength < 3 ] whileTrue:[
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   435
"/        codeStream nextPut:$0.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   436
"/        codeLength := codeLength + 1.
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   437
"/    ].
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   438
"/
9c428fe51c78 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2209
diff changeset
   439
"/    ^ codeStream contents
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   440
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   441
    "
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   442
     PhoneticStringUtilities soundexCodeOf:'claus'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   443
     PhoneticStringUtilities soundexCodeOf:'clause'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   444
     PhoneticStringUtilities soundexCodeOf:'close'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   445
     PhoneticStringUtilities soundexCodeOf:'smalltalk' 
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   446
     PhoneticStringUtilities soundexCodeOf:'smaltalk'  
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   447
     PhoneticStringUtilities soundexCodeOf:'smaltak'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   448
     PhoneticStringUtilities soundexCodeOf:'smaltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   449
     PhoneticStringUtilities soundexCodeOf:'smoltok'   
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   450
     PhoneticStringUtilities soundexCodeOf:'aa'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   451
     PhoneticStringUtilities soundexCodeOf:'by'        
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   452
     PhoneticStringUtilities soundexCodeOf:'bab'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   453
     PhoneticStringUtilities soundexCodeOf:'bob'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   454
     PhoneticStringUtilities soundexCodeOf:'bop'       
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   455
    "
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   456
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   457
    "Modified (comment): / 28-07-2017 / 15:33:53 / cg"
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   458
! !
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   459
3648
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   460
!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   461
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   462
isUtilityClass
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   463
    ^ self == PhoneticStringUtilities
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   464
! !
fccb127ba02e #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3646
diff changeset
   465
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   466
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   467
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   468
defaultClass
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   469
	^SoundexStringComparator
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   470
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   471
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   472
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   473
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   474
documentation
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   475
"
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   476
    abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   477
    They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   478
    to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   479
    
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   480
    Notice that some comparators are better suited to particular languages.
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   481
"
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   482
!
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   483
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   484
examples
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   485
"
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   486
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   487
            does:'miller' soundLike:'miler'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   488
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   489
     PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   490
            does:'miller' soundLike:'milner'.   
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   491
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   492
     PhoneticStringUtilities::SoundexStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   493
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   494
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   495
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   496
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   497
"
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   498
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   499
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   500
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   501
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   502
new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   503
    ^ self basicNew initialize.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   504
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   505
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   506
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   507
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   508
isAbstract
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   509
    ^ self == PhoneticStringUtilities::PhoneticStringComparator
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   510
! !
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
   511
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   512
!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'utilities'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   513
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   514
does:aString soundLike:anotherString 
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   515
    "return true, if aString sounds similar to anotherString"
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   516
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   517
    ^ self new does:aString soundLike:anotherString.
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   518
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   519
    "
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   520
     PhoneticStringUtilities::SoundexStringComparator does:'miller' soundLike:'miler'.   
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   521
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   522
     PhoneticStringUtilities::SoundexStringComparator does:'miller' soundLike:'milner'.   
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   523
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   524
     PhoneticStringUtilities::SoundexStringComparator does:'müller' soundLike:'mueller'.   
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   525
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   526
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator does:'müller' soundLike:'mueller'.   
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   527
     PhoneticStringUtilities::DoubleMetaphoneStringComparator does:'müller' soundLike:'mueller'.   
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   528
    "
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   529
!
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   530
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   531
encode:word
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   532
    "return a phonetic encoding for a word.
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   533
     This can e.g. be used as a key to map/hash similar-sounding words"
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   534
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   535
    ^ (self new phoneticStringsFor:word) first
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   536
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   537
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   538
     SoundexStringComparator encode:'Fischer'             -> 'F260'
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   539
     SoundexStringComparator encode:'Fiescher'            -> 'F260'
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   540
     Caverphone2StringComparator encode:'Fischer'         -> 'FSKA111111'
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   541
     Caverphone2StringComparator encode:'Fiescher'        -> 'FSKA111111'
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   542
     MRAStringComparator encode:'Fischer'                 -> 'FSCHR'
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   543
     MRAStringComparator encode:'Fiescher'                -> 'FSCHR'
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   544
     SpanishPhoneticCodeStringComparator encode:'Fischer'  -> '24429'
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   545
     SpanishPhoneticCodeStringComparator encode:'Fiescher' -> '24429'
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   546
     DoubleMetaphoneStringComparator encode:'Fischer'      -> 'FXR'
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   547
     DoubleMetaphoneStringComparator encode:'Fiescher'     -> 'FXR'
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   548
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   549
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   550
    "Created: / 02-08-2017 / 01:15:50 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   551
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   552
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   553
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   554
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   555
does:aString soundLike:anotherString 
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   556
    "return true if any phonetic encoding of aString equals any phonetic encoding of anotherString"
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   557
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   558
    |translations1 translations2|
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   559
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   560
    translations1 := self phoneticStringsFor:aString.    
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   561
    translations2 := self phoneticStringsFor:anotherString.  
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   562
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   563
    ^ translations1 contains:[:t1 | 
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   564
        translations2 contains:[:t2 | t1 = t2]
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   565
    ]
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   566
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   567
    "
5236
28c398151366 #BUGFIX by exept
Claus Gittinger <cg@exept.de>
parents: 5235
diff changeset
   568
     PhoneticStringUtilities::SoundexStringComparator new does:'miller' soundLike:'miler'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   569
            
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   570
     PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   571
            does:'miller' soundLike:'milner'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   572
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   573
     PhoneticStringUtilities::SoundexStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   574
            does:'müller' soundLike:'mueller'.   
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   575
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   576
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
   577
            does:'müller' soundLike:'mueller'.   
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   578
    "
4467
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   579
c946d9eea9ec #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4194
diff changeset
   580
    "Modified (comment): / 13-07-2017 / 17:51:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   581
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   582
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   583
phoneticStringsFor: aString
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   584
    "Should answer an array of alternate phonetic strings for the given input string."
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   585
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   586
    self subclassResponsibility
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   587
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   588
    "
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   589
     (PhoneticStringUtilities::SoundexStringComparator new
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   590
            phoneticStringsFor:'miller') first 
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   591
            
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   592
     'miller' asSoundexCode 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   593
    "
4485
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   594
735edd20512a #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4467
diff changeset
   595
    "Modified (comment): / 27-07-2017 / 15:07:59 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   596
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   597
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   598
!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   599
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   600
initialize
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   601
    "Invoked when a new instance is created."
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   602
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   603
    "/ please change as required (and remove this comment)
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   604
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   605
    "/ super initialize.   -- commented since inherited method does nothing
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   606
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   607
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   608
!PhoneticStringUtilities::DaitchMokotoffStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   609
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   610
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   611
"
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   612
    self encode:'AUERBACH' -> 097400, 097500
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   613
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   614
    Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   615
    The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   616
    yielding greater accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   617
    but differences in spelling.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   618
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   619
    The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   620
        - coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   621
        - the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   622
        - rules to encode multi-character n-grams
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   623
        - multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   624
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   625
    This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   626
        encode:aString            - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   627
        phoneticStringsFor:String - branching enabled, all codes will be returned, separated by '|'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   628
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   629
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   630
        'Wikipedia - Daitch-Mokotoff Soundex'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   631
            http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   632
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   633
        'Avotaynu - Soundexing and Genealogy'    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   634
            http://www.avotaynu.com/soundex.htm
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   635
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   636
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
   637
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   638
javaCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   639
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   640
/*
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   641
 * Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   642
 * contributor license agreements.  See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   643
 * this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   644
 * The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   645
 * (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   646
 * the License.  You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   647
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   648
 *      http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   649
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   650
 * Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   651
 * distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   652
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   653
 * See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   654
 * limitations under the License.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   655
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   656
package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   657
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   658
import org.apache.commons.codec.CharEncoding;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   659
import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   660
import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   661
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   662
import java.io.InputStream;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   663
import java.util.*;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   664
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   665
/**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   666
 * Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   667
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   668
 * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   669
 * accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   670
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   671
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   672
 * The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   673
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   674
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   675
 * <li>coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   676
 * <li>the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   677
 * <li>rules to encoded multi-character n-grams
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   678
 * <li>multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   679
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   680
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   681
 * This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   682
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   683
 * <li>{@link #encode(String)} - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   684
 * <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   685
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   686
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   687
 * Note: this implementation has additional branching rules compared to the original description of the algorithm. The
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   688
 * rules can be customized by overriding the default rules contained in the resource file
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   689
 * {@code org/apache/commons/codec/language/dmrules.txt}.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   690
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   691
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   692
 * This class is thread-safe.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   693
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   694
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   695
 * @see Soundex
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   696
 * @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   697
 * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   698
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   699
 * @version $Id$
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   700
 * @since 1.10
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   701
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   702
public class DaitchMokotoffSoundex implements StringEncoder {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   703
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   704
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   705
     * Inner class representing a branch during DM soundex encoding.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   706
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   707
    private static final class Branch {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   708
        private final StringBuilder builder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   709
        private String cachedString;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   710
        private String lastReplacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   711
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   712
        private Branch() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   713
            builder = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   714
            lastReplacement = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   715
            cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   716
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   717
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   718
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   719
         * Creates a new branch, identical to this branch.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   720
         *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   721
         * @return a new, identical branch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   722
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   723
        public Branch createBranch() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   724
            final Branch branch = new Branch();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   725
            branch.builder.append(toString());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   726
            branch.lastReplacement = this.lastReplacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   727
            return branch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   728
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   729
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   730
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   731
        public boolean equals(final Object other) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   732
            if (this == other) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   733
                return true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   734
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   735
            if (!!(other instanceof Branch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   736
                return false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   737
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   738
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   739
            return toString().equals(((Branch) other).toString());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   740
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   741
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   742
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   743
         * Finish this branch by appending '0's until the maximum code length has been reached.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   744
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   745
        public void finish() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   746
            while (builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   747
                builder.append('0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   748
                cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   749
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   750
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   751
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   752
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   753
        public int hashCode() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   754
            return toString().hashCode();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   755
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   756
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   757
        /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   758
         * Process the next replacement to be added to this branch.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   759
         *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   760
         * @param replacement
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   761
         *            the next replacement to append
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   762
         * @param forceAppend
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   763
         *            indicates if the default processing shall be overridden
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   764
         */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   765
        public void processNextReplacement(final String replacement, final boolean forceAppend) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   766
            final boolean append = lastReplacement == null || !!lastReplacement.endsWith(replacement) || forceAppend;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   767
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   768
            if (append && builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   769
                builder.append(replacement);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   770
                // remove all characters after the maximum length
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   771
                if (builder.length() > MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   772
                    builder.delete(MAX_LENGTH, builder.length());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   773
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   774
                cachedString = null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   775
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   776
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   777
            lastReplacement = replacement;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   778
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   779
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   780
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   781
        public String toString() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   782
            if (cachedString == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   783
                cachedString = builder.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   784
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   785
            return cachedString;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   786
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   787
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   788
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   789
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   790
     * Inner class for storing rules.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   791
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   792
    private static final class Rule {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   793
        private final String pattern;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   794
        private final String[] replacementAtStart;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   795
        private final String[] replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   796
        private final String[] replacementDefault;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   797
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   798
        protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   799
                final String replacementDefault) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   800
            this.pattern = pattern;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   801
            this.replacementAtStart = replacementAtStart.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   802
            this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   803
            this.replacementDefault = replacementDefault.split("\\|");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   804
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   805
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   806
        public int getPatternLength() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   807
            return pattern.length();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   808
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   809
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   810
        public String[] getReplacements(final String context, final boolean atStart) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   811
            if (atStart) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   812
                return replacementAtStart;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   813
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   814
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   815
            final int nextIndex = getPatternLength();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   816
            final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   817
            if (nextCharIsVowel) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   818
                return replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   819
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   820
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   821
            return replacementDefault;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   822
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   823
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   824
        private boolean isVowel(final char ch) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   825
            return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   826
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   827
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   828
        public boolean matches(final String context) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   829
            return context.startsWith(pattern);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   830
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   831
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   832
        @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   833
        public String toString() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   834
            return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   835
                    Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   836
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   837
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   838
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   839
    private static final String COMMENT = "//";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   840
    private static final String DOUBLE_QUOTE = "\"";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   841
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   842
    private static final String MULTILINE_COMMENT_END = "*/";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   843
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   844
    private static final String MULTILINE_COMMENT_START = "/*";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   845
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   846
    /** The resource file containing the replacement and folding rules */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   847
    private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   848
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   849
    /** The code length of a DM soundex value. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   850
    private static final int MAX_LENGTH = 6;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   851
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   852
    /** Transformation rules indexed by the first character of their pattern. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   853
    private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   854
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   855
    /** Folding rules. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   856
    private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   857
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   858
    // Loads the rule and folding tables from RESOURCE_FILE once, when the class is initialized,
    // then orders every rule list so that longer patterns come first.
    static {
        final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
        if (rulesIS == null) {
            throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
        }

        final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
        try {
            parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
        } finally {
            // Closing the scanner also closes the underlying resource stream, even if parsing failed.
            scanner.close();
        }

        // sort RULES by pattern length in descending order
        for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
            final List<Rule> ruleList = rule.getValue();
            Collections.sort(ruleList, new Comparator<Rule>() {
                @Override
                public int compare(final Rule rule1, final Rule rule2) {
                    return rule2.getPatternLength() - rule1.getPatternLength();
                }
            });
        }
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   879
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   880
    private static void parseRules(final Scanner scanner, final String location,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   881
            final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   882
        int currentLine = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   883
        boolean inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   884
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   885
        while (scanner.hasNextLine()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   886
            currentLine++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   887
            final String rawLine = scanner.nextLine();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   888
            String line = rawLine;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   889
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   890
            if (inMultilineComment) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   891
                if (line.endsWith(MULTILINE_COMMENT_END)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   892
                    inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   893
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   894
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   895
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   896
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   897
            if (line.startsWith(MULTILINE_COMMENT_START)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   898
                inMultilineComment = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   899
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   900
                // discard comments
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   901
                final int cmtI = line.indexOf(COMMENT);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   902
                if (cmtI >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   903
                    line = line.substring(0, cmtI);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   904
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   905
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   906
                // trim leading-trailing whitespace
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   907
                line = line.trim();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   908
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   909
                if (line.length() == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   910
                    continue; // empty lines can be safely skipped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   911
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   912
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   913
                if (line.contains("=")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   914
                    // folding
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   915
                    final String[] parts = line.split("=");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   916
                    if (parts.length !!= 2) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   917
                        throw new IllegalArgumentException("Malformed folding statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   918
                                " parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   919
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   920
                        final String leftCharacter = parts[0];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   921
                        final String rightCharacter = parts[1];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   922
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   923
                        if (leftCharacter.length() !!= 1 || rightCharacter.length() !!= 1) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   924
                            throw new IllegalArgumentException("Malformed folding statement - " +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   925
                                    "patterns are not single characters: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   926
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   927
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   928
                        asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   929
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   930
                } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   931
                    // rule
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   932
                    final String[] parts = line.split("\\s+");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   933
                    if (parts.length !!= 4) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   934
                        throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   935
                                " parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   936
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   937
                        try {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   938
                            final String pattern = stripQuotes(parts[0]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   939
                            final String replacement1 = stripQuotes(parts[1]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   940
                            final String replacement2 = stripQuotes(parts[2]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   941
                            final String replacement3 = stripQuotes(parts[3]);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   942
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   943
                            final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   944
                            final char patternKey = r.pattern.charAt(0);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   945
                            List<Rule> rules = ruleMapping.get(patternKey);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   946
                            if (rules == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   947
                                rules = new ArrayList<Rule>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   948
                                ruleMapping.put(patternKey, rules);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   949
                            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   950
                            rules.add(r);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   951
                        } catch (final IllegalArgumentException e) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   952
                            throw new IllegalStateException(
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   953
                                    "Problem parsing line '" + currentLine + "' in " + location, e);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   954
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   955
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   956
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   957
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   958
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   959
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   960
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   961
    private static String stripQuotes(String str) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   962
        if (str.startsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   963
            str = str.substring(1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   964
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   965
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   966
        if (str.endsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   967
            str = str.substring(0, str.length() - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   968
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   969
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   970
        return str;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   971
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   972
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   973
    /** Whether to use ASCII folding prior to encoding. */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   974
    private final boolean folding;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   975
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   976
    /**
     * Default constructor: delegates to {@link #DaitchMokotoffSoundex(boolean)} with
     * ASCII-folding switched on.
     */
    public DaitchMokotoffSoundex() {
        this(true);
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   982
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   983
    /**
     * Creates a new instance with an explicit ASCII-folding setting.
     * <p>
     * When folding is enabled, the cleanup step replaces every character that has an entry in
     * the folding table by its mapped counterpart (e.g. è -&gt; e) before the soundex rules
     * are applied.
     * </p>
     *
     * @param folding
     *            true if ASCII-folding shall be performed before encoding
     */
    public DaitchMokotoffSoundex(final boolean folding) {
        this.folding = folding;
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   996
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   997
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   998
     * Performs a cleanup of the input string before the actual soundex transformation.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
   999
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1000
     * Removes all whitespace characters and performs ASCII folding if enabled.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1001
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1002
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1003
     * @param input
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1004
     *            the input string to cleanup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1005
     * @return a cleaned up string
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1006
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1007
    private String cleanup(final String input) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1008
        final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1009
        for (char ch : input.toCharArray()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1010
            if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1011
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1012
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1013
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1014
            ch = Character.toLowerCase(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1015
            if (folding && FOLDINGS.containsKey(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1016
                ch = FOLDINGS.get(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1017
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1018
            sb.append(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1019
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1020
        return sb.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1021
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1022
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1023
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1024
     * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1025
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1026
     * This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1027
     * EncoderException if the supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1028
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1029
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1030
     * @see #soundex(String)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1031
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1032
     * @param obj
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1033
     *            Object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1034
     * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1035
     *         supplied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1036
     * @throws EncoderException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1037
     *             if the parameter supplied is not of type java.lang.String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1038
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1039
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1040
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1041
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1042
    public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1043
        if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1044
            throw new EncoderException(
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1045
                    "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1046
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1047
        return encode((String) obj);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1048
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1049
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1050
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1051
     * Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1052
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1053
     * @see #soundex(String)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1054
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1055
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1056
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1057
     * @return A DM Soundex code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1058
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1059
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1060
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1061
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1062
    public String encode(final String source) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1063
        if (source == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1064
            return null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1065
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1066
        return soundex(source, false)[0];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1067
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1068
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1069
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1070
     * Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1071
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1072
     * In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1073
     * separated by '|'.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1074
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1075
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1076
     * Example: the name "AUERBACH" is encoded as both
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1077
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1078
     * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1079
     * <li>097400</li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1080
     * <li>097500</li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1081
     * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1082
     * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1083
     * Thus the result will be "097400|097500".
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1084
     * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1085
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1086
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1087
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1088
     * @return A string containing a set of DM Soundex codes corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1089
     * @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1090
     *             if a character is not mapped
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1091
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1092
    public String soundex(final String source) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1093
        final String[] branches = soundex(source, true);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1094
        final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1095
        int index = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1096
        for (final String branch : branches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1097
            sb.append(branch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1098
            if (++index < branches.length) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1099
                sb.append('|');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1100
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1101
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1102
        return sb.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1103
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1104
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1105
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1106
     * Perform the actual DM Soundex algorithm on the input string.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1107
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1108
     * @param source
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1109
     *            A String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1110
     * @param branching
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1111
     *            If branching shall be performed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1112
     * @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1113
     *         selected branching mode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1114
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1115
    private String[] soundex(final String source, final boolean branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1116
        if (source == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1117
            return null;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1118
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1119
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1120
        final String input = cleanup(source);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1121
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1122
        final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1123
        currentBranches.add(new Branch());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1124
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1125
        char lastChar = '\0';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1126
        for (int index = 0; index < input.length(); index++) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1127
            final char ch = input.charAt(index);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1128
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1129
            // ignore whitespace inside a name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1130
            if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1131
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1132
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1133
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1134
            final String inputContext = input.substring(index);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1135
            final List<Rule> rules = RULES.get(ch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1136
            if (rules == null) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1137
                continue;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1138
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1139
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1140
            // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1141
            @SuppressWarnings("unchecked")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1142
            final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1143
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1144
            for (final Rule rule : rules) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1145
                if (rule.matches(inputContext)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1146
                    if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1147
                        nextBranches.clear();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1148
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1149
                    final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1150
                    final boolean branchingRequired = replacements.length > 1 && branching;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1151
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1152
                    for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1153
                        for (final String nextReplacement : replacements) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1154
                            // if we have multiple replacements, always create a new branch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1155
                            final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1156
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1157
                            // special rule: occurrences of mn or nm are treated differently
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1158
                            final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1159
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1160
                            nextBranch.processNextReplacement(nextReplacement, force);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1161
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1162
                            if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1163
                                nextBranches.add(nextBranch);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1164
                            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1165
                                break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1166
                            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1167
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1168
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1169
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1170
                    if (branching) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1171
                        currentBranches.clear();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1172
                        currentBranches.addAll(nextBranches);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1173
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1174
                    index += rule.getPatternLength() - 1;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1175
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1176
                }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1177
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1178
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1179
            lastChar = ch;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1180
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1181
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1182
        final String[] result = new String[currentBranches.size()];
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1183
        int index = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1184
        for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1185
            branch.finish();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1186
            result[index++] = branch.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1187
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1188
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1189
        return result;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1190
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1191
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  1192
END>>"
2211
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1193
! !
42fe8fe39e9c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2210
diff changeset
  1194
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1195
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1196
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1197
copyright
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1198
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1199
Copyright (c) 2002-2004 Robert Jarvis
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1200
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1201
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1202
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1203
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1204
the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1205
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1206
The above copyright notice and this permission notice shall be included in all copies or substantial 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1207
portions of the Software.
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1208
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1209
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1210
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1211
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1212
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1213
USE OR OTHER DEALINGS IN THE SOFTWARE.'
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1214
"
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1215
! !
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1216
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1217
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1218
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1219
isSlavoGermanic:aString
    "Answer true, if aString contains any of the letters or letter groups
     w, k, cz, witz, ä, ö, ü or ß.
     The comparison ignores case, so both 'walter' and 'WALTER' are detected
     (the patterns are lowercase; without the conversion an uppercase argument
     would never match)."

    |lowercased|

    lowercased := aString asLowercase.
    ^ #('w' 'k' 'cz' 'witz' 'ä' 'ö' 'ü' 'ß') contains:[:sub | lowercased includesString:sub]

    "
     self isSlavoGermanic:'walter'
     self isSlavoGermanic:'WALTER'
     self isSlavoGermanic:'horowitz'
     self isSlavoGermanic:'müller'
     self isSlavoGermanic:'miller'
    "

    "Modified: / 28-07-2017 / 10:14:38 / cg"
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1230
! !
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1231
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1232
!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1233
3685
01ebbac96899 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3648
diff changeset
  1234
documentation
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1235
"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1236
    The Double Metaphone algorithm
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1237
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1238
    see internet: https://en.wikipedia.org/wiki/Metaphone
2209
d544b2f9f239 comments
Claus Gittinger <cg@exept.de>
parents: 2208
diff changeset
  1239
"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1240
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1241
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1242
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1243
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1244
currentIndex
    "Answer the value of the currentIndex instance variable."

    ^ currentIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1246
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1247
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1248
currentIndex: anInteger
    "Set the currentIndex instance variable to anInteger."

    currentIndex := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1250
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1251
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1252
inputKey
    "Answer the key as stored by inputKey: (i.e. already uppercased)."

    ^ inputKey
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1254
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1255
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1256
inputKey: aString
    "Set the key to be translated.
     The key is stored in uppercase; the German umlauts and the sharp s
     are replaced by their two-letter transliterations (AE, OE, UE, SS),
     so that e.g. 'müller' and 'mueller' are stored as the same key."

    inputKey := aString asUppercase.
    "/ care for diaereses and the sharp s
    "/ NOTE(review): this assumes that asUppercase leaves the sharp s unchanged;
    "/ if it does not, the last replacement is simply a no-op.
    (inputKey includesAny:'ÄÖÜß') ifTrue:[
        inputKey := inputKey copyReplaceString:'Ä' withString:'AE'.
        inputKey := inputKey copyReplaceString:'Ö' withString:'OE'.
        inputKey := inputKey copyReplaceString:'Ü' withString:'UE'.
        inputKey := inputKey copyReplaceString:'ß' withString:'SS'.
    ].
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1264
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1265
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1266
primaryTranslation
    "Answer the primary translation collected so far."

    ^ primaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1268
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1269
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1270
primaryTranslation: anObject
    "Set the primary translation to anObject."

    primaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1272
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1273
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1274
secondaryTranslation
    "Answer the secondary translation collected so far."

    ^ secondaryTranslation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1276
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1277
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1278
secondaryTranslation: anObject
    "Set the secondary translation to anObject."

    secondaryTranslation := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1280
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1281
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1282
skipCount
    "Answer the value of the skipCount instance variable."

    ^ skipCount
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1284
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1285
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1286
skipCount: anInteger
    "Set the skipCount instance variable to anInteger."

    skipCount := anInteger
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1288
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1289
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1290
startIndex
    "Answer the value of the startIndex instance variable."

    ^ startIndex
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1292
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1293
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1294
startIndex: anObject
    "Set the startIndex instance variable to anObject."

    startIndex := anObject
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1296
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1297
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1298
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1299
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1300
phoneticStringsFor:aString 
    "Answer a two-element array holding the primary and the secondary
     phonetic translation of aString.
     The receiver is re-initialized before the translation is computed."

    self
        initialize;
        inputKey:aString;
        performInitialProcessing;
        processRemainingCharacters.

    ^ Array with:primaryTranslation with:secondaryTranslation

    "Modified (format): / 28-07-2017 / 11:25:02 / cg"

    "
     PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'muller'
     PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'mueller' 
     PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'müller' 
    "
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1316
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1317
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1318
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1319
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1320
initialize
    "Reset the translation state: both indices are set to 1,
     the skip count to 0, and both translations to the empty string."

    super initialize.

    currentIndex := 1.
    startIndex := 1.
    skipCount := 0.
    primaryTranslation := ''.
    secondaryTranslation := ''.

    "Modified: / 28-07-2017 / 11:18:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1330
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1331
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1332
!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1333
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1334
addPrimaryTranslation:aString 
    "Append aString to the primary translation."

    primaryTranslation := primaryTranslation , aString

    "Modified: / 28-07-2017 / 11:19:09 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1338
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1339
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1340
addSecondaryTranslation:aString 
    "Append aString to the secondary translation."

    secondaryTranslation := (secondaryTranslation , aString)

    "Modified: / 28-07-2017 / 11:17:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1344
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1345
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1346
isSlavoGermanic: aString
    "Answer true, if aString contains a W or a K, or one of the
     letter groups CZ or WITZ. The comparison is done against
     uppercase letters only."

    (aString includesAny: 'WK') ifTrue:[ ^ true ].
    (aString indexOfSubCollection: 'CZ' startingAt: 1) > 0 ifTrue:[ ^ true ].
    ^ (aString indexOfSubCollection: 'WITZ' startingAt: 1) > 0

    "Modified: / 09-10-2017 / 17:10:46 / stefan"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1352
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1353
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1354
keyAt: anInteger
    "Answer the character of the key at index anInteger,
     or a space character if the index lies outside of the key."

    ^ (anInteger between:1 and:inputKey size)
        ifTrue:[ inputKey at: anInteger ]
        ifFalse:[ Character space ]

    "Modified: / 28-07-2017 / 11:38:30 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1361
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1362
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1363
keyLeftString: lengthInteger
    "Answer the first lengthInteger characters of the key,
     as delivered by keyMidString:from: for a window starting at index 1."

    ^ self keyMidString: lengthInteger from: 1
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1365
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1366
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1367
keyMidString: lengthInteger from: fromInteger
    "Answer a string of exactly lengthInteger characters, which holds the
     characters of the key at the indices
        fromInteger .. fromInteger + lengthInteger - 1.
     Positions outside of the key (before its first or behind its last
     character) are filled with spaces.

     This also holds if the requested window lies completely outside of
     the key; the previous implementation computed a negative copy length
     and a wrong number of padding spaces in that case."

    | keySize lastIndex firstInKey lastInKey leadingCount trailingCount middle |

    keySize := inputKey size.
    lastIndex := fromInteger + lengthInteger - 1.

    "/ number of requested positions before the start of the key (index < 1)
    leadingCount := ((lastIndex min: 0) - fromInteger + 1) max: 0.
    "/ number of requested positions behind the end of the key (index > keySize)
    trailingCount := (lastIndex - (fromInteger max: keySize + 1) + 1) max: 0.

    "/ the part of the requested window which overlaps the key (possibly empty)
    firstInKey := fromInteger max: 1.
    lastInKey := lastIndex min: keySize.
    middle := firstInKey <= lastInKey
                ifTrue:[ inputKey copyFrom: firstInKey to: lastInKey ]
                ifFalse:[ '' ].

    ^ (String new: leadingCount withAll: Character space)
        , middle
        , (String new: trailingCount withAll: Character space)

    "Modified: / 28-07-2017 / 11:20:43 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1396
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1397
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1398
keyRightString: lengthInteger
    "Answer the last lengthInteger characters of the key,
     as delivered by keyMidString:from: for a window ending at the key's last index."

    | firstIndex |

    firstIndex := inputKey size - lengthInteger + 1.
    ^ self keyMidString: lengthInteger from: firstIndex

    "Modified: / 28-07-2017 / 11:20:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1402
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1403
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1404
performInitialProcessing
    "Handle the beginning of the key:
       - for keys starting with GN, KN, PN, WR or PS, startIndex is advanced by one;
       - a leading X adds 'S' to both translations and advances startIndex;
       - a leading vowel adds 'A' to both translations and advances startIndex."

    |firstChar initialCode|

    ((inputKey size > 1)
        and:[ inputKey startsWithAnyOf:#( 'GN' 'KN' 'PN' 'WR' 'PS' ) ])
    ifTrue:[
        startIndex := startIndex + 1
    ].

    "/ X and a vowel exclude each other, so at most one code is chosen
    firstChar := self keyAt:1.
    firstChar = $X
        ifTrue:[ initialCode := 'S' ]
        ifFalse:[ firstChar isVowel ifTrue:[ initialCode := 'A' ] ].

    initialCode notNil ifTrue:[
        self
            addPrimaryTranslation:initialCode;
            addSecondaryTranslation:initialCode.
        startIndex := startIndex + 1
    ]

    "Modified: / 01-08-2017 / 19:29:19 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1428
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1429
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1430
processB
    "Translate a B at currentIndex: 'P' is added to both translations.
     If the following character is a B too, skipCount is incremented by one."

    |nextChar|

    self addPrimaryTranslation: 'P'.
    self addSecondaryTranslation: 'P'.

    nextChar := self keyAt: currentIndex + 1.
    nextChar == $B ifTrue: [
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:26:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1440
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1441
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1442
processC
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1443
        "i"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1444
        ((((currentIndex >= 3
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1445
                and: [ (self keyAt: currentIndex-2) isVowel not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1446
                and: [ (self keyMidString: 3 from: currentIndex-1) = 'ACH' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1447
                and: [ (self keyAt: currentIndex+2) ~= $I ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1448
                and: [ ((self keyAt: currentIndex+2) ~= $E)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1449
                                or: [ (self keyMidString: 6 from: currentIndex-2) ~= 'BACHER'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1450
                                                and: [ (self keyMidString: 6 from: currentIndex-2) ~= 'MACHER' ] ] ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1451
                        ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1452
                                [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1453
                                self addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1454
                                skipCount := skipCount + 2.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1455
                                ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1456
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1457
        "ii"
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1458
        (inputKey beginsWith: 'CAESAR')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1459
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1460
                        [ self addPrimaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1461
                        self addSecondaryTranslation: 'S'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1462
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1463
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1464
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1465
        "iii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1466
        (self keyMidString: 4 from: currentIndex) = 'CHIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1467
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1468
                        [ self addPrimaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1469
                        self addSecondaryTranslation: 'K'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1470
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1471
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1472
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1473
        "iv"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1474
        (self keyMidString: 2 from: currentIndex) = 'CH'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1475
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1476
                        [ (currentIndex > 1                "a"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1477
                                        and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1478
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1479
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1480
                                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1481
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1482
                                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1483
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1484
                        (currentIndex = 1          "b"
5456
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1485
                                        and: [ (inputKey size > 5 and: [(inputKey startsWith: 'CHARAC')
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1486
                                                        or: [ (inputKey startsWith: 'CHARIS') ]] )
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1487
                                                or: [inputKey size > 4 and: [ ((((inputKey startsWith: 'CHOR')
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1488
                                                        or: [ (inputKey startsWith: 'CHYM') ])
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1489
                                                        or: [ (inputKey startsWith: 'CHIA') ])
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1490
                                                        or: [ (inputKey startsWith: 'CHEM') ])
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1491
                                                        and: [ (inputKey startsWith: 'CHORE') not ] ] ] ])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1492
                                ifTrue: [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1493
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1494
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1495
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1496
                                          ^self ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1497
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1498
                        (((((#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: 4))              "c"
5456
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1499
                                        or: [ (inputKey startsWith: 'SCH') ])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1500
                                        or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1501
                                                        includes: (self keyMidString: 6 from: currentIndex-2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1502
                                        or: [ #($T $S) includes: (self keyAt: currentIndex+2) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1503
                                        or: [ ((currentIndex = 1)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1504
                                                        or: [ #($A $O $U $E) includes: (self keyAt: currentIndex-1) ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1505
                                                and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex+2) ] ] )
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1506
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1507
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1508
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1509
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1510
                                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1511
                                          ^self ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1512
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1513
                                        [ currentIndex > 1
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1514
                                                ifTrue:
5456
3040ec2b4531 #REFACTORING by exept
Claus Gittinger <cg@exept.de>
parents: 5236
diff changeset
  1515
                                                        [ (inputKey startsWith: 'MC')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1516
                                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1517
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1518
                                                                                                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1519
                                                                                                addSecondaryTranslation: 'K' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1520
                                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1521
                                                                                [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1522
                                                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1523
                                                                                                addSecondaryTranslation: 'K' ] ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1524
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1525
                                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1526
                                                                addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1527
                                                                addSecondaryTranslation: 'X' ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1528
                                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1529
                                        ^self ] ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1530
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1531
        "v"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1532
        (self keyAt: currentIndex+1) = $Z
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1533
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1534
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1535
                                addPrimaryTranslation: 'S';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1536
                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1537
                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1538
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1539
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1540
        "vi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1541
        (self keyMidString: 3 from: currentIndex+1) = 'CIA'
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1542
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1543
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1544
                                addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1545
                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1546
                          skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1547
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1548
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1549
        "vii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1550
        ((self keyAt: currentIndex+1) = $C
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1551
                        and: [ ((currentIndex = 2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1552
                                and: [ (self keyAt: 1) = $M ]) not ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1553
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1554
                        [ ((#($I $E $H) includes: (self keyAt: currentIndex+2))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1555
                                        and: [ (self keyMidString: 2 from: currentIndex+2) ~= 'HU' ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1556
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1557
                                        [ ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1558
                                                        or: [ #('UCCEE' 'UCCES') includes: (self keyMidString: 5 from: currentIndex-1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1559
                                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1560
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1561
                                                                addPrimaryTranslation: 'KS';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1562
                                                                addSecondaryTranslation: 'KS'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1563
                                                         skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1564
                                                         ^self ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1565
                                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1566
                                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1567
                                                                addPrimaryTranslation: 'X';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1568
                                                                addSecondaryTranslation: 'X'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1569
                                                         skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1570
                                                         ^self ] ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1571
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1572
                                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1573
                                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1574
                                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1575
                                          skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1576
                                          ^self ] ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1577
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1578
        "viii"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1579
        (#($K $G $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1580
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1581
                        [ self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1582
                                addPrimaryTranslation: 'K';
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1583
                                addSecondaryTranslation: 'K'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1584
                          skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1585
                          ^self ].
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1586
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1587
        "ix"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1588
        (#($I $E $Y) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1589
                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1590
                        [ (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1591
                                ifTrue:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1592
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1593
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1594
                                                addSecondaryTranslation: 'X' ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1595
                                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1596
                                        [self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1597
                                                addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1598
                                                addSecondaryTranslation: 'S'].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1599
                        skipCount := skipCount + 1.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1600
                        ^self ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1601
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1602
        "x"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1603
        self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1604
                addPrimaryTranslation: 'K';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1605
                addSecondaryTranslation: 'K'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1606
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1607
        "xi"
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1608
        (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1609
                ifTrue:
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1610
                        [ skipCount := skipCount + 2 ]
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1611
                ifFalse:
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1612
                        [ ((#($C $K $Q) includes: (self keyAt: currentIndex+1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  1613
                                        and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex+1)) not ])
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1614
                                ifTrue: [ skipCount := skipCount + 1] ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1615
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  1616
    "Modified: / 28-07-2017 / 11:29:11 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1617
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1618
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1619
processCedille
	"Emit S as both the primary and the secondary translation."

	self addPrimaryTranslation: 'S'.
	self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1623
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1624
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1625
processD
        "Translate the key character at currentIndex, looking at the characters after it:
           - followed by G and then one of I, E, Y : emit J,  skipCount grows by 2
           - followed by G and anything else       : emit TK, skipCount grows by 1
           - followed by T or D                    : emit T,  skipCount grows by 1
           - otherwise                             : emit T,  skipCount unchanged
         The same code is always added to both the primary and the secondary translation.
         NOTE(review): skipCount presumably tells the caller how many further key
         characters to step over - confirm against the driving loop."

        | following |

        following := self keyAt: currentIndex + 1.

        "D followed by G"
        following = $G ifTrue: [
            (#($I $E $Y) includes: (self keyAt: currentIndex + 2)) ifTrue: [
                self addPrimaryTranslation: 'J'.
                self addSecondaryTranslation: 'J'.
                skipCount := skipCount + 2.
                ^ self
            ].
            self addPrimaryTranslation: 'TK'.
            self addSecondaryTranslation: 'TK'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "D followed by T or D"
        (#($T $D) includes: following) ifTrue: [
            self addPrimaryTranslation: 'T'.
            self addSecondaryTranslation: 'T'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "plain D"
        self addPrimaryTranslation: 'T'.
        self addSecondaryTranslation: 'T'

    "Modified: / 28-07-2017 / 11:27:39 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1658
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1659
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1660
processF
        "Emit F as both the primary and the secondary translation.
         If the key character right after currentIndex is also F, skipCount is
         incremented by one.
         NOTE(review): presumably this makes a doubled F count only once - confirm
         against the loop that consumes skipCount."

        self addPrimaryTranslation: 'F'.
        self addSecondaryTranslation: 'F'.

        ((self keyAt: currentIndex + 1) = $F) ifTrue: [
            skipCount := skipCount + 1
        ]

    "Modified (format): / 28-07-2017 / 11:29:21 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1669
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1670
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1671
processG
    "Encode the letter G found at currentIndex (double metaphone).

     This is a port of the 'case G' branch of the reference implementation at
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     The reference uses 0-based indices (current); this method uses 1-based
     indices (currentIndex), so 'current + k' there is 'currentIndex + k' here and
     'current > k' there is 'currentIndex > k + 1' here.

     Effects: appends to the primary and secondary translations via
     addPrimaryTranslation: / addSecondaryTranslation:, and increments skipCount
     by one in every case where the letter following the G is consumed together
     with it (GH, GN, GL, GE/GI/GY, GG).

     All substring tests go through the local block sliceAt, which never indexes
     outside of inputKey; short keys and a G at the first position are therefore
     handled without subscript errors."

    |keySize sliceAt word|

    keySize := inputKey size.

    "Bounds-safe counterpart of the reference's StringAt(start, length, ...):
     answers the part of inputKey within [start .. start+length-1], clipped to
     the key. A clipped result is shorter than length and can therefore never
     be equal to a full-length pattern."
    sliceAt := [:start :length |
        |from to|

        from := start max: 1.
        to := (start + length - 1) min: keySize.
        from > to
            ifTrue: ['']
            ifFalse: [inputKey copyFrom: from to: to]
    ].

    "---- GH ----"
    (self keyAt: currentIndex + 1) = $H ifTrue: [
        "GH preceded by a non-vowel -> K"
        (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not]) ifTrue: [
            self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "GH at the very beginning, e.g. 'ghislane', 'ghiradelli':
         J if followed by I, K otherwise"
        currentIndex = 1 ifTrue: [
            (self keyAt: currentIndex + 2) = $I ifTrue: [
                self addPrimaryTranslation: 'J'; addSecondaryTranslation: 'J'.
            ] ifFalse: [
                self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'K'.
            ].
            skipCount := skipCount + 1.
            ^ self
        ].

        "Parker's rule (with some further refinements): the GH is silent,
         e.g. 'hugh' (B, H or D two letters back), 'bough' (three letters back),
         'broughton' (B or H four letters back)"
        (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)])
            or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])
            or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])
        ifTrue: [
            skipCount := skipCount + 1.
            ^ self
        ].

        "e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' -> F"
        (currentIndex > 3
            and: [(self keyAt: currentIndex - 1) = $U
            and: [#($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)]])
        ifTrue: [
            self addPrimaryTranslation: 'F'; addSecondaryTranslation: 'F'.
        ] ifFalse: [
            "otherwise K, unless the GH follows an I (then it is silent)"
            (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I]) ifTrue: [
                self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'K'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- GN ----"
    (self keyAt: currentIndex + 1) = $N ifTrue: [
        "reference: (current == 1) AND IsVowel(0) AND NOT SlavoGermanic()"
        (currentIndex = 2
            and: [(inputKey at: 1) isVowel
            and: [(self isSlavoGermanic: inputKey) not]])
        ifTrue: [
            self addPrimaryTranslation: 'KN'; addSecondaryTranslation: 'N'.
        ] ifFalse: [
            "not e.g. 'cagney'.
             reference: NOT StringAt(current + 2, 2, EY) AND GetAt(current + 1) ~= Y
                        AND NOT SlavoGermanic()
             The Y test is kept to mirror the reference, although the letter
             at currentIndex + 1 is known to be N at this point."
            ((sliceAt value: currentIndex + 2 value: 2) ~= 'EY'
                and: [(self keyAt: currentIndex + 1) ~= $Y
                and: [(self isSlavoGermanic: inputKey) not]])
            ifTrue: [
                self addPrimaryTranslation: 'N'; addSecondaryTranslation: 'KN'.
            ] ifFalse: [
                self addPrimaryTranslation: 'KN'; addSecondaryTranslation: 'KN'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- GLI, e.g. 'tagliaro' ----
     reference: StringAt(current + 1, 2, LI) AND NOT SlavoGermanic()"
    ((sliceAt value: currentIndex + 1 value: 2) = 'LI'
        and: [(self isSlavoGermanic: inputKey) not])
    ifTrue: [
        self addPrimaryTranslation: 'KL'; addSecondaryTranslation: 'L'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- -ges-, -gep-, -gel-, -gie- ... at the beginning ----"
    (currentIndex = 1
        and: [(self keyAt: currentIndex + 1) = $Y
            or: [#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER')
                    includes: (sliceAt value: currentIndex + 1 value: 2)]])
    ifTrue: [
        self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- -ger-, -gy- ----
     reference: (StringAt(current + 1, 2, ER) OR GetAt(current + 1) == Y)
                AND NOT StringAt(0, 6, DANGER, RANGER, MANGER)
                AND NOT StringAt(current - 1, 1, E, I)
                AND NOT StringAt(current - 1, 3, RGY, OGY)"
    (((sliceAt value: currentIndex + 1 value: 2) = 'ER'
            or: [(self keyAt: currentIndex + 1) = $Y])
        and: [(#('DANGER' 'RANGER' 'MANGER') includes: (sliceAt value: 1 value: 6)) not
        and: [(#($E $I) includes: (self keyAt: currentIndex - 1)) not
        and: [(#('RGY' 'OGY') includes: (sliceAt value: currentIndex - 1 value: 3)) not]]])
    ifTrue: [
        self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- GE, GI, GY or italian -aggi-, -oggi- (e.g. 'biaggi') ----"
    ((#($E $I $Y) includes: (self keyAt: currentIndex + 1))
        or: [#('AGGI' 'OGGI') includes: (sliceAt value: currentIndex - 1 value: 4)])
    ifTrue: [
        "obvious germanic.
         reference: StringAt(0, 4, VAN , VON ) OR StringAt(0, 3, SCH)
                    OR StringAt(current + 1, 2, ET)"
        word := sliceAt value: 1 value: 4.
        ((#('VAN ' 'VON ') includes: word)
            or: [(word startsWith: 'SCH')
            or: [(sliceAt value: currentIndex + 1 value: 2) = 'ET']])
        ifTrue: [
            self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'K'.
        ] ifFalse: [
            "always soft if french ending 'IER ' (the appended blanks let the
             pattern match at the very end of the key as well)"
            (((sliceAt value: currentIndex + 1 value: 4) , '    ') startsWith: 'IER ')
            ifTrue: [
                self addPrimaryTranslation: 'J'; addSecondaryTranslation: 'J'.
            ] ifFalse: [
                self addPrimaryTranslation: 'J'; addSecondaryTranslation: 'K'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "---- default: K; a doubled GG counts as one sound ----"
    (self keyAt: currentIndex + 1) = $G ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'K'; addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:31:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1891
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1892
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1893
processH
        "Encode the letter H found at currentIndex.

         H is kept only when it is the first letter of the key or follows a vowel,
         AND is itself followed by a vowel. In that case 'H' is added to both the
         primary and the secondary translation and skipCount is incremented
         (the reference implementation does current += 2 at this point).
         In every other position nothing is added.

         Reference: case 'H' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
             if (((current == 0) OR IsVowel(current - 1)) AND IsVowel(current + 1)) {
                 MetaphAdd(H);
                 current += 2;
             } else          //also takes care of 'HH'
                 current += 1;
        "

        | atStartOrAfterVowel |

        atStartOrAfterVowel := currentIndex = 1
                                or: [(self keyAt: currentIndex - 1) isVowel].
        atStartOrAfterVowel ifFalse: [^ self].

        (self keyAt: currentIndex + 1) isVowel ifFalse: [^ self].

        self addPrimaryTranslation: 'H'.
        self addSecondaryTranslation: 'H'.
        skipCount := skipCount + 1.

    "Modified: / 28-07-2017 / 11:29:52 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1918
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1919
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  1920
processJ
        "Encode the letter J found at currentIndex.

         Rules, following case 'J' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp :

           1. Obvious Spanish: the key continues with 'JOSE' at currentIndex,
              or the whole key starts with 'SAN ' (as in 'san jacinto').
                - 'JOSE' standing as a word of its own at the start of the key
                  (the key is exactly 4 long, or a space follows), or any key
                  starting with 'SAN '                      -> primary H, secondary H
                - otherwise                                 -> primary J, secondary H
              The method returns right after this; a following J is not looked at.
           2. J is the first letter of the key               -> J / A
              (Yankelovich / Jankelowicz)
           3. J follows a vowel, the key is not slavo-germanic and the next
              letter is A or O (spanish, e.g. 'bajador')     -> J / H
           4. J is the last letter of the key                -> J / ' '
           5. Otherwise J / J, unless the next letter is one of L T K S N M B Z
              or the previous letter is one of S K L; then nothing is added.

         After rules 2..5, a directly following second J increments skipCount
         (the reference implementation does current += 2 there).

         Fix: the test for a space after 'JOSE' used to read
              self keyAt: currentIndex + 4 = <space>
         Binary messages bind tighter than keyword messages, so keyAt: was sent
         the Boolean result of the comparison. The keyAt: send is parenthesized now."

        | currentWord firstWord nextLetter |

        currentWord := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size).
        firstWord := inputKey copyFrom: 1 to: (4 min: inputKey size).
        nextLetter := self keyAt: currentIndex + 1.

        "rule 1: obvious spanish"
        (currentWord = 'JOSE' or: [firstWord = 'SAN '])
        ifTrue: [
                ((currentIndex = 1
                    and: [inputKey size == 4
                            or: [inputKey size >= 5
                                    and: [(self keyAt: currentIndex + 4) = Character space]]])
                        or: [firstWord = 'SAN '])
                ifTrue: [
                        self addPrimaryTranslation: 'H';
                        addSecondaryTranslation: 'H'.
                ] ifFalse: [
                        self addPrimaryTranslation: 'J';
                        addSecondaryTranslation: 'H'.
                ].
                ^ self.
        ].

        (currentIndex = 1 and: [firstWord ~= 'JOSE'])
        ifTrue: [
                "rule 2: leading J"
                self addPrimaryTranslation: 'J';
                addSecondaryTranslation: 'A'.
        ] ifFalse: [
                ((currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel])
                and: [(self isSlavoGermanic: inputKey) not
                        and: [nextLetter == $A or: [nextLetter == $O]]])
                ifTrue: [
                        "rule 3: spanish pronunciation after a vowel"
                        self addPrimaryTranslation: 'J';
                        addSecondaryTranslation: 'H'.
                ] ifFalse: [
                        currentIndex = inputKey size
                        ifTrue: [
                                "rule 4: trailing J"
                                self addPrimaryTranslation: 'J';
                                addSecondaryTranslation: ' '.
                        ] ifFalse: [
                                "rule 5: plain J, unless surrounded by the listed consonants"
                                ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not
                                    and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
                                ifTrue: [
                                        self addPrimaryTranslation: 'J';
                                        addSecondaryTranslation: 'J'.
                                ].
                        ].
                ].
        ].

        "a doubled J is consumed together with this one"
        nextLetter == $J
        ifTrue: [
                skipCount := skipCount + 1.
        ].

    "Modified: / 28-07-2017 / 11:31:41 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2005
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2006
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2007
processK
        "Encode the letter K found at currentIndex.

         'K' is always added to both the primary and the secondary translation.
         When the next letter is a K as well, skipCount is incremented first
         (the reference implementation does current += 2 for a doubled K).

         Reference: case 'K' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
             if (GetAt(current + 1) == 'K')
                 current += 2;
             else
                 current += 1;
             MetaphAdd(K);
        "

        | isDoubled |

        isDoubled := (self keyAt: currentIndex + 1) = $K.
        isDoubled ifTrue: [skipCount := skipCount + 1].

        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:31:46 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2026
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2027
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2028
processL
        "Encode the letter L found at currentIndex.

         A single L adds 'L' to both translations.
         For a doubled LL, skipCount is incremented and:
           - in the spanish-looking cases (e.g. 'cabrillo', 'gallegos') the
             primary translation gets 'L' and the secondary gets ' ';
           - otherwise 'L' is added to both.

         A doubled LL counts as spanish-looking when the four letters starting
         one before currentIndex are
           - 'ILLO', 'ILLA' or 'ALLE' and they are the last four letters of the key, or
           - 'ALLE' and the key ends in 'AS', 'OS', 'A' or 'O'.

         Reference: case 'L' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
             if (GetAt(current + 1) == 'L') {
                 if (((current == (length - 3))
                         AND StringAt((current - 1), 4, ILLO, ILLA, ALLE))
                     OR ((StringAt((last - 1), 2, AS, OS) OR StringAt(last, 1, A, O))
                         AND StringAt((current - 1), 4, ALLE))) {
                     MetaphAdd(L, <blank>);
                     current += 2;
                     break;
                 }
                 current += 2;
             } else
                 current += 1;
             MetaphAdd(L);

         Fix: the four-letter window used to be fetched only inside the first
         alternative, i.e. only when the LL sat at the end of the key. In all
         other positions it stayed nil, so the second alternative ('ALLE' plus a
         spanish ending, as in 'gallegos') could never match. The window is now
         computed before either alternative is tested."

        | currentWord keySize |

        (self keyAt: currentIndex + 1) = $L
        ifTrue: [
                keySize := inputKey size.

                "the letters around the LL; empty when there is no letter before the first L"
                currentWord := currentIndex > 1
                    ifTrue: [inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: keySize)]
                    ifFalse: [''].

                ((currentIndex = (keySize - 2)
                    and: [#('ILLO' 'ILLA' 'ALLE') includes: currentWord])
                  or: [currentWord = 'ALLE'
                    and: [(#('AS' 'OS') includes: (inputKey copyFrom: keySize - 1 to: keySize))
                            or: [#($A $O) includes: (self keyAt: keySize)]]])
                ifTrue: [
                        self addPrimaryTranslation: 'L';
                        addSecondaryTranslation: ' '.
                        skipCount := skipCount + 1.
                        ^ self.
                ].
                skipCount := skipCount + 1.
        ].
        self addPrimaryTranslation: 'L';
        addSecondaryTranslation: 'L'.

    "Modified: / 28-07-2017 / 11:32:03 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2068
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2069
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2070
processM
        "Encode the letter M found at currentIndex.

         'M' is always added to both the primary and the secondary translation.
         skipCount is incremented first when
           - the M is the middle of 'UMB' and either the B is the last letter of
             the key ('dumb', 'thumb') or the B is followed by 'ER', or
           - the next letter is an M as well.

         Reference: case 'M' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
             if ((StringAt((current - 1), 3, UMB)
                     AND (((current + 1) == last) OR StringAt((current + 2), 2, ER)))
                     //'dumb','thumb'
                 OR (GetAt(current + 1) == 'M'))
                 current += 2;
             else
                 current += 1;
             MetaphAdd(M);

         Fix: the 'ER' lookahead is two letters long (currentIndex + 2 up to
         currentIndex + 3). It used to copy up to currentIndex + 4, i.e. three
         letters, so it could only equal 'ER' when the key ended right after
         the R."

        | keySize |

        keySize := inputKey size.

        (((currentIndex > 1
            and: [(inputKey copyFrom: currentIndex - 1 to: (currentIndex + 1 min: keySize)) = 'UMB'])
          and: [currentIndex + 1 = keySize
                or: [(inputKey
                        copyFrom: (currentIndex + 2 min: keySize)
                        to: (currentIndex + 3 min: keySize)) = 'ER']])
          or: [(self keyAt: currentIndex + 1) = $M])
        ifTrue: [
                skipCount := skipCount + 1.
        ].
        self addPrimaryTranslation: 'M';
        addSecondaryTranslation: 'M'.

    "Modified: / 28-07-2017 / 11:32:08 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2093
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2094
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2095
processN
        "Encode the letter N found at currentIndex.

         'N' is always added to both the primary and the secondary translation.
         When the next letter is an N as well, skipCount is incremented first
         (the reference implementation does current += 2 for a doubled N).

         Reference: case 'N' in
         http://aspell.sourceforge.net/metaphone/dmetaph.cpp
             if (GetAt(current + 1) == 'N')
                 current += 2;
             else
                 current += 1;
             MetaphAdd(N);
        "

        | isDoubled |

        isDoubled := (self keyAt: currentIndex + 1) = $N.
        isDoubled ifTrue: [skipCount := skipCount + 1].

        self addPrimaryTranslation: 'N'.
        self addSecondaryTranslation: 'N'.

    "Modified: / 28-07-2017 / 11:32:14 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2115
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2116
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2117
processNtilde
    "Encode the letter N-with-tilde found at currentIndex:
     it contributes 'N' to both the primary and the secondary translation
     and never skips a following character.

     Reference C code:
        case 'Ñ':
            current += 1;
            MetaphAdd(N);
            break;
    "

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2125
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2126
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2127
processP
    "Encode the consonant P found at currentIndex.

     - 'PH' is encoded as 'F' in both translations; the H is skipped.
     - otherwise 'P' is added to both translations; a directly following
       P or B (as in 'campbell', 'raspberry') is skipped so that the pair
       yields a single 'P'.

     Reference C code (http://aspell.sourceforge.net/metaphone/dmetaph.cpp):
        case 'P':
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(F);
                current += 2;
                break;
            }

            //also account for campbell, raspberry
            if(StringAt((current + 1), 1, P, B, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(P);
            break;

     Attention: in the C code, MetaphAdd(P) is NOT part of the else branch
     (the if/else has no braces), so P is always emitted. An earlier version
     of this method only emitted 'P' when no P/B followed, which dropped the
     P sound completely for 'PP' and 'PB'.
    "

    | nextLetter |

    nextLetter := self keyAt: currentIndex + 1.
    nextLetter = $H ifTrue:[
        self addPrimaryTranslation: 'F';
             addSecondaryTranslation: 'F'.
        skipCount := skipCount + 1.
        ^ self.
    ].

    "PP and PB collapse into a single P: skip the second letter..."
    (#($P $B) includes: nextLetter) ifTrue:[
        skipCount := skipCount + 1.
    ].
    "...but always emit the P itself"
    self addPrimaryTranslation: 'P';
         addSecondaryTranslation: 'P'.

    "Modified: / 28-07-2017 / 11:32:28 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2161
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2162
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2163
processQ
    "Encode the consonant Q found at currentIndex.
     Q is encoded as 'K' in both translations. If the next character is
     also Q, skipCount is incremented so the doubled letter yields one code.

     Ported from http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Q':
            if(GetAt(current + 1) == 'Q')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    |followingChar|

    followingChar := self keyAt: currentIndex + 1.
    followingChar = $Q ifTrue:[
        "doubled letter: swallow the second Q"
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:32:32 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2183
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2184
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2185
processR
    "Encode the consonant R found at currentIndex.

     The secondary translation always receives 'R'.
     The primary translation receives 'R' as well, except for the french
     style silent final R (e.g. 'rogier', but not 'hochmeier'), i.e. when
     all of the following hold:
        - R is the last character of inputKey
        - inputKey is not slavo-germanic
        - the two characters before R are 'IE'
        - the two characters before that are neither 'ME' nor 'MA'
     in which case the empty string is added to the primary translation.
     A directly following second R is skipped.

     Ported from case 'R' of
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     (the C offsets current-2 / current-4 are relative to a 0-based index;
      the 1-based ranges below address the same characters and are clamped
      to 1 for short keys).
    "

    |primaryCode precedingPair earlierPair|

    primaryCode := 'R'.
    (currentIndex = inputKey size
      and:[ (self isSlavoGermanic: inputKey) not ]
    ) ifTrue:[
        precedingPair := inputKey
                            copyFrom: ((currentIndex - 2) max: 1)
                            to: ((currentIndex - 1) max: 1).
        precedingPair = 'IE' ifTrue:[
            earlierPair := inputKey
                                copyFrom: ((currentIndex - 4) max: 1)
                                to: ((currentIndex - 3) max: 1).
            (#('ME' 'MA') includes: earlierPair) ifFalse:[
                "silent final R in the primary encoding"
                primaryCode := ''.
            ].
        ].
    ].
    self addPrimaryTranslation: primaryCode.
    self addSecondaryTranslation: 'R'.

    (self keyAt: currentIndex + 1) = $R ifTrue:[
        "doubled letter: swallow the second R"
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:32:37 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2223
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2224
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2225
processRemainingCharacters
    "Walk over inputKey from startIndex to its end and dispatch every
     character which is neither a vowel nor Y to its process<Letter> method
     (processCedille / processNtilde for the two special letters).

     While skipCount is non-zero, the character at hand is not processed;
     skipCount is decremented instead (this is how the process<Letter>
     methods consume additional characters).
     Processing stops early as soon as both translations are longer
     than 4 characters.

     NOTE(review): the selector is built from the character itself, so a
     character without a matching process<Letter> method would make
     perform: fail - presumably inputKey only contains letters at this
     point; confirm against the caller.
    "

    startIndex to: inputKey size do:[:position |
        |ch selector|

        skipCount = 0 ifFalse:[
            "this character was already consumed by a previous letter"
            skipCount := skipCount - 1
        ] ifTrue:[
            (primaryTranslation size > 4 and:[ secondaryTranslation size > 4 ]) ifTrue:[
                ^ self
            ].

            currentIndex := position.
            ch := self keyAt: position.

            (ch isVowel or:[ ch = $Y ]) ifFalse:[
                selector := ch == $Ç
                                ifTrue:[ #processCedille ]
                                ifFalse:[
                                    ch == $Ñ
                                        ifTrue:[ #processNtilde ]
                                        ifFalse:[ ('process', ch asString) asSymbol ]
                                ].
                self perform: selector
            ]
        ]
    ]

    "Modified: / 28-07-2017 / 11:24:15 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2252
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2253
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2254
processS
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2255
        "http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2256
        case 'S':
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2257
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2258
                                if(StringAt((current - 1), 3, ISL, YSL, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2259
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2260
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2261
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2262
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2263
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2264
                                //special case 'sugar-'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2265
                                if((current == 0) AND StringAt(current, 5, SUGAR, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2266
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2267
                                        MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2268
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2269
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2270
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2271
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2272
                                if(StringAt(current, 2, SH, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2273
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2274
                                        //germanic
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2275
                                        if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2276
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2277
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2278
                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2279
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2280
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2281
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2282
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2283
                                //italian & armenian
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2284
                                if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2285
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2286
                                        if(!!SlavoGermanic())
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2287
                                                MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2288
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2289
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2290
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2291
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2292
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2293
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2294
                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2295
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2296
                                if(((current == 0) 
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2297
                                                AND StringAt((current + 1), 1, M, N, L, W, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2298
                                                        OR StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2299
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2300
                                        MetaphAdd(S, X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2301
                                        if(StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2302
                                                current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2303
                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2304
                                                current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2305
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2306
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2307
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2308
                                if(StringAt(current, 2, SC, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2309
                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2310
                                        //Schlesinger's rule
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2311
                                        if(GetAt(current + 2) == 'H')
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2312
                                                //dutch origin, e.g. 'school', 'schooner'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2313
                                                if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2314
                                                {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2315
                                                        //'schermerhorn', 'schenker'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2316
                                                        if(StringAt((current + 3), 2, ER, EN, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2317
                                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2318
                                                                MetaphAdd(X, SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2319
                                                        }else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2320
                                                                MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2321
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2322
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2323
                                                }else{
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2324
                                                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2325
                                                                MetaphAdd(X, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2326
                                                        else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2327
                                                                MetaphAdd(X);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2328
                                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2329
                                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2330
                                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2331
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2332
                                        if(StringAt((current + 2), 1, I, E, Y, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2333
                                        {
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2334
                                                MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2335
                                                current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2336
                                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2337
                                        }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2338
                                        //else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2339
                                        MetaphAdd(SK);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2340
                                        current += 3;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2341
                                        break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2342
                                }
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2343
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2344
                                //french e.g. 'resnais', 'artois'
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2345
                                if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2346
                                        MetaphAdd(, S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2347
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2348
                                        MetaphAdd(S);
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2349
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2350
                                if(StringAt((current + 1), 1, S, Z, ))
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2351
                                        current += 2;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2352
                                else
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2353
                                        current += 1;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2354
                                break;
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2355
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2356
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2357
        | nextChar char2 chars char |
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2358
        (#('ISL' 'YSL') includes: (inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: inputKey size))) 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2359
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2360
                ^self
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2361
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2362
        (currentIndex = 1 and: [(inputKey copyFrom: 1 to: (5 min: inputKey size)) = 'SUGAR'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2363
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2364
                self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2365
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2366
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2367
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2368
        (inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SH'
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2369
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2370
                (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (inputKey copyFrom: (currentIndex + 1 min: inputKey size) to: ((currentIndex + 5) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2371
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2372
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2373
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2374
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2375
                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2376
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2377
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2378
                skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2379
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2380
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2381
        ((#('SIO' 'SIA') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 2 min: inputKey size)))
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2382
                or: [(inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)) = 'SIAN'])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2383
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2384
                (self isSlavoGermanic: inputKey) not
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2385
                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2386
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2387
                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2388
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2389
                        self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2390
                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2391
                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2392
                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2393
                ^self 
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2394
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2395
        ((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2396
                or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2397
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2398
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2399
                addSecondaryTranslation: 'X'.
3488
5a69e672d7f8 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 3185
diff changeset
  2400
                nextChar == $Z
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2401
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2402
                    skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2403
                        ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2404
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2405
                ^self.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2406
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2407
        ((inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SC')
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2408
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2409
                (char2 := self keyAt: currentIndex + 2) = $H
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2410
                ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2411
                        (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := inputKey copyFrom: ((currentIndex + 3) min: inputKey size) to: ((currentIndex + 4) min: inputKey size)))
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2412
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2413
                                (#('ER' 'EN') includes: chars)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2414
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2415
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2416
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2417
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2418
                                        self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2419
                                        addSecondaryTranslation: 'SK'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2420
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2421
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2422
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2423
                        ] ifFalse: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2424
                                ((currentIndex = 1 and: [(char := inputKey at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2425
                                ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2426
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2427
                                        addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2428
                                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2429
                                        self addPrimaryTranslation: 'X';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2430
                                        addSecondaryTranslation: 'X'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2431
                                ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2432
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2433
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2434
                        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2435
                ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2436
                        (#($I $E $Y) includes: char2)
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2437
                        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2438
                                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2439
                                addSecondaryTranslation: 'S'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2440
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2441
                                ^self .
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2442
                        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2443
                                self addPrimaryTranslation: 'SK';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2444
                                addSecondaryTranslation: 'SK'.
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2445
                                skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2446
                                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2447
                        ]
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2448
                ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2449
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2450
        (currentIndex = inputKey size and: [(#('AI' 'OI') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2451
        ifTrue: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2452
                self addPrimaryTranslation: '';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2453
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2454
        ] ifFalse: [
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2455
                self addPrimaryTranslation: 'S';
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2456
                addSecondaryTranslation: 'S'.
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2457
        ].
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2458
        (#($S $Z) includes: (self keyAt: currentIndex + 1))
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2459
        ifTrue: [
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2460
            skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2461
                ^self.
2213
d465fa29df0e *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2211
diff changeset
  2462
        ].
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2463
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2464
    "Modified: / 28-07-2017 / 11:34:18 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2465
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2466
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2467
processT
        "Double Metaphone rule for a T found at currentIndex of inputKey.
         Follows the 'case T' branch of the reference implementation at
             http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         Cases, tried in this order (primary / secondary code):
           TION, TIA, TCH   -> 'X' / 'X'; two further characters are skipped.
           TH, TTH          -> 'T' / 'T' if followed by OM or AM ('thomas', 'thames'),
                               or if the key starts with 'VAN ', 'VON ' or 'SCH' (germanic);
                               otherwise '0' / 'T'.  One further character is skipped.
           anything else    -> 'T' / 'T'; a directly following T or D is skipped.

         The amounts added to skipCount are one less than the 'current += n' steps of
         the reference code; presumably the caller steps over the T itself
         - confirm against the driving loop."

        | keySize leading thomasOrGermanic |

        keySize := inputKey size.

        "leading value: n  answers the (at most) n characters of the key starting at the T"
        leading := [:n |
                inputKey copyFrom: currentIndex to: ((currentIndex + n - 1) min: keySize)
        ].

        (((leading value: 4) = 'TION')
            or: [#('TIA' 'TCH') includes: (leading value: 3)])
        ifTrue: [
                self addPrimaryTranslation: 'X'.
                self addSecondaryTranslation: 'X'.
                skipCount := skipCount + 2.
                ^ self
        ].

        (((leading value: 2) = 'TH')
            or: [(leading value: 3) = 'TTH'])
        ifTrue: [
                "special case 'thomas', 'thames' or germanic: plain T instead of '0'"
                thomasOrGermanic :=
                        (#('OM' 'AM') includes: (inputKey copyFrom: currentIndex + 2 to: ((currentIndex + 3) min: keySize)))
                            or: [
                                (#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: (4 min: keySize)))
                                    or: [(inputKey copyFrom: 1 to: (3 min: keySize)) = 'SCH']
                            ].
                self addPrimaryTranslation: (thomasOrGermanic ifTrue: ['T'] ifFalse: ['0']).
                self addSecondaryTranslation: 'T'.
                skipCount := skipCount + 1.
                ^ self
        ].

        "default: a doubled T or a following D is swallowed together with this T"
        (#($T $D) includes: (self keyAt: currentIndex + 1)) ifTrue: [
                skipCount := skipCount + 1
        ].
        self addPrimaryTranslation: 'T'.
        self addSecondaryTranslation: 'T'.

    "Modified: / 28-07-2017 / 11:33:33 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2547
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2548
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2549
processV
        "Double Metaphone rule for a V found at currentIndex of inputKey.
         Follows the 'case V' branch of the reference implementation at
             http://aspell.sourceforge.net/metaphone/dmetaph.cpp

         A V is always coded as 'F' (primary and secondary).
         If the character after it is another V, skipCount is incremented
         so that the pair yields a single code (the reference does 'current += 2')."

        | doubledV |

        doubledV := (self keyAt: currentIndex + 1) = $V.
        skipCount := skipCount + (doubledV ifTrue: [1] ifFalse: [0]).

        self addPrimaryTranslation: 'F'.
        self addSecondaryTranslation: 'F'.

    "Modified: / 28-07-2017 / 11:34:27 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2570
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2571
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2572
processW
    "Double Metaphone rule for the letter W, following the C++ reference at
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp. In reference terms
     (0-based current there, 1-based currentIndex here):

       1. WR (also in the middle of a word)   -> R / R, consume 2 letters.
       2. W is the first letter AND (next is a vowel OR the pair is WH):
              next is a vowel                 -> A / F   (Wasserman ~ Vasserman)
              otherwise (WH)                  -> A / A   (Uomo ~ Womo)
          processing then continues with rule 3.
       3. (W is the last letter AND the previous letter is a vowel)
          OR one of EWSKI EWSKY OWSKI OWSKY starts one letter before W
          OR the word starts with SCH         -> (empty) / F, consume 1 letter.
       4. WICZ or WITZ starts at W            -> TS / FX, consume 4 letters.
       5. otherwise W is silent.

     Consuming n letters is expressed as skipCount := skipCount + (n - 1);
     presumably the caller advances past the current letter itself - confirm
     in the main loop.

     Fixes relative to the previous version:
       - rule 2 was grouped as (first AND vowel) OR WH, so a WH in the middle
         of a word entered the branch with nextLetter still nil and then sent
         isVowel to nil; the grouping now matches the reference.
       - rule 4 compared a 5-character slice against 4-character patterns, so
         WICZ / WITZ were only recognized when they ended the word."

    |pair nextLetter size|

    size := inputKey size.

    "rule 1: WR"
    pair := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: size).
    pair = 'WR' ifTrue:[
        self addPrimaryTranslation: 'R';
             addSecondaryTranslation: 'R'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "rule 2: only applies to a leading W; nextLetter is assigned before any use"
    currentIndex = 1 ifTrue:[
        nextLetter := self keyAt: currentIndex + 1.
        nextLetter isVowel ifTrue:[
            self addPrimaryTranslation: 'A';
                 addSecondaryTranslation: 'F'.
        ] ifFalse:[
            pair = 'WH' ifTrue:[
                self addPrimaryTranslation: 'A';
                     addSecondaryTranslation: 'A'.
            ]
        ]
    ].

    "rule 3: the 5-letter slice starts one letter before the W"
    (((currentIndex = size and:[ (self keyAt: currentIndex - 1) isVowel ])
        or:[ #('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY')
                includes: (inputKey copyFrom: ((currentIndex - 1) max: 1)
                                    to: (currentIndex + 3 min: size)) ])
            or:[ inputKey startsWith:'SCH' ])
    ifTrue:[
        self addPrimaryTranslation: '';
             addSecondaryTranslation: 'F'.
        ^ self
    ].

    "rule 4: exactly four letters starting at the W (currentIndex .. currentIndex + 3)"
    (#('WICZ' 'WITZ')
        includes: (inputKey copyFrom: currentIndex to: (currentIndex + 3 min: size)))
    ifTrue:[
        self addPrimaryTranslation: 'TS';
             addSecondaryTranslation: 'FX'.
        skipCount := skipCount + 3.
        ^ self
    ].

    "rule 5: nothing is emitted for a silent W"

    "Modified: / 28-07-2017 / 11:34:51 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2654
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2655
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2656
processX
    "Double Metaphone rule for the letter X, following the C++ reference at
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp:

       - X is silent when it is the last letter and is preceded by
         IAU or EAU (three letters) or by AU or OU (two letters),
         as in the french breaux; in every other case it is encoded KS / KS.
       - a following C or X is swallowed together with this X
         (skipCount is incremented once; the reference advances by 2).

     Fixes relative to the previous version:
       - the slice start was clamped with min: 1 instead of max: 1, which
         produced an index of 0 or less for short words and always 1 for
         longer ones;
       - the slices ended at currentIndex and therefore contained the X
         itself, so none of the patterns could ever match. They now end at
         currentIndex - 1, i.e. they cover the letters BEFORE the X."

    |isSilent beforeX3 beforeX2|

    isSilent := false.
    currentIndex = inputKey size ifTrue:[
        "for a start beyond the end (very short words) the slice is simply shorter or empty"
        beforeX3 := inputKey copyFrom: ((currentIndex - 3) max: 1) to: currentIndex - 1.
        beforeX2 := inputKey copyFrom: ((currentIndex - 2) max: 1) to: currentIndex - 1.
        isSilent := (#('IAU' 'EAU') includes: beforeX3)
                        or:[ #('AU' 'OU') includes: beforeX2 ].
    ].
    isSilent ifFalse:[
        self addPrimaryTranslation: 'KS';
             addSecondaryTranslation: 'KS'.
    ].

    (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue:[
        skipCount := skipCount + 1.
    ].

    "Modified: / 28-07-2017 / 11:34:44 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2687
!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2688
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  2689
processZ
    "Double Metaphone rule for the letter Z, following the C++ reference at
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp:

       - ZH (chinese pinyin, e.g. zhao)      -> J / J, consume 2 letters.
       - otherwise, if the two letters after Z are ZO, ZI or ZA,
         or the word is slavo-germanic and Z is neither the first letter
         nor preceded by T                   -> S / TS
       - otherwise                           -> S / S
       - a doubled Z is consumed as one sound (skipCount is incremented once).

     Fix relative to the previous version: the preceding letter was compared
     against the String 'T' instead of the Character T; a Character never
     equals a String, so the not-preceded-by-T condition was always true."

    |followingPair size|

    (self keyAt: currentIndex + 1) = $H ifTrue:[
        self addPrimaryTranslation: 'J';
             addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    size := inputKey size.
    "the two letters after the Z, clipped at the end of the word"
    followingPair := inputKey copyFrom: ((currentIndex + 1) min: size)
                              to: ((currentIndex + 2) min: size).

    ((#('ZO' 'ZI' 'ZA') includes: followingPair)
        or:[ (self isSlavoGermanic: inputKey)
                and:[ currentIndex > 1
                        and:[ (self keyAt: currentIndex - 1) ~= $T ] ] ])
    ifTrue:[
        self addPrimaryTranslation: 'S';
             addSecondaryTranslation: 'TS'.
    ] ifFalse:[
        self addPrimaryTranslation: 'S';
             addSecondaryTranslation: 'S'.
    ].

    (self keyAt: currentIndex + 1) = $Z ifTrue:[
        skipCount := skipCount + 1.
    ].

    "Modified: / 28-07-2017 / 11:35:12 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2739
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  2740
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2741
!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2742
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2743
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2744
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2745
    There are many extended and enhanced soundex variants around;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2746
    here is one, called 'extended soundex'. It is described for example in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2747
    http://www.epidata.dk/documentation.php.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2748
    An author or origin is unknown.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2749
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2750
    The number of digits is increased to 5 or 8;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2751
    The first character is not used literally; instead it is encoded like the rest.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2752
    This might have a negative effect on names starting with a vowel, though.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2753
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2754
    Overall, it can be doubted if this is really an enhancement after all.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2755
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2756
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2757
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2758
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2759
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2760
phoneticStringsFor:aString
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2761
    "generates both an extended soundex of length 5 and one of length 8"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2762
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2763
    |first second u t prevCode|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2764
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2765
    u := aString asUppercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2766
    first := second := ''.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2767
    u do:[:c | 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2768
        t := self translate:c.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2769
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2770
            first := first , t.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2771
            second := second , t.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2772
            second size == 8 ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2773
                ^ Array with:(first copyTo:5) with:second 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2774
            ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2775
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2776
        prevCode := t
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2777
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2778
    [ first size < 5 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2779
        first := first , '0'.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2780
        second := second , '0'.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2781
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2782
    [ second size < 8 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2783
        second := second , '0'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2784
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2785
    ^ Array with:first with:second
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2786
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2787
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2788
     self basicNew phoneticStringsFor:'müller'  #('87900' '87900000')  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2789
     self basicNew phoneticStringsFor:'miller'  #('87900' '87900000')   
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2790
     self basicNew phoneticStringsFor:'muller'  #('87900' '87900000')    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2791
     self basicNew phoneticStringsFor:'muler'   #('87900' '87900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2792
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2793
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2794
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2795
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2796
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2797
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2798
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2799
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2800
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2801
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2802
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2803
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2804
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2805
!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2806
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2807
translate:aCharacter
    "answer the extended soundex code for aCharacter as a one-character String
     ('0' .. '9'), or nil if the character has no code.

     Only the uppercase letters tested below are recognized; anything else
     (H and W, lowercase letters, non-letters) falls through and answers nil.
     NOTE(review): callers are presumably expected to pass uppercase
     characters - confirm against the sender.

     Simple if's are used on purpose, for more speed when compiled.
     The first matching test returns, so every letter must appear in
     exactly one group."

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ].
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ].
    aCharacter == $V ifTrue:[^ '2' ].

    aCharacter == $C ifTrue:[^ '3' ].
    aCharacter == $S ifTrue:[^ '3' ].
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ].
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ].
    aCharacter == $X ifTrue:[^ '5' ].
    aCharacter == $Z ifTrue:[^ '5' ].

    "G is coded '4' (together with J) above; a former second test which
     mapped G to '6' in this group could never be reached and was removed"
    aCharacter == $D ifTrue:[^ '6' ].
    aCharacter == $T ifTrue:[^ '6' ].

    aCharacter == $L ifTrue:[^ '7' ].

    aCharacter == $M ifTrue:[^ '8' ].
    aCharacter == $N ifTrue:[^ '8' ].

    aCharacter == $R ifTrue:[^ '9' ].

    "no code for this character"
    ^ nil
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2846
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2847
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2848
!PhoneticStringUtilities::SingleResultPhoneticStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2849
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2850
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2851
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2852
    Abstract superclass for phonetic string comparators which compute
    exactly one code per word.
    Subclasses must implement #encode: (it is a subclassResponsibility here);
    #phoneticStringsFor: then answers that single code wrapped in a
    one-element Array.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2853
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2854
    [author:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2855
        cg
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2856
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2857
    [instance variables:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2858
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2859
    [class variables:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2860
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2861
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2862
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2863
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2864
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2865
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2866
!PhoneticStringUtilities::SingleResultPhoneticStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2867
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2868
encode:word
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2869
    "answer the single phonetic code for word.
     Abstract here: concrete subclasses must redefine this method.
     The answer is what #phoneticStringsFor: wraps into its result Array
     (presumably a String - the concrete type is not visible here)"

    ^ self subclassResponsibility
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2870
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2871
    "Created: / 28-07-2017 / 15:20:49 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2872
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2873
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2874
phoneticStringsFor:word
    "answer a one-element Array holding the code which #encode: computes
     for word; comparators of this kind produce a single result only"

    |code|

    code := self encode:word.
    ^ Array with:code

    "Created: / 28-07-2017 / 15:20:38 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2878
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2879
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2880
!PhoneticStringUtilities::MRAStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2881
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2882
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2883
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2884
    Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2885
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2886
    The Western Airlines Match Rating Approach (MRA) name encoder.

    An R implementation of the encoder and of the MRA comparison
    (by James P. Howard) is kept in the class-side method #rCode.
    As written in that R code (the Smalltalk encoder presumably does the
    same - to be confirmed): non-letters are removed and the word is
    uppercased; vowels (AEIOU) other than a leading one are dropped;
    runs of the same letter are collapsed into one; keys longer than
    6 characters are cut to their first 3 plus their last 3 characters.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2887
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2888
    [see also:]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2889
        https://en.wikipedia.org/wiki/Match_Rating_Approach
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2890
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2891
        G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2892
            ''Accessing Individual Records from Personal Data Files Using Nonunique Identifiers'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2893
            US National Institute of Standards and Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2894
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2895
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2896
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2897
rCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2898
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2899
## Copyright (c) 2015, James P. Howard, II <jh@jameshoward.us>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2900
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2901
## Redistribution and use in source and binary forms, with or without
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2902
## modification, are permitted provided that the following conditions are
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2903
## met:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2904
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2905
##     Redistributions of source code must retain the above copyright
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2906
##     notice, this list of conditions and the following disclaimer.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2907
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2908
##     Redistributions in binary form must reproduce the above copyright
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2909
##     notice, this list of conditions and the following disclaimer in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2910
##     the documentation and/or other materials provided with the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2911
##     distribution.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2912
##
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2913
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2914
## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2915
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2916
## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2917
## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2918
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2919
## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2920
## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2921
## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2922
## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2923
## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2924
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2925
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2926
#' @title Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2927
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2928
#' @description
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2929
#' The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2930
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2931
#' @param word string or vector of strings to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2932
#' @param x MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2933
#' @param y MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2934
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2935
#' @details
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2936
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2937
#' The variable \code{word} is the name to be encoded.  The variable
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2938
#' \code{maxCodeLen} is \emph{not} supported in this algorithm encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2939
#' because the algorithm itself is dependent upon its six-character
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2940
#' length.  The variables \code{x} and \code{y} are MRA-encoded and are
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2941
#' compared to each other using the MRA comparison specification.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2942
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2943
#' @return The \code{mra_encode} function returns match rating approach
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2944
#' encoded character vector.  The \code{mra_compare} returns a boolean
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2945
#' vector which is \code{TRUE} if \code{x} and \code{y} pass the MRA
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2946
#' comparison test.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2947
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2948
#' @references
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2949
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2950
#' G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2951
#' \emph{Accessing Individual Records from Personal Data Files Using
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2952
#' Nonunique Identifiers,} US National Institute of Standards and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2953
#' Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2954
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2955
#' @family phonics
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2956
#'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2957
#' @examples
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2958
#' mra_encode("William")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2959
#' mra_encode(c("Peter", "Peady"))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2960
#' mra_encode("Stevenson")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2961
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2962
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2963
#' @name mra_encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2964
#' @export
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2965
mra_encode <- function(word) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2966
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2967
    ## First, remove any nonalphabetical characters and uppercase it
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2968
    word <- gsub("[^[:alpha:]]*", "", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2969
    word <- toupper(word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2970
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2971
    ## First character of key = first character of name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2972
    first <- substr(word, 1, 1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2973
    word <- substr(word, 2, nchar(word))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2974
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2975
    ## Delete vowels not at the start of the word
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2976
    word <- gsub("[AEIOU]", "", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2977
    word <- paste(first, word, sep = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2978
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2979
    ## Remove duplicate consecutive characters
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2980
    word <- gsub("([A-Z])\\1+", "\\1", word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2981
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2982
    ## If longer than 6 characters, take first and last 3...and we have
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2983
    ## to vectorize it
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2984
    for(i in 1:length(word)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2985
        if((l = nchar(word[i])) > 6) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2986
            first <- substr(word[i], 1, 3)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2987
            last <- substr(word[i], l - 2, l)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2988
            word[i] <- paste(first, last, sep = "");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2989
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2990
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2991
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2992
    return(word)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2993
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2994
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2995
#' @rdname mra
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2996
#' @name mra_compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2997
#' @export
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2998
mra_compare <- function(x, y) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  2999
    mra <- data.frame(x = x, y = y, sim = 0, min = 100, stringsAsFactors = FALSE)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3000
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3001
    ## Obtain the minimum rating value by calculating the length sum of
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3002
    ## the encoded strings and using table A (from Wikipedia).  We start
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3003
    ## by setting the minimum to be the sum and move from there.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3004
    mra$lensum <- nchar(mra$x) + nchar(mra$y)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3005
    mra$min[mra$lensum == 12] <- 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3006
    mra$min[mra$lensum > 7 && mra$lensum <= 11] <- 3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3007
    mra$min[mra$lensum > 4 && mra$lensum <= 7] <- 4
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3008
    mra$min[mra$lensum <= 4] <- 5
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3009
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3010
    ## If the length difference between the encoded strings is 3 or
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3011
    ## greater, then no similarity comparison is done.  For us, we
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3012
    ## continue the similarity comparison out of laziness and ensure the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3013
    ## minimum is impossibly high to meet.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3014
    mra$min[abs(nchar(mra$x) - nchar(mra$y)) >= 3] <- 100
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3015
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3016
    ## Start the comparison.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3017
    x <- strsplit(mra$x, split = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3018
    y <- strsplit(mra$y, split = "")
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3019
    rows <- nrow(mra)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3020
    for(i in 1:rows) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3021
        ## Process the encoded strings from left to right and remove any
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3022
        ## identical characters found from both strings respectively.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3023
        j <- 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3024
        while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3025
            if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3026
                x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3027
                y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3028
            } else
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3029
                j <- j + 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3030
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3031
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3032
        ## Process the unmatched characters from right to left and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3033
        ## remove any identical characters found from both names
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3034
        ## respectively.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3035
        x[[i]] <- rev(x[[i]])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3036
        y[[i]] <- rev(y[[i]])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3037
        j <- 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3038
        while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3039
            if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3040
                x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3041
                y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3042
            } else
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3043
                j <- j + 1
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3044
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3045
        ## Subtract the number of unmatched characters from 6 in the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3046
        ## longer string. This is the similarity rating.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3047
        len <- min(length(x[[i]]), length(y[[i]]))
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3048
        mra$sim[i] <- 6 - len
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3049
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3050
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3051
    ## If the similarity is greater than or equal to the minimum
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3052
    ## required, it is a successful match.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3053
    mra$match <- (mra$sim >= mra$min)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3054
    return(mra$match)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3055
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3056
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3057
END>>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3058
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3059
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3060
!PhoneticStringUtilities::MRAStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3061
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3062
encode:wordIn 
    "Answer the Match Rating Approach (MRA) phonetic code of wordIn.
     See https://en.wikipedia.org/wiki/Match_Rating_Approach

     The code is built by:
        - dropping every non-letter and uppercasing the rest,
        - dropping all vowels except a leading one,
        - collapsing runs of identical consecutive characters,
        - keeping only the first 3 and the last 3 characters if more than 6 remain.
     Answers an empty string if wordIn contains no letter at all."

    |word prev|

    "/ First, remove any nonalphabetical characters and uppercase it
    word := wordIn select:#isLetter thenCollect:#asUppercase.

    "/ Nothing left to encode (empty input, or only digits/punctuation):
    "/ bail out here, because 'word first' below fails on an empty collection
    word isEmpty ifTrue:[^ String new].

    "/ Delete vowels not at the start of the word
    word := word first asString , ((word from:2) reject:#isVowel).

    "/ Remove duplicate consecutive characters:
    "/ a repeated character is first replaced by the marker $* (which cannot
    "/ occur in the letters-only word); the markers are filtered out afterwards.
    "/ Compare by equality (=) instead of identity (==), so that the check does
    "/ not depend on equal characters being represented by the very same object.
    prev := nil.
    word := word 
                collect:[:char |
                    char = prev ifTrue:[
                        $*
                    ] ifFalse:[
                        prev := char.
                        char.
                    ].    
                ]
                thenSelect:[:char | char ~= $*].

    "/ If longer than 6 characters, take first and last 3            
    word size > 6 ifTrue:[
        word := (word copyFirst:3),(word copyLast:3)
    ].
    ^ word.

    "
     self new encode:'Catherine'            -> 'CTHRN'
     self new encode:'CatherineCatherine'   -> 'CTHHRN'
     self new encode:'Butter'               -> 'BTR'
     self new encode:'Byrne'                -> 'BYRN'
     self new encode:'Boern'                -> 'BRN'
     self new encode:'Smith'                -> 'SMTH'
     self new encode:'Smyth'                -> 'SMYTH'
     self new encode:'Kathryn'              -> 'KTHRYN'
     self new encode:''                     -> ''
     self new encode:'1234'                 -> ''
    "

    "Created: / 28-07-2017 / 15:19:22 / cg"
    "Modified (comment): / 31-07-2017 / 15:14:31 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3110
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3111
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3112
!PhoneticStringUtilities::MetaphoneStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3113
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3114
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3115
"
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3116
   Ongoing work - do not use at the moment
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3117
   
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3118
   Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3119
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3120
   Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3121
   Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3122
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3123
    Hanging on the Metaphone by Lawrence Philips in Computer Language of Dec. 1990, p 39.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3124
    Note that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3125
    https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3126
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3127
  They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3128
  For more information, see https://issues.apache.org/jira/browse/CODEC-57
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3129
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3130
  Metaphone uses the following rules:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3131
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3132
    Doubled letters except 'c' -> drop 2nd letter.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3133
    Vowels are only kept when they are the first letter.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3134
    B -> B unless at the end of a word after 'm' as in 'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3135
    C -> X (sh) if -cia- or -ch-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3136
    C -> S if -ci-, -ce- or -cy-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3137
    C -> K otherwise, including -sch-
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3138
    D -> J if in -dge-, -dgy- or -dgi-; T otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3139
    F -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3140
    G -> silent if in -gh- and not at end or before a vowel; silent in -gn- or -gned- (also see dge etc. above)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3141
    G -> J if before i or e or y if not double gg; K otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3142
    H -> silent if after vowel and no vowel follows; H otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3143
    J -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3144
    K -> silent if after 'c'; K otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3145
    L -> L
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3146
    M -> M
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3147
    N -> N
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3148
    P -> F if before 'h'; P otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3149
    Q -> K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3150
    R -> R
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3151
    S -> X (sh) if before 'h' or in -sio- or -sia-; S otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3152
    T -> X (sh) if -tia- or -tio-; 0 (th) if before 'h'; silent if in -tch-; T otherwise
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3153
    V -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3154
    W -> silent if not followed by a vowel; W if followed by a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3155
    X -> KS
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3156
    Y -> silent if not followed by a vowel; Y if followed by a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3157
    Z -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3158
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3159
    Initial Letter Exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3160
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3161
    Initial kn-, gn-, pn-, ae- or wr- -> drop first letter
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3162
    Initial x- -> change to 's'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3163
    Initial wh- -> change to 'w'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3164
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3165
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3166
     self new encode:'a'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3167
     self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3168
     self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3169
     self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3170
     self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3171
     self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3172
     self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3173
     self new encode:'weber'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3174
     self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3175
     self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3176
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3177
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3178
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3179
javaCode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3180
"<<END
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3181
/*
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3182
 * Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3183
 * contributor license agreements.  See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3184
 * this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3185
 * The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3186
 * (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3187
 * the License.  You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3188
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3189
 *      http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3190
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3191
 * Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3192
 * distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3193
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3194
 * See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3195
 * limitations under the License.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3196
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3197
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3198
package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3199
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3200
import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3201
import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3202
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3203
/**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3204
 * Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3205
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3206
 * Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3207
 * Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3208
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3209
 * <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3210
 * p 39.</CITE>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3211
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3212
 * Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3213
 * </p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3214
 * <ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3215
 * <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3216
 *  (broken link 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3217
 * <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3218
 *  (link checked 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3219
 * </ul>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3220
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3221
 * They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3222
 * For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3223
 * <p>
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3224
 * This class is conditionally thread-safe.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3225
 * The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3226
 * but is not volatile, and accesses are not synchronized.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3227
 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3228
 * is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3229
 * after initial setup.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3230
 *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3231
 * @version $Id$
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3232
 */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3233
public class Metaphone implements StringEncoder {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3234
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3235
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3236
     * Five values in the English language
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3237
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3238
    private static final String VOWELS = "AEIOU";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3239
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3240
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3241
     * Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3242
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3243
    private static final String FRONTV = "EIY";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3244
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3245
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3246
     * Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3247
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3248
    private static final String VARSON = "CSPTG";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3249
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3250
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3251
     * The max code length for metaphone is 4
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3252
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3253
    private int maxCodeLen = 4;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3254
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3255
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3256
     * Creates an instance of the Metaphone encoder
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3257
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3258
    public Metaphone() {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3259
        super();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3260
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3261
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3262
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3263
     * Find the metaphone value of a String. This is similar to the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3264
     * soundex algorithm, but better at finding similar sounding words.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3265
     * All input is converted to upper case.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3266
     * Limitations: Input format is expected to be a single ASCII word
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3267
     * with only characters in the A - Z range, no punctuation or numbers.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3268
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3269
     * @param txt String to find the metaphone code for
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3270
     * @return A metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3271
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3272
    public String metaphone(final String txt) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3273
        boolean hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3274
        int txtLength;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3275
        if (txt == null || (txtLength = txt.length()) == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3276
            return "";
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3277
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3278
        // single character is itself
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3279
        if (txtLength == 1) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3280
            return txt.toUpperCase(java.util.Locale.ENGLISH);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3281
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3282
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3283
        final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3284
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3285
        final StringBuilder local = new StringBuilder(40); // manipulate
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3286
        final StringBuilder code = new StringBuilder(10); //   output
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3287
        // handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3288
        switch(inwd[0]) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3289
        case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3290
        case 'G':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3291
        case 'P': /* looking for KN, etc*/
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3292
            if (inwd[1] == 'N') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3293
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3294
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3295
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3296
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3297
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3298
        case 'A': /* looking for AE */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3299
            if (inwd[1] == 'E') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3300
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3301
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3302
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3303
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3304
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3305
        case 'W': /* looking for WR or WH */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3306
            if (inwd[1] == 'R') {   // WR -> R
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3307
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3308
                break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3309
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3310
            if (inwd[1] == 'H') {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3311
                local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3312
                local.setCharAt(0, 'W'); // WH -> W
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3313
            } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3314
                local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3315
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3316
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3317
        case 'X': /* initial X becomes S */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3318
            inwd[0] = 'S';
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3319
            local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3320
            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3321
        default:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3322
            local.append(inwd);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3323
        } // now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3324
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3325
        final int wdsz = local.length();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3326
        int n = 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3327
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3328
        while (code.length() < this.getMaxCodeLen() &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3329
               n < wdsz ) { // max code size of 4 works well
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3330
            final char symb = local.charAt(n);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3331
            // remove duplicate letters except C
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3332
            if (symb !!= 'C' && isPreviousChar( local, n, symb ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3333
                n++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3334
            } else { // not dup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3335
                switch(symb) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3336
                case 'A':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3337
                case 'E':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3338
                case 'I':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3339
                case 'O':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3340
                case 'U':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3341
                    if (n == 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3342
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3343
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3344
                    break; // only use vowel if leading char
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3345
                case 'B':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3346
                    if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3347
                         isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3348
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3349
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3350
                    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3351
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3352
                case 'C': // lots of C special cases
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3353
                    /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3354
                    if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3355
                         !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3356
                         FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3357
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3358
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3359
                    if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3360
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3361
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3362
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3363
                    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3364
                        FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3365
                        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3366
                        break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3367
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3368
                    if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3369
                        isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3370
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3371
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3372
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3373
                    if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3374
                        if (n == 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3375
                            wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3376
                            isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3377
                            code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3378
                        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3379
                            code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3380
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3381
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3382
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3383
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3384
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3385
                case 'D':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3386
                    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3387
                        isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3388
                        FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3389
                        code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3390
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3391
                        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3392
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3393
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3394
                case 'G': // GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3395
                    if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3396
                        isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3397
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3398
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3399
                    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3400
                        isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3401
                        !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3402
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3403
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3404
                    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3405
                        ( regionMatch(local, n, "GN") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3406
                          regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3407
                        break; // silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3408
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3409
                    if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3410
                        // NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3411
                        hard = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3412
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3413
                        hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3414
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3415
                    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3416
                        FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3417
                        !!hard) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3418
                        code.append('J');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3419
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3420
                        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3421
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3422
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3423
                case 'H':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3424
                    if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3425
                        break; // terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3426
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3427
                    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3428
                        VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3429
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3430
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3431
                    if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3432
                        code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3433
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3434
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3435
                case 'F':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3436
                case 'J':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3437
                case 'L':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3438
                case 'M':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3439
                case 'N':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3440
                case 'R':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3441
                    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3442
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3443
                case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3444
                    if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3445
                        if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3446
                            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3447
                        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3448
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3449
                        code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3450
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3451
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3452
                case 'P':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3453
                    if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3454
                        // PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3455
                        code.append('F');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3456
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3457
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3458
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3459
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3460
                case 'Q':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3461
                    code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3462
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3463
                case 'S':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3464
                    if (regionMatch(local,n,"SH") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3465
                        regionMatch(local,n,"SIO") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3466
                        regionMatch(local,n,"SIA")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3467
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3468
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3469
                        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3470
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3471
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3472
                case 'T':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3473
                    if (regionMatch(local,n,"TIA") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3474
                        regionMatch(local,n,"TIO")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3475
                        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3476
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3477
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3478
                    if (regionMatch(local,n,"TCH")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3479
                        // Silent if in "TCH"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3480
                        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3481
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3482
                    // substitute numeral 0 for TH (resembles theta after all)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3483
                    if (regionMatch(local,n,"TH")) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3484
                        code.append('0');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3485
                    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3486
                        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3487
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3488
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3489
                case 'V':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3490
                    code.append('F'); break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3491
                case 'W':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3492
                case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3493
                    if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3494
                        isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3495
                        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3496
                    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3497
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3498
                case 'X':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3499
                    code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3500
                    code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3501
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3502
                case 'Z':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3503
                    code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3504
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3505
                default:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3506
                    // do nothing
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3507
                    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3508
                } // end switch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3509
                n++;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3510
            } // end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3511
            if (code.length() > this.getMaxCodeLen()) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3512
                code.setLength(this.getMaxCodeLen());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3513
            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3514
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3515
        return code.toString();
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3516
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3517
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3518
    private boolean isVowel(final StringBuilder string, final int index) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3519
        return VOWELS.indexOf(string.charAt(index)) >= 0;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3520
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3521
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3522
    private boolean isPreviousChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3523
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3524
        if( index > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3525
            index < string.length() ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3526
            matches = string.charAt(index - 1) == c;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3527
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3528
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3529
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3530
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3531
    private boolean isNextChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3532
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3533
        if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3534
            index < string.length() - 1 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3535
            matches = string.charAt(index + 1) == c;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3536
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3537
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3538
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3539
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3540
    private boolean regionMatch(final StringBuilder string, final int index, final String test) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3541
        boolean matches = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3542
        if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3543
            index + test.length() - 1 < string.length() ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3544
            final String substring = string.substring( index, index + test.length());
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3545
            matches = substring.equals( test );
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3546
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3547
        return matches;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3548
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3549
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3550
    private boolean isLastChar(final int wdsz, final int n) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3551
        return n + 1 == wdsz;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3552
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3553
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3554
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3555
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3556
     * Encodes an Object using the metaphone algorithm.  This method
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3557
     * is provided in order to satisfy the requirements of the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3558
     * Encoder interface, and will throw an EncoderException if the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3559
     * supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3560
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3561
     * @param obj Object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3562
     * @return An object (of type java.lang.String) containing the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3563
     *         metaphone code which corresponds to the String supplied.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3564
     * @throws EncoderException if the parameter supplied is not
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3565
     *                          of type java.lang.String
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3566
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3567
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3568
    public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3569
        if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3570
            throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3571
        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3572
        return metaphone((String) obj);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3573
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3574
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3575
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3576
     * Encodes a String using the Metaphone algorithm.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3577
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3578
     * @param str String object to encode
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3579
     * @return The metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3580
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3581
    @Override
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3582
    public String encode(final String str) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3583
        return metaphone(str);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3584
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3585
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3586
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3587
     * Tests if the metaphones of two strings are identical.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3588
     *
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3589
     * @param str1 First of two strings to compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3590
     * @param str2 Second of two strings to compare
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3591
     * @return <code>true</code> if the metaphones of these strings are identical,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3592
     *        <code>false</code> otherwise.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3593
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3594
    public boolean isMetaphoneEqual(final String str1, final String str2) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3595
        return metaphone(str1).equals(metaphone(str2));
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3596
    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3597
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3598
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3599
     * Returns the maxCodeLen.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3600
     * @return int
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3601
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3602
    public int getMaxCodeLen() { return this.maxCodeLen; }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3603
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3604
    /**
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3605
     * Sets the maxCodeLen.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3606
     * @param maxCodeLen The maxCodeLen to set
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3607
     */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3608
    public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3609
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3610
}
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3611
END>>"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3612
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3613
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3614
!PhoneticStringUtilities::MetaphoneStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3615
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3616
encode:txt
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3617
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3618
     self new encode:'a'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3619
     self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3620
     self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3621
     self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3622
     self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3623
     self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3624
     self new encode:'weber'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3625
     self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3626
     self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3627
     self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3628
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3629
    
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3630
    |hard txtLength local code inwd ch ch2 wdsz n|
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3631
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3632
    inwd := txt.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3633
    hard := false.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3634
    txtLength := 0.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3635
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3636
    (txtLength := txt size) == 0 ifTrue:[^ ''].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3637
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3638
    inwd := txt asUppercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3639
    "/ single character is itself
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3640
    (txtLength == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3641
        ^ inwd        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3642
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3643
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3644
    code := '' writeStream.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3645
    local := inwd.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3646
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3647
    "/ handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3648
    ch := inwd at:(0+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3649
    ch2 := inwd at:(1+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3650
    ('KGP' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3651
        "/ looking for KN, etc
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3652
        "/ KNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3653
        "/ GNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3654
        "/ PNx -> Nx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3655
        (ch2 == $N) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3656
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3657
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3658
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3659
    ('A' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3660
        "/ looking for AE
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3661
        "/ AEx -> Ex 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3662
        (ch2 == $E) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3663
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3664
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3665
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3666
    ('W' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3667
        "/ looking for WR or WH 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3668
        (ch2 == $R) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3669
            "/ WRx -> Rx (the leading W is dropped)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3670
            local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3671
        ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3672
            (ch2 == $H) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3673
                "/ WHx -> Wx
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3674
                local := 'W',(inwd from:2+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3675
            ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3676
        ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3677
    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3678
    ('X' includes:ch) ifTrue:[  
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3679
        "/ initial X becomes S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3680
        "/ Xx -> Sx 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3681
        local := 'S',(inwd from:1+1).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3682
    ]]]].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3683
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3684
    "/ now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3685
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3686
    wdsz := local size.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3687
    n := 1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3688
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3689
    [ n <= wdsz ] whileTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3690
        "/ max code size of 4 works well
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3691
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3692
        |symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3693
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3694
        symb := local at:n.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3695
        (n > 1) ifTrue:[ prevChar := local at:(n-1) ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3696
        (isLastChar := (n == wdsz)) ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3697
            nextChar := local at:(n+1) 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3698
        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3699
        isPrevToLastChar := (n == (wdsz-1)).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3700
        (n+2) <= wdsz ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3701
            nextNextChar := local at:(n+2)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3702
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3703
        
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3704
        "/ remove duplicate letters except C and except first
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3705
        (symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3706
            "/ not dup
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3707
            ('AEIOU' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3708
                "/ only use vowel if leading char
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3709
                (n == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3710
                    code nextPut:symb
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3711
                ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3712
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3713
            ('B' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3714
                "/    if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3715
                "/         isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3716
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3717
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3718
                "/    code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3719
                "/    break;
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3720
                (isLastChar and:[ prevChar == $M]) ifTrue:[
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3721
                    "/ B is silent if word ends in MB 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3722
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3723
                    code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3724
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3725
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3726
            ('C' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3727
                "/ lots of C special cases    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3728
                "/    /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3729
                "/    if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3730
                "/         !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3731
                "/         FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3732
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3733
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3734
                "/    if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3735
                "/        code.append('X');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3736
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3737
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3738
                "/    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3739
                "/        FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3740
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3741
                "/        break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3742
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3743
                "/    if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3744
                "/        isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3745
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3746
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3747
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3748
                "/    if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3749
                "/        if (n == 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3750
                "/            wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3751
                "/            isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3752
                "/            code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3753
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3754
                "/            code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3755
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3756
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3757
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3758
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3759
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3760
                (prevChar == $S and:[ 'EIY' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3761
                    "/ discard if SCI, SCE or SCY
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3762
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3763
                    ((nextChar == $I) and:[ nextNextChar == $A ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3764
                        "/  "CIA" -> X 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3765
                        code nextPut:$X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3766
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3767
                        ('IEY' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3768
                            "/ CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3769
                            code nextPut:$S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3770
                        ] ifFalse:[ 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3771
                           ((prevChar == $S) and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3772
                               "/ SCH->sk
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3773
                                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3774
                            ] ifFalse:[ 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3775
                                nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3776
                                    "/ CH
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3777
                                    ('AEIOU' includes:nextNextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3778
                                        code nextPut:$K "/ CH consonant -> K consonant 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3779
                                    ] ifFalse:[    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3780
                                        code nextPut:$X "/ CHvowel -> X
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3781
                                    ]    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3782
                                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3783
                                    code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3784
                                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3785
                            ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3786
                        ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3787
                    ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3788
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3789
                
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3790
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3791
            ('D' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3792
                "/    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3793
                "/        isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3794
                "/        FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3795
                "/        code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3796
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3797
                "/        code.append('T');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3798
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3799
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3800
                ((nextChar == $G)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3801
                and:[ (local from:n) startsWithAnyOf:#('DGE' 'DGI' 'DGY') ])
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3802
                ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3803
                    code nextPut:$J.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3804
                    n := n + 2.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3805
                ] ifFalse:[    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3806
                    code nextPut:$T.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3807
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3808
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3809
            ('G' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3810
                "/    GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3811
                "/    if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3812
                "/        isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3813
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3814
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3815
                "/    if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3816
                "/        isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3817
                "/        !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3818
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3819
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3820
                "/    if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3821
                "/        ( regionMatch(local, n, "GN") ||
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3822
                "/          regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3823
                "/        break; // silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3824
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3825
                "/    if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3826
                "/        // NOTE: Given that duplicated chars are removed, I dont see how this can ever be true
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3827
                "/        hard = true;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3828
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3829
                "/        hard = false;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3830
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3831
                "/    if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3832
                "/        FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3833
                "/        !!hard) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3834
                "/        code.append('J');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3835
                "/    } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3836
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3837
                "/    }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3838
                "/    break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3839
                (isPrevToLastChar and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3840
                    "/ GH silent at end
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3841
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3842
                    (isPrevToLastChar not and:[ nextChar == $H 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3843
                      and:[ ('AEIOU' includes:nextNextChar) not ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3844
                        "/ GH silent before consonant
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3845
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3846
                        (n > 1 and:[ nextChar == $N ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3847
                            "/ GN -> silent G
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3848
                        ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3849
                            hard := (prevChar == $G).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3850
                            (isLastChar not and:[ hard not and:[ ('EIY' includes:nextChar) ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3851
                                code nextPut:$J
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3852
                            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3853
                                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3854
                            ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3855
                        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3856
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3857
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3858
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3859
            ('H' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3860
                "/    case 'H':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3861
                "/        if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3862
                "/            break; // terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3863
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3864
                "/        if (n > 0 &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3865
                "/            VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3866
                "/            break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3867
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3868
                "/        if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3869
                "/            code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3870
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3871
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3872
                isLastChar ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3873
                    "/ ignore terminal H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3874
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3875
                    ('CSPTG' includes:prevChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3876
                        "/ ignore CH, SH, PH, TH, GH (H treated there)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3877
                    ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3878
                        ('AEIOU' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3879
                            "/ Hvowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3880
                            code nextPut:$H
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3881
                        ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3882
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3883
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3884
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3885
            ('FJLMNR' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3886
                "/    case 'F':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3887
                "/    case 'J':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3888
                "/    case 'L':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3889
                "/    case 'M':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3890
                "/    case 'N':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3891
                "/    case 'R':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3892
                "/        code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3893
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3894
                code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3895
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3896
            ('K' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3897
                "/    case 'K':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3898
                "/        if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3899
                "/            if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3900
                "/                code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3901
                "/            }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3902
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3903
                "/            code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3904
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3905
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3906
                n > 1 ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3907
                    "/ not initial
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3908
                    prevChar ~~ $C ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3909
                        code nextPut:$K. "/ K not preceded by C (in CK, the C already produced the K)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3910
                    ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3911
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3912
                    code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3913
                ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3914
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3915
            ('P' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3916
                "/    case 'P':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3917
                "/        if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3918
                "/            // PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3919
                "/            code.append('F');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3920
                "/        } else {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3921
                "/            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3922
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3923
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3924
                nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3925
                    "/ PH -> F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3926
                    code nextPut:$F.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3927
                ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3928
                    code nextPut:symb.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3929
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3930
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3931
            ('Q' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3932
                "/    case 'Q':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3933
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3934
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3935
                code nextPut:$K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3936
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3937
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3938
            ('S' includes:symb) ifTrue:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3939
                "/    case 'S':
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3940
                "/        if (regionMatch(local,n,"SH") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3941
                "/            regionMatch(local,n,"SIO") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3942
                "/            regionMatch(local,n,"SIA")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3943
                "/            code.append('X');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3944
                "/        } else {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3945
                "/            code.append('S');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3946
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3947
                "/        break;
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3948
                "/ SH -> X  (as in shave or ashton)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3949
                "/ SIO -> X 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3950
                "/ SIA -> X (as in ASIA)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3951
                ((nextChar == $H) 
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3952
                  or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3953
                ) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3954
                    code nextPut:$X
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3955
                ] ifFalse:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3956
                    code nextPut:$S
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3957
                ]
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3958
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3959
            ('T' includes:symb) ifTrue:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3960
                "/    case 'T':
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3961
                "/        if (regionMatch(local,n,"TIA") ||
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3962
                "/            regionMatch(local,n,"TIO")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3963
                "/            code.append('X');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3964
                "/            break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3965
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3966
                "/        if (regionMatch(local,n,"TCH")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3967
                "/            // Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3968
                "/            break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3969
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3970
                "/        // substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3971
                "/        if (regionMatch(local,n,"TH")) {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3972
                "/            code.append('0');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3973
                "/        } else {
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3974
                "/            code.append('T');
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3975
                "/        }
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3976
                "/        break;
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3977
                (nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3978
                    code nextPut:$X.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3979
                ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3980
                    (nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3981
                        "/ Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3982
                        "/ cg - huh; hutch - methinksthereisat
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3983
                    ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3984
                        "/ substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3985
                        nextChar == $H ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3986
                            code nextPut:$0.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3987
                        ] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3988
                            code nextPut:$T.
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3989
                        ].    
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3990
                    ].    
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  3991
                ].    
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3992
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3993
            ('V' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3994
                "/    case 'V':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3995
                "/        code.append('F'); break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3996
                code nextPut:$F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3997
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3998
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  3999
            ('WY' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4000
                "/    case 'W':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4001
                "/    case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4002
                "/        if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4003
                "/            isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4004
                "/            code.append(symb);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4005
                "/        }
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4006
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4007
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4008
                "/ silent if not followed by vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4009
                (isLastChar not and:[ 'AEIOU' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4010
                    code nextPut:symb
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4011
                ].    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4012
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4013
            ('X' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4014
                "/    case 'X':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4015
                "/        code.append('K');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4016
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4017
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4018
                code nextPutAll:'KS'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4019
            ] ifFalse:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4020
            ('Z' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4021
                "/    case 'Z':
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4022
                "/        code.append('S');
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4023
                "/        break;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4024
                code nextPut:$S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4025
            ] ifFalse:[
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  4026
                "/    default:
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  4027
                "/        // do nothing
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  4028
                "/        break;
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4029
            ]]]]]]]]]]]]]]]]. "/ end switch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4030
        ]. "/ end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4031
        n := n + 1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4032
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4033
    ^ code contents
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4034
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4035
    "Created: / 02-08-2017 / 09:51:31 / cg"
4495
5d2da4bddbda #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 4491
diff changeset
  4036
    "Modified: / 03-08-2017 / 14:55:22 / cg"
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4037
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4038
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4039
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4040
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4041
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4042
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4043
    WARNING: this is the so called 'simplified soundex' algorithm;
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4044
      there are more variants like miracode (american soundex) or
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4045
      mysqlSoundex around.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4046
      
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4047
      Be sure to use the correct algorithm, if the generated strings must be compatible
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4048
      (otherwise, the differences are probably too small to be noticed as effect, but
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4049
      your search will be different)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4050
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4051
    The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4052
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4053
    SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4054
    components of names, but by doing so reports more matches. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4055
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4056
    There are some variations around in the literature; 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4057
    the following is called 'simplified soundex', and the rules for coding a name are:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4058
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4059
    1. The first letter of the name is used in its un-coded form to serve as the prefix
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4060
       character of the code. (The rest of the code is numerical).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4061
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4062
    2. Thereafter, W and H are ignored entirely.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4063
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4064
    3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4065
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4066
    4. Other letters of the name are converted to a numerical equivalent:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4067
                 B, P, F, V              1 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4068
                 C, G, J, K, Q, S, X, Z  2 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4069
                 D, T                    3 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4070
                 L                       4 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4071
                 M, N                    5 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4072
                 R                       6 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4073
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4074
    5. There are two exceptions: 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4075
        1. Letters that follow prefix letters which would, if coded, have the same
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4076
           numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4077
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4078
        2. The second letter of any pair of consonants having the same code number is likewise ignored, 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4079
           i.e. unless there is a ''separator'' between them in the name.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4080
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4081
    6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4082
       Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4083
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4084
    Notice, that in another variant, w and h are treated slightly differently.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4085
    This is only of relevance, if you need to reconstruct original soundex codes of other programs
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4086
    or for the original 1880 US census data.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4087
     SoundexStringComparator  new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4088
    vs.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4089
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4090
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4091
    Also notice that soundex deals better with English. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4092
    For German and other languages, other algorithms may provide better results.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4093
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4094
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4095
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4096
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4097
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4098
encode:word 
    "Answer the simplified soundex code of word as a 4-character string:
     the uppercased first character, followed by up to three code digits,
     padded on the right with '0'.
     NOTE(review): 'u first' is sent unconditionally; an empty word is not
     handled here - confirm what callers may pass."
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4099
    |u p t prevCode|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4100
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4101
    u := word asUppercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4102
    p := u first asString.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4103
    "the first character is kept verbatim; its code is only remembered, so that
     a directly following character with the same code is suppressed"
    prevCode := self translate:u first.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4104
    u from:2 to:u size do:[:c | 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4105
        t := self translate:c.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4106
        "append t only if it is a real code digit: not nil (no code for c),
         not the '0' separator, and not a repetition of the previous code"
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4107
            p := p , t.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4108
            "finished as soon as the prefix plus three digits have been collected"
            p size == 4 ifTrue:[^ p ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4109
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4110
        "unconditional: a separator ('0') or a character without code (nil)
         also replaces the previous code, so the same digit may follow again"
        prevCode := t
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4111
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4112
    "fewer than three digits were found: pad with '0' up to 4 characters"
    [ p size < 4 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4113
        p := p , '0'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4114
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4115
    ^ (p copyFrom:1 to:4)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4116
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4117
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4118
     self new encode:'washington' -> 'W252'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4119
     self new encode:'lee'        -> 'L000'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4120
     self new encode:'Gutierrez'  -> 'G362'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4121
     self new encode:'Pfister'    -> 'P236'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4122
     self new encode:'Jackson'    -> 'J250'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4123
     self new encode:'Tymczak'    -> 'T522'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4124
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4125
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4126
    "notice:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4127
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4128
     self new encode:'Ashcraft'   -> 'A226'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4129
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4130
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4131
    "Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4132
    "Modified (comment): / 01-08-2017 / 19:01:43 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4133
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4134
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4135
!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4136
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4137
translate:aCharacter
    "Answer the soundex code of aCharacter as a one-character string:
     '0' for A, E, I, O, U and Y, '1' .. '6' for the consonant groups below,
     and nil for every other character.
     Only uppercase character literals are compared (with ==), so the caller
     is expected to pass uppercased input."
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4138
    "use simple if's for more speed when compiled"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4139
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4140
    "vowels (and Y) get the pseudo code '0'; encode: never emits it,
     but treats it as a separator"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4141
    aCharacter == $A ifTrue:[^ '0' ].         
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4142
    aCharacter == $E ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4143
    aCharacter == $I ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4144
    aCharacter == $O ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4145
    aCharacter == $U ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4146
    aCharacter == $Y ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4147
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4148
    "group 1: B, P, F, V"
    aCharacter == $B ifTrue:[^ '1' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4149
    aCharacter == $P ifTrue:[^ '1' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4150
    aCharacter == $F ifTrue:[^ '1' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4151
    aCharacter == $V ifTrue:[^ '1' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4152
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4153
    "group 2: C, S, K, G, J, Q, X, Z"
    aCharacter == $C ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4154
    aCharacter == $S ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4155
    aCharacter == $K ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4156
    aCharacter == $G ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4157
    aCharacter == $J ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4158
    aCharacter == $Q ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4159
    aCharacter == $X ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4160
    aCharacter == $Z ifTrue:[^ '2' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4161
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4162
    "group 3: D, T"
    aCharacter == $D ifTrue:[^ '3' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4163
    aCharacter == $T ifTrue:[^ '3' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4164
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4165
    "group 4: L"
    aCharacter == $L ifTrue:[^ '4' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4166
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4167
    "group 5: M, N"
    aCharacter == $M ifTrue:[^ '5' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4168
    aCharacter == $N ifTrue:[^ '5' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4169
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4170
    "group 6: R"
    aCharacter == $R ifTrue:[^ '6' ]. 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4171
    "no code for anything else: W, H, lowercase letters, digits, punctuation, ..."
    ^ nil
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4172
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4173
    "Modified: / 02-08-2017 / 01:35:40 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4174
    "Modified (comment): / 02-08-2017 / 14:30:11 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4175
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4176
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4177
!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4178
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4179
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4180
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4181
    MySQL soundex is like American Soundex (i.e. miracode) without the 4 character limitation,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4182
    and also removing vowels first, then removing duplicate codes
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4183
    (whereas the soundex code does this in reverse order).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4184
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4185
    These variations are important, if you need the miracode soundex codes to be generated.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4186
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4187
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4188
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4189
!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4190
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4191
encode:word 
    "Answer the soundex code of word without truncating it to 4 characters
     (shorter results are still padded with '0' up to 4 characters).
     A '0' separator and the letters W and H do not change the remembered
     previous code, so equal codes separated only by them are emitted once.
     NOTE(review): 'u first' is sent unconditionally; an empty word is not
     handled here - confirm what callers may pass."
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4193
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4194
    |u p t prevCode|
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4195
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4196
    u := word asUppercase.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4197
    p := u first asString.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4198
    "the first character is kept verbatim; its code is only remembered for
     duplicate suppression"
    prevCode := self translate:u first.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4199
    u from:2 to:u size do:[:c |
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4200
        t := self translate:c.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4201
        "append t only if it is a real code digit: not nil (no code for c),
         not the '0' separator, and not a repetition of the previous code"
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4202
            p := p , t.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4203
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4204
        "separators ('0'), W and H leave prevCode untouched;
         any other character without a code (t is nil) resets it to nil"
        (t ~= '0' and:[ c ~= $W and:[c ~= $H]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4205
            prevCode := t.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4206
        ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4207
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4208
    "pad short codes with '0' up to 4 characters; longer codes are answered as is"
    [ p size < 4 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4209
        p := p , '0'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4210
    ].
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4211
    ^ p
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4212
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4213
    "Created: / 28-07-2017 / 15:23:41 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4214
    "Modified: / 31-07-2017 / 17:53:51 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4215
    "Modified (comment): / 02-08-2017 / 14:31:15 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4216
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4217
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4218
!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4219
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4220
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4221
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4222
    NYSIIS Algorithm:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4223
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4224
    1.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4225
        remove all ''S'' and ''Z'' chars from the end of the surname 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4226
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4227
    2.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4228
        transcode initial strings
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4229
            MAC => MC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4230
            PF => F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4231
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4232
    3.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4233
        Transcode trailing strings as follows,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4234
        
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4235
            IX => IC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4236
            EX => EC
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4237
            YE,EE,IE => Y
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4238
            NT,ND => D 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4239
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4240
    4.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4241
        transcode ''EV'' to ''EF'' if not at start of name
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4242
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4243
    5.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4244
        use first character of name as first character of key 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4245
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4246
    6.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4247
        remove any ''W'' that follows a vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4248
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4249
    7.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4250
        replace all vowels with ''A'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4251
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4252
    8.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4253
        transcode ''GHT'' to ''GT'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4254
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4255
    9.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4256
        transcode ''DG'' to ''G'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4257
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4258
    10.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4259
        transcode ''PH'' to ''F'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4260
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4261
    11.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4262
        if not first character, eliminate all ''H'' preceded or followed by a vowel 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4263
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4264
    12.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4265
        change ''KN'' to ''N'', else ''K'' to ''C'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4266
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4267
    13.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4268
        if not first character, change ''M'' to ''N'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4269
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4270
    14.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4271
        if not first character, change ''Q'' to ''G'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4272
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4273
    15.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4274
        transcode ''SH'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4275
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4276
    16.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4277
        transcode ''SCH'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4278
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4279
    17.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4280
        transcode ''YW'' to ''Y'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4281
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4282
    18.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4283
        if not first or last character, change ''Y'' to ''A'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4284
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4285
    19.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4286
        transcode ''WR'' to ''R'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4287
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4288
    20.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4289
        if not first character, change ''Z'' to ''S'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4290
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4291
    21.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4292
        transcode terminal ''AY'' to ''Y'' 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4293
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4294
    22.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4295
        remove trailing vowels 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4296
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4297
    23.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4298
        collapse all strings of repeated characters 
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4299
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4300
    24.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4301
        if first char of original surname was a vowel, append it to the code
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4302
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4303
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4304
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4305
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4306
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4307
encode:aString 
    "Answer the phonetic key computed for aString.
     The argument is converted to uppercase and then passed through
     the rule methods rule1: .. rule24:originalKey: in numeric order,
     each rule receiving the result of the previous one.
     The unmodified argument is handed to the last rule, which looks
     at its first character.

     An empty argument is answered as an (empty) copy: the rule methods
     look at the first/last character of their argument and would raise
     an error on an empty string."

    |k|

    aString isEmpty ifTrue:[ ^ aString copy ].

    "1. Remove all 'S' and 'Z' chars from the end of the name"
    k := self rule1:(aString asUppercase).
    "2. Transcode initial strings:  MAC => MC   PF => F"
    k := self rule2:k.
    k := self rule3:k.
    k := self rule4:k.
    k := self rule5:k.
    k := self rule6:k.
    k := self rule7:k.
    k := self rule8:k.
    k := self rule9:k.
    k := self rule10:k.
    k := self rule11:k.
    k := self rule12:k.
    k := self rule13:k.
    k := self rule14:k.
    k := self rule15:k.
    k := self rule16:k.
    k := self rule17:k.
    k := self rule18:k.
    k := self rule19:k.
    k := self rule20:k.
    k := self rule21:k.
    k := self rule22:k.
    k := self rule23:k.
    "24. needs the original name, because only its first character matters"
    k := self rule24:k originalKey:aString.
    ^ k

    "
     self new encode:'hello'
     self new encode:'bliss'
     self new encode:''
    "
    "
     self new phoneticStringsFor:'hello'
     self new phoneticStringsFor:'bliss'
    "

    "Created: / 28-07-2017 / 15:34:52 / cg"
    "Modified (comment): / 02-08-2017 / 14:31:47 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4348
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4349
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4350
!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4351
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4352
rule10:key 
    "Rule 10: transcode 'PH' to 'F'.
     The work is delegated to #transcodeAll:of:to:startingAt:
     with a start index of 1, i.e. beginning at the first character."

    |transcoded|

    transcoded := self 
                    transcodeAll:'PH' 
                    of:key 
                    to:'F' 
                    startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:34:27 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4358
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4359
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4360
rule11:key 
    "Rule 11: drop every 'H' which is not the first character and
     which has a vowel directly before or directly after it.
     The neighbour tests are always made against the unmodified key;
     walking from the end towards the front keeps the indices of the
     not yet visited characters valid while characters are removed
     from the result."

    |result lastIndex|

    result := key copy.
    lastIndex := key size.
    lastIndex to:2 by:-1 do:[:idx |
        |vowelBefore vowelAfter|

        (key at:idx) = $H ifTrue:[
            vowelBefore := (key at:idx - 1) isVowel.
            vowelAfter := vowelBefore not
                            and:[ idx < lastIndex 
                            and:[ (key at:idx + 1) isVowel ] ].
            (vowelBefore or:[ vowelAfter ]) ifTrue:[
                result := (result copyFrom:1 to:idx - 1) 
                          , (result copyFrom:idx + 1 to:result size)
            ]
        ]
    ].
    ^ result
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4376
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4377
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4378
rule12:key 
    "Rule 12: change 'KN' to 'N', else 'K' to 'C'.
     The 'KN' replacement has to run first, so that only those 'K's
     which are not followed by an 'N' remain for the second step.
     (Previously 'KN' was rewritten to 'K', which the second step
      then turned into 'C' - contradicting the rule.)"

    |k|

    k := self transcodeAll:'KN' of:key to:'N' startingAt:1.
    k := self transcodeAll:'K' of:k to:'C' startingAt:1.
    ^ k

    "Modified (format): / 02-08-2017 / 14:34:48 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4387
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4388
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4389
rule13:key 
    "Rule 13: change 'M' to 'N', unless it is the first character.
     The first character is skipped by passing 2 as start index
     to #transcodeAll:of:to:startingAt:."

    |startIndex|

    startIndex := 2.
    ^ self transcodeAll:'M' of:key to:'N' startingAt:startIndex

    "Modified (format): / 02-08-2017 / 14:35:00 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4395
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4396
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4397
rule14:key 
    "Rule 14: change 'Q' to 'G', unless it is the first character.
     The first character is skipped by passing 2 as start index
     to #transcodeAll:of:to:startingAt:."

    |startIndex|

    startIndex := 2.
    ^ self transcodeAll:'Q' of:key to:'G' startingAt:startIndex

    "Modified (format): / 02-08-2017 / 14:35:08 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4403
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4404
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4405
rule15:key 
    "Rule 15: transcode 'SH' to 'S'.
     Delegates to #transcodeAll:of:to:startingAt:, beginning
     at the first character."

    |transcoded|

    transcoded := self 
                    transcodeAll:'SH' 
                    of:key 
                    to:'S' 
                    startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:18 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4411
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4412
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4413
rule16:key 
    "Rule 16: transcode 'SCH' to 'S'.
     Delegates to #transcodeAll:of:to:startingAt:, beginning
     at the first character."

    |transcoded|

    transcoded := self 
                    transcodeAll:'SCH' 
                    of:key 
                    to:'S' 
                    startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:25 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4419
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4420
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4421
rule17:key 
    "Rule 17: transcode 'YW' to 'Y'.
     Delegates to #transcodeAll:of:to:startingAt:, beginning
     at the first character."

    |transcoded|

    transcoded := self 
                    transcodeAll:'YW' 
                    of:key 
                    to:'Y' 
                    startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:33 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4427
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4428
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4429
rule18:key 
    "Rule 18: change 'Y' to 'A', unless it is the first or the last character.
     The first character is protected by starting the transcoding at index 2;
     a trailing 'Y' gets transcoded as well and is therefore restored afterwards.

     The restore is only done for keys with more than one character:
     an empty key has no last character (asking for it would raise an error),
     and in a one-character key the only character is never touched by the
     transcoding, which starts at index 2."

    |k|

    k := self transcodeAll:'Y' of:key to:'A' startingAt:2.
    (key size > 1 and:[ key last = $Y ]) ifTrue:[
        k at:k size put:$Y
    ].
    ^ k

    "Modified (format): / 02-08-2017 / 14:35:44 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4440
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4441
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4442
rule19:key 
    "Rule 19: transcode 'WR' to 'R'.
     Delegates to #transcodeAll:of:to:startingAt:, beginning
     at the first character."

    |transcoded|

    transcoded := self 
                    transcodeAll:'WR' 
                    of:key 
                    to:'R' 
                    startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:52 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4448
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4449
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4450
rule1:key 
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     Answers a new string; the argument is not modified.
     If the name is empty or consists of 'S' and 'Z' characters only,
     an empty string is answered (the loop stops as soon as nothing is
     left, instead of asking an empty string for its last character)."

    |k|

    k := key copy.
    [
        k notEmpty and:[ 'SZ' includes:k last ]
    ] whileTrue:[ 
        k := k copyFrom:1 to:(k size - 1) 
    ].
    ^ k

    "
     self new rule1:'JONES'     
     self new rule1:'SZ'        
     self new rule1:''          
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4459
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4460
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4461
rule20:key 
    "Rule 20: change 'Z' to 'S', unless it is the first character.
     The first character is skipped by passing 2 as start index
     to #transcodeAll:of:to:startingAt:."

    |startIndex|

    startIndex := 2.
    ^ self transcodeAll:'Z' of:key to:'S' startingAt:startIndex

    "Modified (format): / 02-08-2017 / 14:36:00 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4467
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4468
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4469
rule21:key 
    "Rule 21: transcode a terminal 'AY' to 'Y'.
     Only the last two characters are of interest, therefore the
     transcoding starts at the second to last index.
     For keys shorter than two characters that index would be 0 or -1,
     which is not a valid (1-based) string index; it is clamped to 1
     (such a key cannot contain 'AY' anyway)."

    |startIndex|

    startIndex := (key size - 1) max:1.
    ^ self transcodeAll:'AY' of:key to:'Y' startingAt:startIndex

    "Modified (format): / 02-08-2017 / 14:36:08 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4475
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4476
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4477
rule22:key 
    "Rule 22: remove trailing vowels.
     Answers a new string; the argument is not modified.
     If the key is empty or consists of vowels only, an empty string
     is answered (the loop stops as soon as nothing is left, instead of
     asking an empty string for its last character)."

    |k|

    k := key copy.
    [ k notEmpty and:[ k last isVowel ] ] whileTrue:[
        k := k copyButLast
    ].
    ^ k

    "
     self new rule22:'LEE'      
     self new rule22:'AEI'      
     self new rule22:''         
    "

    "Modified: / 02-08-2017 / 14:36:42 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4488
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4489
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4490
rule23:key 
    "Rule 23: collapse all strings of repeated characters,
     i.e. of every run of equal adjacent characters only one is kept.
     Answers a new string; the argument is not modified.

     The comparison is always made against the unmodified key.
     The walk goes from the end towards the front (by:-1), so that the
     indices of the characters still to be visited remain valid while
     characters are removed from the result.
     (A plain 'k size to:2 do:' counts upward and therefore never
      executes its body for keys longer than two characters.)"

    |k|

    k := key copy.
    key size to:2 by:-1 do:[:i | 
        (key at:i) = (key at:i - 1) ifTrue:[
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ]
    ].
    ^ k

    "
     self new rule23:'AABBBC'   
     self new rule23:'ABC'      
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4505
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4506
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4507
rule24:key originalKey:originalKey 
    "Rule 24: if the first character of the original surname was a vowel,
     append it (in uppercase) to the code.
     Answers a new string; neither argument is modified.
     An empty originalKey has no first character; in that case the
     code is answered unchanged instead of raising an error."

    |k|

    k := key copy.
    (originalKey notEmpty and:[ originalKey first isVowel ]) ifTrue:[
        k := k , originalKey first asString asUppercase
    ].
    ^ k

    "
     self new rule24:'LC' originalKey:'alice'   
     self new rule24:'BB' originalKey:'bob'     
     self new rule24:'BB' originalKey:''        
    "
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4516
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4517
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4518
rule2:key 
    "Rule 2: transcode the beginning of the name:
        MAC => MC
        PF  => F
     Answers a new string in every case; the argument is not modified.
     The two prefixes exclude each other (a result starting with 'MC'
     cannot start with 'PF'), so each case can answer immediately."

    (key startsWith:'MAC') ifTrue:[
        ^ 'MC' , (key copyFrom:4)
    ].
    (key startsWith:'PF') ifTrue:[
        ^ 'F' , (key copyFrom:3)
    ].
    ^ key copy

    "Modified (format): / 02-08-2017 / 14:31:40 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4533
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4534
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4535
rule3:key 
    "Rule 3: transcode the end of the name:
        IX          => IC
        EX          => EC
        YE, EE, IE  => Y
        NT, ND      => D
     The four substitutions are tried one after the other in the above
     order, each one working on the result of the previous one.
     The substitution itself is done by #transcodeTrailing:of:to:."

    |suffixRules|

    "each entry is: ( (suffixes to look for) replacement )"
    suffixRules := #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
    ).
    ^ suffixRules 
        inject:key copy
        into:[:soFar :eachRule |
            self 
                transcodeTrailing:(eachRule at:1) 
                of:soFar 
                to:(eachRule at:2)
        ]

    "Modified (format): / 02-08-2017 / 14:32:24 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4552
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4553
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4554
rule4:key 
    "4. Transcode 'EV' to 'EF' if not at start of name.
     The search begins at the second character, 
     so an 'EV' at the very beginning of key is left alone."

    |firstSearchIndex|

    firstSearchIndex := 2.
    ^ self 
        transcodeAll:'EV' 
        of:key 
        to:'EF' 
        startingAt:firstSearchIndex

    "Modified (format): / 02-08-2017 / 14:32:35 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4561
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4562
rule5:key 
    "5. Use first character of name as first character of key.
        Nothing has to be done for this rule, because the conversion is done in place;
        the argument is answered as is."

    ^ key

    "Modified (comment): / 02-08-2017 / 14:32:45 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4570
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4571
rule6:key 
    "6. Remove any 'W' that follows a vowel.
     A 'W' at the first position and any 'W' which follows a non-vowel is kept.
     Answers a new string; key itself is not modified."

    |k i|

    k := key copy.
    "/ start at 2: the first character has no predecessor to look at
    i := 2.
    [
        "/ the size check keeps the search index inside the string
        "/ (i may be one beyond the end after skipping or removing a trailing W)
        i <= k size and:[ (i := k indexOf:$W startingAt:i) > 0 ]
    ] whileTrue:[
        (k at:i - 1) isVowel ifTrue:[
            "/ drop the W; the character which followed it is now at index i,
            "/ and its predecessor is still the vowel, so i is not changed
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ] ifFalse:[
            "/ keep this W and continue behind it;
            "/ without advancing, the same W would be found again and again
            i := i + 1
        ]
    ].
    ^ k
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4587
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4588
rule7:key 
    "7. replace all vowels with 'A'.
     Every character for which isVowel answers true is mapped to $A;
     all other characters are taken over unchanged."

    ^ key collect:[:eachCharacter |
        eachCharacter isVowel 
            ifTrue:[ $A ] 
            ifFalse:[ eachCharacter ]
      ]

    "Modified: / 02-08-2017 / 14:33:56 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4594
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4595
rule8:key 
    "8. transcode 'GHT' to 'GT'.
     All occurrences are replaced, including one at the very beginning of key."

    |firstSearchIndex|

    firstSearchIndex := 1.
    ^ self 
        transcodeAll:'GHT' 
        of:key 
        to:'GT' 
        startingAt:firstSearchIndex

    "Modified (format): / 02-08-2017 / 14:34:05 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4602
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4603
rule9:key 
    "9. transcode 'DG' to 'G'.
     All occurrences are replaced, including one at the very beginning of key."

    |firstSearchIndex|

    firstSearchIndex := 1.
    ^ self 
        transcodeAll:'DG' 
        of:key 
        to:'G' 
        startingAt:firstSearchIndex

    "Modified (format): / 02-08-2017 / 14:34:15 / cg"
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4610
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4611
transcodeAll:aString of:key to:replacementString startingAt:start 
    "Answer a copy of key, in which every occurrence of aString at or after 
     the index start has been replaced by replacementString.
     After each replacement, the search is restarted at start, so an occurrence
     which only comes into existence through a replacement is replaced as well."

    |result matchIndex head tail|

    result := key copy.
    [
        matchIndex := result indexOfSubCollection:aString startingAt:start.
        matchIndex > 0
    ] whileTrue:[
        "/ the part before the match and the part behind it are kept
        head := result copyFrom:1 to:matchIndex - 1.
        tail := result copyFrom:matchIndex + aString size to:result size.
        result := head , replacementString , tail
    ].
    ^ result
!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4623
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4624
transcodeTrailing:anArrayOfStrings of:key to:replacementString 
    "Answer a copy of key, in which a trailing occurrence of any of the strings
     in anArrayOfStrings has been replaced by replacementString.
     The candidates are tried in the given order, 
     each one on the result of the previous one.
     Answers a new string; key itself is not modified."

    |answer searchStart|

    answer := key copy.
    anArrayOfStrings do:[:aString | 
        "/ the only index at which aString can still fit is (size - aString size + 1).
        "/ For a key which is shorter than aString, that value is zero or negative;
        "/ it is clamped to 1, so that the search is never started at an invalid index
        "/ (no match is possible in that case anyway)
        searchStart := ((answer size - aString size) + 1) max:1.
        answer := self 
                    transcodeAll:aString
                    of:answer
                    to:replacementString
                    startingAt:searchStart
    ].
    ^ answer
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4637
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4638
!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4639
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4640
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4641
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4642
    Implementation of the PHONEM algorithm, as described in
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4643
    'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4644
    Ein Programm fuer kontextsensitive phonetische Textumwandlung
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4645
    ct Magazin fuer Computer & Technik 25/1998'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4646
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4647
    This algorithm copes better with the German language than the others (it takes umlauts into account)
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4648
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4649
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4650
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4651
!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4652
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4653
encode:aString 
    "Answer the PHONEM code for aString.
     The code is computed in four steps:
        1. the string is converted to uppercase
        2. two-character groups (SC, QU, EI, ...) are replaced, scanning from left to right;
           after a replacement, the scan continues at the same position
        3. single characters are mapped via a transliteration table
        4. every character which is not a code letter is removed, 
           and runs of the same code letter are squashed into one"

    |s idx t t2 doubleChars|

    "/ alternating: two-character group, replacement
    doubleChars := #(
        'SC' 'C'
        'SZ' 'C'
        'CZ' 'C'
        'TZ' 'C'
        'TS' 'C'
        'KS' 'X'
        'PF' 'V'
        'QU' 'KW'
        'PH' 'V'
        'UE' 'Y'
        'AE' 'E'
        'OE' 'Ö'
        'EI' 'AY'
        'EY' 'AY'
        'EU' 'OY'
        'AU' 'A§'
        'OU' '§ '
    ).

    s := aString asUppercase.

    idx := 1.
    "/ idx must go up to (s size - 1), so that the last two characters
    "/ are looked at as well (e.g. the 'TZ' in 'FRITZ' or the 'EU' in 'NEU')
    [idx < s size] whileTrue:[
        t2 := nil.
        t := s copyFrom:idx to:idx+1.
        1 to:doubleChars size by:2 do:[:i |
            (t2 isNil and:[ t = (doubleChars at:i) ]) ifTrue:[
                t2 := doubleChars at:i+1
            ].
        ].
        t2 notNil ifTrue:[
            s := (s copyTo:idx-1),t2,(s copyFrom:idx+2)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "/ P is mapped to B (not D), so that 'wepper' gets the same code as 'weber' and 'webber'
    "/ (see the examples below).
    "/ NOTE(review): the replacement string has one character more than the source string;
    "/ presumably the trailing A is unused - confirm against copyTransliterating:to:
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVBDUA'.
    "/ remove everything which is not a code letter
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    "/ squash runs of the same code letter
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ s

    "
     self basicNew encode:'müller'  -> 'MYLR'    
     self basicNew encode:'mueller' -> 'MYLR'    
     self basicNew encode:'möller'  -> 'MYLR'
     self basicNew encode:'miller'  -> 'MYLR'     
     self basicNew encode:'muller'  -> 'MULR' 
     self basicNew encode:'muler'   -> 'MULR' 

     self basicNew encode:'neu'     -> 'NOY' 
     self basicNew encode:'fritz'   -> 'VRYC' 

     self basicNew phoneticStringsFor:'müller'  #('MYLR')    
     self basicNew phoneticStringsFor:'mueller' #('MYLR')    
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')     
     self basicNew phoneticStringsFor:'muller'  #('MULR') 
     self basicNew phoneticStringsFor:'muler'   #('MULR') 
     
     self basicNew phoneticStringsFor:'schmidt'     #('CMYD')
     self basicNew phoneticStringsFor:'schneider'   #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'     #('VYCR')
     self basicNew phoneticStringsFor:'weber'       #('VBR')
     self basicNew phoneticStringsFor:'weeber'      #('VBR')
     self basicNew phoneticStringsFor:'webber'      #('VBR')
     self basicNew phoneticStringsFor:'wepper'      #('VBR')
     
     self basicNew phoneticStringsFor:'meyer'       #('MAYR')
     self basicNew phoneticStringsFor:'maier'       #('MAYR')
     self basicNew phoneticStringsFor:'mayer'       #('MAYR')
     self basicNew phoneticStringsFor:'mayr'        #('MAYR')
     self basicNew phoneticStringsFor:'meir'        #('MAYR')
     
     self basicNew phoneticStringsFor:'wagner'      #('VACNR')
     self basicNew phoneticStringsFor:'schulz'      #('CULC')
     self basicNew phoneticStringsFor:'becker'      #('BCR')
     self basicNew phoneticStringsFor:'hoffmann'    #('OVMAN')
     self basicNew phoneticStringsFor:'haus'        #('AUS')
     
     self basicNew phoneticStringsFor:'schäfer'     #('CVR')
     self basicNew phoneticStringsFor:'scheffer'    #('CVR')
     self basicNew phoneticStringsFor:'schaeffer'   #('CVR')
     self basicNew phoneticStringsFor:'schaefer'    #('CVR')
    "

    "Created: / 28-07-2017 / 15:38:08 / cg"
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4735
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4736
!PhoneticStringUtilities::Caverphone2StringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4737
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4738
documentation
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4739
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4740
    Caverphone (2) Algorithm:
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4741
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4742
    see http://caversham.otago.ac.nz/files/working/ctp150804.pdf
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4743
    
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4744
    Caverphone 2.0 is being made available for free use for the benefit of anyone who has a use for it,
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4745
    with the proviso that the Caversham Project at the University of Otago should be acknowledged as the
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4746
    original source (which is hereby done ;-).
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4747
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4748
    •  Start with a Surname or Firstname
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4749
    •  Convert to lowercase
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4750
        This coding system is case sensitive, implementations should acknowledge that a is not the same as A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4751
    •  Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4752
        The main intention of this is to remove spaces, hyphens, and apostrophes.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4753
        example:  o'brian becomes obrian
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4754
    •  If the name starts with cough make it cou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4755
        2 is being used as a temporary placeholder to indicate a consonant which we are no longer interested in.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4756
    •  If the name starts with rough make it rou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4757
    •  If the name starts with tough make it tou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4758
    •  If the name starts with enough make it enou2f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4759
    •  If the name starts with gn make it 2n
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4760
    •  If the name ends with mb make it m2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4761
    •  replace cq with 2q
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4762
    •  replace ci with si
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4763
    •  replace ce with se
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4764
    •  replace cy with sy
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4765
    •  replace tch with 2ch
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4766
    •  replace c with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4767
    •  replace q with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4768
    •  replace x with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4769
    •  replace v with f
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4770
    •  replace dg with 2g
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4771
    •  replace tio with sio
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4772
    •  replace tia with sia
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4773
    •  replace d with t
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4774
    •  replace ph with fh
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4775
    •  replace b with p
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4776
    •  replace sh with s2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4777
    •  replace z with s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4778
    •  replace an initial vowel with an A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4779
    •  replace all other vowels with a 3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4780
        3 is a temporary placeholder marking a vowel
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4781
    •  replace 3gh3 with 3kh3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4782
        Exceptions are dealt with before the general case. gh between vowels is an exception to the more general gh rule.
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4783
    •  replace gh with 22
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4784
    •  replace g with k
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4785
    •  replace groups of the letter s with a S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4786
        Continuous strings of s are replaced by a single S
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4787
    •  replace groups of the letter t with a T
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4788
    •  replace groups of the letter p with a P
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4789
    •  replace groups of the letter k with a K
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4790
    •  replace groups of the letter f with a F
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4791
    •  replace groups of the letter m with a M
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4792
    •  replace groups of the letter n with a N
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4793
    •  replace w3 with W3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4794
    •  replace wy with Wy
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4795
    •  replace wh3 with Wh3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4796
    •  replace why with Why
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4797
    •  replace w with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4798
    •  replace an initial h with an A
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4799
    •  replace all other occurrences of h with a 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4800
    •  replace r3 with R3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4801
    •  replace ry with Ry
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4802
    •  replace r with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4803
    •  replace l3 with L3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4804
    •  replace ly with Ly
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4805
    •  replace l with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4806
    •  replace j with y
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4807
    •  replace y3 with Y3
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4808
    •  replace y with 2
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4809
    •  remove all 2s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4810
    •  remove all 3s
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4811
    •  put six (v1) / ten (v2) 1s on the end
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4812
    •  take the first six characters as the code (caverphone 1);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4813
       / take the first ten characters as the code (caverphone 2);
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4814
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4815
     self new encode:'david'      -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4816
     self new encode:'whittle'    -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4817
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4818
     self new encode:'Stevenson'  -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4819
     self new encode:'Peter'      -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4820
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4821
     self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4822
     self new encode:'lee'        -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4823
     self new encode:'Gutierrez'  -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4824
     self new encode:'Pfister'    -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4825
     self new encode:'Jackson'    -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4826
     self new encode:'Tymczak'    -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4827
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4828
     self new encode:'add'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4829
     self new encode:'aid'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4830
     self new encode:'at'         -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4831
     self new encode:'art'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4832
     self new encode:'earth'      -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4833
     self new encode:'head'       -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4834
     self new encode:'old'        -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4835
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4836
     self new encode:'ready'      -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4837
     self new encode:'rather'     -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4838
     self new encode:'able'       -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4839
     self new encode:'appear'     -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4840
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4841
     self new encode:'Deedee'     -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4842
"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4843
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4844
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4845
!PhoneticStringUtilities::Caverphone2StringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4846
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  4847
encode:word 
    "Answer the Caverphone 2 code for word.
     The answer is always a 10-character string: the encoded letters (uppercase),
     right-padded with the digit 1. An empty word answers ten 1s.

     The algorithm is driven by a literal table of (pattern replacement repeat) triples,
     which are applied to the text strictly in table order:
        - a pattern starting with ^ is only matched at the beginning of the text;
        - a pattern ending with $ is only matched at the end of the text;
        - any other pattern is matched anywhere in the text.
     For anchored patterns, repeat means: keep replacing as long as the text still
     starts (ends) with the pattern.
     For unanchored patterns, repeat selects copyReplaceAllSubcollections:with:
     (instead of copyReplaceSubcollection:with:); in addition, rules whose replacement
     is shorter than the pattern (the 'SS'->'S' group rules and the final removals)
     are re-applied until the text no longer shrinks, so that runs of three or more
     identical letters really collapse into a single one.

     The digits 2 and 3 are only temporary placeholders (silent letter / vowel);
     they are removed by the last rules of the table."

    |txt|

    word size == 0 ifTrue:[^ '1111111111' ].
    
    "/ 1. Convert to lowercase
    txt := word asLowercase.

    "/ 2. Remove anything not a-z
    "/    (isLetter would also keep non-ASCII letters such as umlauts,
    "/     which no rule below handles and which would end up in the code)
    txt := txt select:[:ch | ch between:$a and:$z].

    #(
    "/  oldSeq newSeq repeat

    "/ 2.5. Remove final e
        'e$' '' false
    "/ 3. Handle various start options
        '^cough' 'cou2f' false
        '^rough' 'rou2f' false
        '^tough' 'tou2f' false
        '^enough' 'enou2f' false
        '^trough' 'trou2f' false

        '^gn' '2n' false
        'mb$' 'm2' false
        
    "/ 4. Handle replacements
        'cq' '2q' true
        'ci' 'si' true
        'ce' 'se' true
        'cy' 'sy' true
        'tch' '2ch' true
        'c' 'k' true
        'q' 'k' true
        'x' 'k' true
        'v' 'f' true
        'dg' '2g' true
        'tio' 'sio' true
        'tia' 'sia' true
        'd' 't' true
        'ph' 'fh' true
        'b' 'p' true
        'sh' 's2' true
        'z' 's' true
        
        '^a' 'A' false
        '^e' 'A' false
        '^i' 'A' false
        '^o' 'A' false
        '^u' 'A' false
        
        'a' '3' true
        'e' '3' true
        'i' '3' true
        'o' '3' true
        'u' '3' true
        'j' 'y' true 
        
        '^y3' 'Y3' false 
        '^y' 'A' false

        'y' '3'  true
        '3gh3' '3kh3' true
        'gh' '22' true
        'g' 'k' true
        's'  'S' true
        'SS' 'S' true
        't'  'T' true
        'TT' 'T' true
        'p'  'P' true
        'PP' 'P' true
        'k'  'K' true
        'KK' 'K' true
        'f'  'F' true
        'FF' 'F' true
        'm'  'M' true
        'MM' 'M' true
        'n'  'N' true
        'NN' 'N' true
        'w3' 'W3' true
        'wh3' 'Wh3' true
        'w$' '3'  false
        'w' '2' true
        '^h' 'A' false
        'h' '2' true
        'r3' 'R3' true
        'r$' '3'  false
        'r' '2' true
        'l3' 'L3' true
        'l$' '3' false
        'l' '2' true

    "/ 5. removals

        '2' '' true
        '3$' 'A' true
        '3' '' true
    ) inGroupsOf:3 do:[:pat :repl :repeat|
        |s txtBefore sizeBefore|

        txtBefore := txt.
        (pat startsWith:$^) ifTrue:[
            "/ anchored at the beginning of the text
            s := pat copyButFirst.
            repeat ifTrue:[
                [txt startsWith:s] whileTrue:[ txt := repl,(txt copyButFirst:s size) ]
            ] ifFalse:[
                (txt startsWith:s) ifTrue:[ txt := repl,(txt copyButFirst:s size) ]
            ].    
        ] ifFalse:[
            (pat endsWith:$$) ifTrue:[
                "/ anchored at the end of the text
                s := pat copyButLast.
                repeat ifTrue:[
                    [txt endsWith:s] whileTrue:[ txt := (txt copyButLast:s size),repl ]
                ] ifFalse:[
                    (txt endsWith:s) ifTrue:[ txt := (txt copyButLast:s size),repl ]
                ]
            ] ifFalse:[
                repeat ifTrue:[
                    "/ one replace-all pass only halves a run ('SSS' -> 'SS');
                    "/ so shrinking rules are re-applied until the text stops shrinking.
                    "/ Same-size rules are still applied exactly once.
                    "/ (terminates, because every extra round makes txt strictly shorter)
                    [
                        sizeBefore := txt size.
                        txt := txt copyReplaceAllSubcollections:pat with:repl.
                        (repl size < pat size) and:[txt size < sizeBefore]
                    ] whileTrue
                ] ifFalse:[
                    txt := txt copyReplaceSubcollection:pat with:repl
                ]    
            ]    
        ].
        "/ debugging aid: trace every rule which changed the text
        "/ txt ~= txtBefore ifTrue:[
        "/     Transcript showCR:(pat,' | ',repl,' -> ',txt).
        "/ ].    
    ].    

    "/ 6. put ten 1s on the end
    txt := txt,'1111111111'.
    
    "/ 7. take the first ten characters as the code
    ^ txt copyTo:10

    "
     self new encode:'david'      -> 'TFT1111111'
     self new encode:'whittle'    -> 'WTA1111111'

     self new encode:'Stevenson'  -> 'STFNSN1111'
     self new encode:'Peter'      -> 'PTA1111111'

     self new encode:'washington' -> 'WSNKTN1111'
     self new encode:'lee'        -> 'LA11111111'
     self new encode:'Gutierrez'  -> 'KTRS111111'
     self new encode:'Pfister'    -> 'PFSTA11111'
     self new encode:'Jackson'    -> 'YKSN111111'
     self new encode:'Tymczak'    -> 'TMKSK11111'

     self new encode:'add'        -> 'AT11111111'
     self new encode:'aid'        -> 'AT11111111'
     self new encode:'at'         -> 'AT11111111'
     self new encode:'art'        -> 'AT11111111'
     self new encode:'earth'      -> 'AT11111111'
     self new encode:'head'       -> 'AT11111111'
     self new encode:'old'        -> 'AT11111111'

     self new encode:'ready'      -> 'RTA1111111'
     self new encode:'rather'     -> 'RTA1111111'
     self new encode:'able'       -> 'APA1111111'
     self new encode:'appear'     -> 'APA1111111'

     self new encode:'Deedee'     -> 'TTA1111111'
    "

    "Created: / 28-07-2017 / 15:21:23 / cg"
    "Modified: / 02-08-2017 / 01:42:35 / cg"
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5014
! !
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5015
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5016
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5017
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5018
documentation
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5019
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5020
     The 'Kölner Phonetik' (Cologne phonetics) code is for the German language
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5021
     what the soundex code is for English:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5022
        it returns similar strings for similar sounding words 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5023
     (but is specifically aware of the pronunciation of German and eastern languages).
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5024
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5025
     There are some other differences to soundex, though: 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5026
        its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5027
        it does not start with the first character of the input, but returns a pure numeric string.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5028
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5029
     This algorithm was described by Postel 1969,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5030
     See  http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5031
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5032
    self new phoneticStringsFor:'Müller-Lüdenscheidt' -> #('65752682')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5033
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5034
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5035
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5036
examples
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5037
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5038
   words sounding similar (german pronunciation) will deliver a similar code:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5039
   
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5040
     #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5041
        'Müller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5042
        'Miller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5043
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5044
        'Mühler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5045
        'Mühlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5046
        'Mülherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5047
        'Myler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5048
        'Millar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5049
        'Myller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5050
        'Müllar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5051
        'Müler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5052
        'Muehler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5053
        'Mülller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5054
        'Müllerr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5055
        'Muehlherr'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5056
        'Muellar'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5057
        'Mueler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5058
        'Mülleer'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5059
        'Mueller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5060
        'Nüller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5061
        'Nyller'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5062
        'Niler'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5063
        'Czerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5064
        'Tscherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5065
        'Czernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5066
        'Tschernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5067
        'Schernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5068
        'Scherny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5069
        'Scherno'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5070
        'Czerne'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5071
        'Zerny'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5072
        'Tzernie'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5073
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5074
        'Breschnew'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5075
        'Breschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5076
        'Breschnjeff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5077
        'Braeschneff'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5078
        'Braessneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5079
        'Pressneff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5080
        'Presznäph'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5081
        'Präschnäf' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5082
        'Breschnjeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5083
        'Breschnijeff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5084
        'Breschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5085
        'Bräschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5086
        'Braschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5087
        'Broschnieff' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5088
     ) do:[:w |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5089
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5090
     ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5091
"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5092
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5093
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5094
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5095
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5096
encode: aString
    "Answer the 'Koelner Phonetik' (cologne phonetic) code of aString, as a string of digits.
     The koelner phonetic code is for the german language what the soundex code is for english:
     it returns similar strings for similar sounding words.
     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input.
     The algorithm is described by Postel 1969.

     Processing steps, in this order:
        1. strip separators, lowercase, replace 'ph' by 'f', transliterate ü ä ö ß to u a o ss,
           and drop everything which is not a letter;
        2. pad the word with '#' on both sides and slide a 3-character window over it, so the
           letter being encoded (the middle one) is seen together with its left and right neighbour;
           the first window goes through #convertFirst:, all others through #convertRest:;
        3. remove the '-' placeholders;
        4. remove all '0' codes, except for a '0' at the very beginning;
        5. collapse runs of equal adjacent digits into one.

     NOTE(review): zeros are removed (step 4) BEFORE repeated digits are collapsed (step 5), so equal
     codes which were only separated by vowels are merged as well (see 'hoffmann' -> '036' below).
     The commonly published description of the algorithm seems to collapse first - confirm that
     this ordering is intended before changing it; existing keys depend on it."

    |padded rawCodes startsWithZero digits|

    "/ step 1: normalize the input
    padded := aString withoutSeparators asLowercase.
    padded := padded copyReplaceString:'ph' withString:'f'.
    (padded includesAny:'öäüß') ifTrue:[
        padded := padded copyReplaceAll:$ü withAll:'u'.
        padded := padded copyReplaceAll:$ä withAll:'a'.
        padded := padded copyReplaceAll:$ö withAll:'o'.
        padded := padded copyReplaceAll:$ß withAll:'ss'.
    ].
    padded := '#' , (padded select:[:ch | ch isLetter]) , '#'.

    "/ step 2: encode each letter within its left/right context
    rawCodes := (1 to:padded size - 2) inject:'' into:[:soFar :start |
        |window|

        window := padded copyFrom:start to:start + 2.
        soFar , (start == 1
                    ifTrue:[ self convertFirst:window ]
                    ifFalse:[ self convertRest:window ])
    ].

    "/ step 3: drop placeholders
    rawCodes := rawCodes select:[:ch | ch ~= $-].

    "/ step 4: drop zeros, but remember (and restore) a leading one
    startsWithZero := rawCodes startsWith:'0'.
    digits := rawCodes select:[:ch | ch ~= $0].
    startsWithZero ifTrue:[
        digits := '0' , digits
    ].

    "/ step 5: squeeze repeated digits
    ^ String streamContents:[:out |
        |previous|

        digits do:[:ch |
            ch = previous ifFalse:[
                out nextPut:ch
            ].
            previous := ch.
        ].
    ]

    "
     #(
        'Müller' 'Miller' 'Mueller' 'Mühler' 'Mühlherr' 'Mülherr' 'Myler' 'Millar'
        'Myller' 'Müllar' 'Müler' 'Muehler' 'Mülller' 'Müllerr' 'Muehlherr' 'Muellar'
        'Mueler' 'Mülleer' 'Mueller' 'Nüller' 'Nyller' 'Niler'
        'Czerny' 'Tscherny' 'Czernie' 'Tschernie' 'Schernie' 'Scherny' 'Scherno'
        'Czerne' 'Zerny' 'Tzernie'
        'Breschnew' 'Breschnew' 'Breschneff' 'Breschnjeff' 'Braeschneff' 'Braessneff'
        'Pressneff' 'Presznäph' 'Präschnäf' 'Breschnjeff' 'Breschnijeff' 'Breschnieff'
     ) do:[:w |
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
     ].
    "

    "
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnew' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braeschneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braessneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Pressneff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Presznäph' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Präschnäf' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnjeff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnijeff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnieff' -> '17863'
    "
    "
     self basicNew encode:'müller'      -> '657'
     self basicNew encode:'möller'      -> '657'
     self basicNew encode:'miller'      -> '657'
     self basicNew encode:'muller'      -> '657'
     self basicNew encode:'muler'       -> '657'
     self basicNew encode:'schmidt'     -> '862'
     self basicNew encode:'schneider'   -> '8627'
     self basicNew encode:'fischer'     -> '387'
     self basicNew encode:'weber'       -> '317'
     self basicNew encode:'meyer'       -> '67'
     self basicNew encode:'wagner'      -> '3467'
     self basicNew encode:'schulz'      -> '858'
     self basicNew encode:'becker'      -> '147'
     self basicNew encode:'hoffmann'    -> '036'
     self basicNew encode:'schäfer'     -> '837'
    "

    "Created: / 28-07-2017 / 15:24:33 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5231
! !
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5232
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5233
!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5234
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5235
convertFirst:chars
    "Answer the code for the very first letter of a word.
     chars is the 3-character window '#' , firstLetter , follower
     (follower is '#' if the word consists of a single letter).

     Rules which are special for the word start:
        a e i j y o u                           -> '0'
        c followed by one of a h k l o q r u x  -> '4'
        any other c                             -> '8'
     Every other letter (and any window which is not 3 characters long)
     is passed on to #convertRest:.

     This used to be a table of glob patterns ('#a#' -> '0', '#ca' -> '4', ... '#c#' -> '8')
     which was tried pair by pair via match: ; it is now done with direct character tests."

    |letter follower|

    chars size == 3 ifTrue:[
        letter := chars at:2.

        ('aeijyou' includes:letter) ifTrue:[
            ^ '0'
        ].

        letter == $c ifTrue:[
            follower := chars at:3.
            ^ ('ahkloqrux' includes:follower)
                ifTrue:[ '4' ]
                ifFalse:[ '8' ]
        ].
    ].

    ^ self convertRest:chars

    "Modified: / 29-07-2017 / 14:22:20 / cg"
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5292
!
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5293
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5294
convertRest:chars
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5295
    chars size == 3 ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5296
        self error:'cannot happen'.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5297
        ^ '?' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5298
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5299
    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5300
    #(
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5301
        "/ used to be matchpattern code,
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5302
        "/ but doing these glob-matches is too slow.
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5303
        "/ changed to:
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5304
        "/    start nil  code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5305
        "/    nil   end  code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5306
        "/    nil   char code
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5307
        "/    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5308
        (nil 'ds' " '#ds' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5309
        (nil 'dc' " '#dc' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5310
        (nil 'dz' " '#dz' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5311
        (nil 'ts' " '#ts' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5312
        (nil 'tc' " '#tc' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5313
        (nil 'tz' " '#tz' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5314
        (nil $d   " '#d#' " '2')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5315
        (nil $t   " '#t#' " '2')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5316
        ('cx' nil " 'cx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5317
        ('kx' nil " 'kx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5318
        ('qx' nil " 'qx#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5319
        (nil $x   " '#x#' " '48')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5320
        ('sc' nil " 'sc#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5321
        ('sz' nil " 'sz#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5322
        (nil 'ca' " '#ca' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5323
        (nil 'co' " '#co' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5324
        (nil 'cu' " '#cu' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5325
        (nil 'ch' " '#ch' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5326
        (nil 'ck' " '#ck' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5327
        (nil 'cx' " '#cx' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5328
        (nil 'cq' " '#cq' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5329
        (nil $c   " '#c#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5330
        (nil $a   " '#a#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5331
        (nil $e   " '#e#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5332
        (nil $i   " '#i#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5333
        (nil $j   " '#j#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5334
        (nil $y   " '#y#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5335
        (nil $o   " '#o#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5336
        (nil $u   " '#u#' " '0')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5337
        (nil $h   " '#h#' " '-')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5338
        (nil $l   " '#l#' " '5')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5339
        (nil $r   " '#r#' " '7')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5340
        (nil $m   " '#m#' " '6')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5341
        (nil $n   " '#n#' " '6')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5342
        (nil $s   " '#s#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5343
        (nil $z   " '#z#' " '8')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5344
        (nil $b   " '#b#' " '1')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5345
        (nil $p   " '#p#' " '1')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5346
        (nil $f   " '#f#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5347
        (nil $v   " '#v#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5348
        (nil $w   " '#w#' " '3')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5349
        (nil $g   " '#g#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5350
        (nil $k   " '#k#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5351
        (nil $q   " '#q#' " '4')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5352
        (nil nil  " '###' " '?')
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5353
    ) do:[:vector |
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5354
        |v1 v2|
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5355
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5356
        (v1 := vector at:1) notNil ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5357
            "/ prefix
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5358
            (chars startsWith:v1) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5359
        ] ifFalse:[                       
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5360
            (v2 := vector at:2) isCharacter ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5361
                "/ middle character compare
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5362
                (chars at:2) == v2 ifTrue:[^ (vector at:3) ]. 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5363
            ] ifFalse:[    
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5364
                v2 isString ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5365
                    "/ suffix
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5366
                    (chars endsWith:v2) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5367
                ] ifFalse:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5368
                   ^ '?' 
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5369
                ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5370
            ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5371
        ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5372
        
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5373
        "/ (vector first match:chars) ifTrue:[
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5374
        "/     ^ vector second
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5375
        "/ ]
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5376
    ].
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5377
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5378
    self error:'cannot happen'
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5379
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5380
    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5381
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5382
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5383
!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5384
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5385
documentation
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5386
"
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5387
    Miracode (also called << American Soundex >>) is like Soundex with the 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5388
    addition that h and w are discarded if they separate consonants.
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5389
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5390
    These variants may be specifically important because they were used in 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5391
    U.S. National Archives. 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5392
    Most archive data were encoded with Miracode, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5393
    but there are some (older) entries encoded with Simplified Soundex. 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5394
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5395
    The HW-rule was documented as a standard in 1910, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5396
    but actually data of 1880, 1900 and 1910 
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  5397
    censuses were encoded with mixed methods.
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5398
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5399
     self new encode:'washington' -> 'W252'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5400
     self new encode:'lee'        -> 'L000'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5401
     self new encode:'Gutierrez'  -> 'G362'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5402
     self new encode:'Pfister'    -> 'P236'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5403
     self new encode:'Jackson'    -> 'J250'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5404
     self new encode:'Tymczak'    -> 'T522'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5405
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5406
    notice:
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5407
     MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5408
     SoundexStringComparator new encode:'Ashcraft'  -> 'A226'
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5409
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5410
    see also:            
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5411
        https://www.archives.gov/research/census/soundex.html
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5412
"
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5413
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5414
4491
d6c31bb1e928 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4490
diff changeset
  5415
!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'private'!
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5416
4488
51f2907c7389 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 4487
diff changeset
  5417
encode:word 
    "Answer the Miracode (american soundex) key of word.
     Works like the inherited soundex encoding, with special care for W and H:
     these two letters do not replace the remembered previous code, so two
     letters with the same code which are separated only by W/H are coded once.

     The result starts with the uppercased first character of word, followed by
     the codes delivered by #translate: for the remaining characters; it is
     padded with '0' and cut to a length of 4."

    |upper key ch code lastCode|

    upper := word asUppercase.
    key := upper first asString.
    "/ the first letter's own code takes part in the duplicate check
    lastCode := self translate:upper first.

    2 to:upper size do:[:idx |
        ch := upper at:idx.
        code := self translate:ch.

        "/ skip untranslatable characters, the '0' code and repetitions of the previous code
        (code isNil or:[ code = '0' or:[ code = lastCode ]]) ifFalse:[
            key := key , code.
            key size == 4 ifTrue:[^ key ].
        ].

        "/ W and H are transparent: they leave the remembered code untouched
        (ch = $W or:[ch = $H]) ifFalse:[
            lastCode := code.
        ].
    ].

    "/ fill short keys with zeros
    (4 - key size) timesRepeat:[
        key := key , '0'
    ].
    ^ (key copyFrom:1 to:4)

    "Created: / 02-08-2017 / 00:19:47 / cg"
    "Modified (comment): / 02-08-2017 / 14:30:47 / cg"
4489
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5444
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5445
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5446
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator class methodsFor:'documentation'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5447
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5448
documentation
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5449
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5450
     The 'Spanish Phonetic' code is for the spanish language 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5451
     what the soundex code is for english:
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5452
        it returns similar strings for similar sounding words 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5453
     (but is specifically aware of the pronunciation of spanish) . 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5454
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5455
     There are some other differences to soundex, though: 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5456
        its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5457
        it does not start with the first character of the input, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5458
        but returns a pure numeric string,
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5459
        it uses different character groups
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5460
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5461
     This algorithm was described by María del Pilar Angeles, Adrian Espino-Gamez, 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5462
     and Jonathan Gil-Moncada, in 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5463
        'Comparison of a Modified Spanish phonetic,
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5464
         Soundex, and Phonex coding functions during data matching process'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5465
     See  https://www.researchgate.net/publication/285589803_Comparison_of_a_Modified_Spanish_Phonetic_Soundex_and_Phonex_coding_functions_during_data_matching_process
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5466
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5467
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5468
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5469
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5470
examples
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5471
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5472
   words sounding similar (spanish pronunciation) will deliver a similar code:
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5473
   
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5474
     #(
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5475
        'Marıa'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5476
        'Pilar'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5477
        'Angeles'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5478
        'Adrian'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5479
        'Gamez'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5480
     ) do:[:w |
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5481
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::SpanishPhoneticCodeStringComparator new encode:w)
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5482
     ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5483
"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5484
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5485
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5486
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'api'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5487
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5488
encode: aString
    "return a spanish phonetic code.
     The spanishPhonetic code is for the spanish language what the soundex code is for english;
     it returns similar strings for similar sounding words.
     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input,
        it uses different character groups.
     This algorithm is described by María del Pilar Angeles, Adrian Espino-Gamez,
     Jonathan Gil-Moncada.

     Leading and trailing separators are stripped and the input is uppercased.
     Every character which differs from its immediate predecessor is mapped to a digit:
        P -> 0      B V -> 1    F H -> 2    D T -> 3    S Z C X -> 4
        Y L -> 5    N Ñ M -> 6  Q K -> 7    G J -> 8    R -> 9
     Any other character (vowels, digits, punctuation) generates no output,
     but is still remembered as predecessor for the repetition check."

    |in groups|

    in := aString withoutSeparators asUppercase.

    "/ (letters digit) pairs.
    "/ The nasal group used to list N-with-acute instead of the spanish N-with-tilde,
    "/ so that letter was silently dropped; the old character is kept for compatibility.
    groups := #(
                ('P'    $0)
                ('BV'   $1)
                ('FH'   $2)
                ('DT'   $3)
                ('SZCX' $4)
                ('YL'   $5)
                ('NÑŃM' $6)
                ('QK'   $7)
                ('GJ'   $8)
                ('R'    $9)
              ).

    ^ String streamContents:[:out |
        |prev|

        in do:[:ch |
            |group|

            "/ compare by value, not identity: equal wide characters
            "/ may not be the identical object
            ch = prev ifFalse:[
                group := groups detect:[:each | (each at:1) includes:ch] ifNone:[nil].
                group notNil ifTrue:[
                    out nextPut:(group at:2).
                ].
                prev := ch.
            ].
        ].
    ].

    "
     self new encode:'Jose'     -> '84'
     self new encode:'Pilar'    -> '059'
     self new encode:'Muñoz'    -> '664'
    "

    "Created: / 28-07-2017 / 15:24:33 / cg"
    "Modified: / 01-08-2017 / 18:48:50 / cg"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5540
! !
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5541
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5542
!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'private'!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5543
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5544
convertFirst:chars
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5545
    |c2 c3|
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5546
    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5547
    chars size == 3 ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5548
        c2 := (chars at:2).
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5549
        c2 == $a ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5550
        c2 == $e ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5551
        c2 == $i ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5552
        c2 == $j ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5553
        c2 == $y ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5554
        c2 == $o ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5555
        c2 == $u ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5556
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5557
        c2 == $c ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5558
            c3 := (chars at:3).
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5559
            (c3 == $a) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5560
            (c3 == $h) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5561
            (c3 == $k) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5562
            (c3 == $l) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5563
            (c3 == $o) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5564
            (c3 == $q) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5565
            (c3 == $r) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5566
            (c3 == $u) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5567
            (c3 == $x) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5568
            ^ '8'
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5569
        ].    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5570
        
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5571
"/        #(
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5572
"/            ('#a#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5573
"/            ('#e#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5574
"/            ('#i#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5575
"/            ('#j#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5576
"/            ('#y#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5577
"/            ('#o#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5578
"/            ('#u#' '0')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5579
"/
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5580
"/            ('#ca' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5581
"/            ('#ch' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5582
"/            ('#ck' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5583
"/            ('#cl' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5584
"/            ('#co' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5585
"/            ('#cq' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5586
"/            ('#cr' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5587
"/            ('#cu' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5588
"/            ('#cx' '4')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5589
"/
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5590
"/            ('#c#' '8')
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5591
"/        ) do:[:pair | 
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5592
"/            (pair first match:chars) ifTrue:[
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5593
"/                ^ pair second
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5594
"/            ]
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5595
"/        ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5596
    ].
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5597
    
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5598
    ^ self convertRest:chars
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5599
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5600
    "Modified: / 29-07-2017 / 14:22:20 / cg"
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5601
!
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5602
2d7af11ffcd7 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4488
diff changeset
  5603
convertRest:chars
    "Answer the code string for the middle element of chars, which must be
     a string of exactly 3 characters (presumably the previous, the current and
     the next character of the word being encoded - the caller is not visible here).

     The rule table below is scanned top to bottom and the first matching rule wins;
     therefore the context sensitive rules (prefix / suffix) must come before the
     plain middle-character rules.
     Answers '?' if no rule applies; if chars does not have size 3, an error is
     reported and '?' is answered when that error is proceeded."

    chars size == 3 ifFalse:[
        self error:'cannot happen'.
        ^ '?'
    ].

    #(
        "/ used to be matchpattern code (the old glob pattern is kept as comment),
        "/ but doing these glob-matches is too slow.
        "/ changed to triples:
        "/    prefixString nil           code    - chars starts with prefixString
        "/    nil          suffixString  code    - chars ends with suffixString
        "/    nil          character     code    - middle character is character
        "/    nil          nil           code    - catch all; must be last
        "/
        (nil 'ds' " '#ds' " '8')
        (nil 'dc' " '#dc' " '8')
        (nil 'dz' " '#dz' " '8')
        (nil 'ts' " '#ts' " '8')
        (nil 'tc' " '#tc' " '8')
        (nil 'tz' " '#tz' " '8')
        (nil $d   " '#d#' " '2')
        (nil $t   " '#t#' " '2')
        ('cx' nil " 'cx#' " '8')
        ('kx' nil " 'kx#' " '8')
        ('qx' nil " 'qx#' " '8')
        (nil $x   " '#x#' " '48')
        "/ c after s or z is coded as 8; these must precede the '#c?' suffix rules.
        "/ The second entry used to be 'sz#', which has z (not c) in the middle and
        "/ was therefore redundant with the '#z#' rule below, while c after z was
        "/ wrongly coded by the '#c?' rules.
        "/ NOTE(review): assumes this table implements the Koelner Phonetik rules - confirm.
        ('sc' nil " 'sc#' " '8')
        ('zc' nil " 'zc#' " '8')
        (nil 'ca' " '#ca' " '4')
        (nil 'co' " '#co' " '4')
        (nil 'cu' " '#cu' " '4')
        (nil 'ch' " '#ch' " '4')
        (nil 'ck' " '#ck' " '4')
        (nil 'cx' " '#cx' " '4')
        (nil 'cq' " '#cq' " '4')
        (nil $c   " '#c#' " '8')
        (nil $a   " '#a#' " '0')
        (nil $e   " '#e#' " '0')
        (nil $i   " '#i#' " '0')
        (nil $j   " '#j#' " '0')
        (nil $y   " '#y#' " '0')
        (nil $o   " '#o#' " '0')
        (nil $u   " '#u#' " '0')
        (nil $h   " '#h#' " '-')
        (nil $l   " '#l#' " '5')
        (nil $r   " '#r#' " '7')
        (nil $m   " '#m#' " '6')
        (nil $n   " '#n#' " '6')
        (nil $s   " '#s#' " '8')
        (nil $z   " '#z#' " '8')
        (nil $b   " '#b#' " '1')
        (nil $p   " '#p#' " '1')
        (nil $f   " '#f#' " '3')
        (nil $v   " '#v#' " '3')
        (nil $w   " '#w#' " '3')
        (nil $g   " '#g#' " '4')
        (nil $k   " '#k#' " '4')
        (nil $q   " '#q#' " '4')
        (nil nil  " '###' " '?')
    ) do:[:vector |
        |v1 v2|

        (v1 := vector at:1) notNil ifTrue:[
            "/ prefix rule: first two characters
            (chars startsWith:v1) ifTrue:[^ (vector at:3) ].
        ] ifFalse:[
            (v2 := vector at:2) isCharacter ifTrue:[
                "/ middle character compare
                (chars at:2) == v2 ifTrue:[^ (vector at:3) ].
            ] ifFalse:[
                v2 isString ifTrue:[
                    "/ suffix rule: last two characters
                    (chars endsWith:v2) ifTrue:[^ (vector at:3) ].
                ] ifFalse:[
                    "/ catch-all entry (nil nil): nothing matched
                    ^ '?'
                ]
            ]
        ].
    ].

    "/ not reached, because the catch-all entry at the end of the table always returns;
    "/ answer the same fallback as the size check above if the error is proceeded.
    self error:'cannot happen'.
    ^ '?'

    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5690
! !
d430693b581a +mySQL soundex
Claus Gittinger <cg@exept.de>
parents: 2207
diff changeset
  5691
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5692
!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5693
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5694
version
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  5695
    "Answer the version string of this class: the header keyword string literal below
     (presumably a placeholder that the source code manager fills in - confirm)."
    ^ '$Header$'
2285
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5696
!
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5697
0527d18cfec9 changed: #documentation
Claus Gittinger <cg@exept.de>
parents: 2215
diff changeset
  5698
version_CVS
3646
82247702d48b #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3489
diff changeset
  5699
    "Answer the CVS version string of this class: the header keyword string literal below
     (presumably a placeholder that the source code manager fills in - confirm)."
    ^ '$Header$'
2197
33e71ed6cf32 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  5700
! !
3185
9833bbba2050 class: PhoneticStringUtilities
Claus Gittinger <cg@exept.de>
parents: 2580
diff changeset
  5701