hg/stx-libbasic2: PhoneticStringUtilities.st@44fa8672d102 (annotated)

4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1	"{ Encoding: utf8 }"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	3	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	4	COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5	COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	6	All Rights Reserved
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	7
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	8	This software is furnished under a license and may be used
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	9	only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	10	inclusion of the above copyright notice. This software may not
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	11	be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	12	other person. No title to or ownership of the software is
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	13	hereby transferred.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	14	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	15	"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	16
3488 5a69e672d7f8 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3185 diff changeset	17	"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3185 diff changeset	18
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	19	Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	20	instanceVariableNames:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	21	classVariableNames:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	22	poolDictionaries:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	23	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	24	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	25
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	26	Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	27	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	28	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	29	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	30	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	31	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	32
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	33	PhoneticStringUtilities::PhoneticStringComparator subclass:#DaitchMokotoffStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	34	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	35	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	36	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	37	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	38	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	39	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	40
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	41	PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	42	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	43	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	44	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	45	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	46	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	47	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	48
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	49	PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	50	instanceVariableNames:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	51	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	52	poolDictionaries:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	53	privateIn:PhoneticStringUtilities
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	54	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	55
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	56	PhoneticStringUtilities::PhoneticStringComparator subclass:#SingleResultPhoneticStringComparator
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	57	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	58	classVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	59	poolDictionaries:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	60	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	61	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	62
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	63	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MRAStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	64	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	65	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	66	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	67	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	68	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	69
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	70	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	71	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	72	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	73	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	74	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	75	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	76	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	77
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	78	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SoundexStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	79	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	80	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	81	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	82	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	83	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	84
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	85	PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	86	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	87	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	88	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	89	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	90	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	91
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	92	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#NYSIISStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	93	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	94	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	95	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	96	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	97	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	98
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	99	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#PhonemStringComparator
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	100	instanceVariableNames:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	101	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	102	poolDictionaries:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	103	privateIn:PhoneticStringUtilities
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	104	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	105
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	106	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#Caverphone2StringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	107	instanceVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	108	classVariableNames:'CharacterTranslationDict'
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	109	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	110	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	111	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	112
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	113	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	114	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	115	classVariableNames:'CharacterTranslationDict'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	116	poolDictionaries:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	117	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	118	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	119
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	120	PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	121	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	122	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	123	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	124	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	125	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	126
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	127	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SpanishPhoneticCodeStringComparator
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	128	instanceVariableNames:''
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	129	classVariableNames:'CharacterTranslationDict'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	130	poolDictionaries:''
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	131	privateIn:PhoneticStringUtilities
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	132	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	133
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	134	!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	135
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	136	copyright
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	137	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	138	COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	139	COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	140	All Rights Reserved
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	141
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	142	This software is furnished under a license and may be used
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	143	only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	144	inclusion of the above copyright notice. This software may not
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	145	be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	146	other person. No title to or ownership of the software is
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	147	hereby transferred.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	148	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	149	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	150
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	151	documentation
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	152	"
2445 d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	153	Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	154	These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	155	to provide good results for non-English languages.
2285 0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	156
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	157	soundexCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	158	this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	159
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	160	nysiis
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	161	a modified soundex algorithm
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	162
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	163	miracode
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	164	another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	165
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	166	mySQLSoundex
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	167	another modified soundex algorithm used in mySQL.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	168
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	169	koelner phoneticCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	170	provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	171
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	172	Double metaphone
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	173	works with most european languages.
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	174
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	175	phonem
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	176	described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	177	from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	178
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	179	mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	180	Match Rating Approach: a phonetic algorithm developed by Western Airlines in 1977.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	181
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	182	caverphone2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	183	better than soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	184
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	185	spanish phonetic code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	186	an algorithm slightly adjusted to spanish names
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	187
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	188	More info for german readers is found in:
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	189	http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	190	"
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	191	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	192
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	193	sampleData
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	194	"
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	195	for the 50 most common german names, we get:
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	196
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	197	ext.
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	198	name soundex soundex metaphone phonet phonet2 phonix daitch phonem koeln caverphone2 mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	199
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	200	müller M460 54600000 MLR MÜLA NILA M4000000 689000 MYLR 657 MLA1111111 MLR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	201	schmidt S530 25300000 SKMTT SHMIT ZNIT S5300000 463000 CMYD 862 SKMT111111 SCHMDT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	202	schneider S536 25360000 SKNTR SHNEIDA ZNEITA S5300000 463900 CNAYDR 8627 SKNTA11111 SCHNDR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	203	fischer F260 12600000 FSKR FISHA FIZA F8000000 749000 VYCR 387 FSKA111111 FSCHR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	204	weber W160 16000000 WBR WEBA FEBA $1000000 779000 VBR 317 WPA1111111 WBR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	205	meyer M600 56000000 MYR MEIA NEIA M0000000 619000 MAYR 67 MA11111111 MYR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	206	wagner W256 25600000 WKNR WAKNA FAKNA $2500000 756900 VACNR 3467 WKNA111111 WGNR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	207	schulz S420 24200000 SKLS SHULS ZULZ S4800000 484000 CULC 858 SKS1111111 SCHLZ
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	208	becker B260 12600000 BKR BEKA BEKA B2000000 759000 BCR 147 PKA1111111 BCKR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	209	hoffmann H155 15500000 HFMN HOFMAN UFNAN $7550000 576600 OVMAN 036 AFMN111111 HFMN
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	210	schäfer S160 21600000 SKFR SHEFA ZEFA S7000000 479000 CVR 837 SKFA111111 SCHFR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	211
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	212	\|cls\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	213
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	214	cls := MRAStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	215	cls := SoundexStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	216	cls := KoelnerPhoneticCodeStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	217	cls := Caverphone2StringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	218	#('müller' 'schmidt' 'schneider' 'fischer' 'weber' 'meyer'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	219	'wagner' 'schulz' 'becker' 'hoffmann' 'schäfer')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	220	do:[:name \|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	221	Transcript show:''''; show:name; show:''' -> '''; show:(cls encode:name); showCR:''''.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	222	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	223
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	224	KoelnerPhoneticCodeStringComparator encode:'Müller-Lüdenscheidt' -> '65752682'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	225	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	226	! !
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	227
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	228	!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	229
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	230	koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	231	"return a koelner phonetic code.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	232	The koelnerPhonetic code is for the German language what the soundex code is for English;
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	233	it returns similar strings for similar sounding words.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	234	There are some differences to soundex, though:
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	235	its length is not limited to 4, but depends on the length of the original string;
2207 6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	236	it does not start with the first character of the input.
6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	237	This algorithm is described by Postel 1969"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	238
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	239	^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	240
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	241	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	242	#(
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	243	'Müller'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	244	'Miller'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	245	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	246	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	247	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	248	'Mülherr'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	249	'Myler'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	250	'Millar'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	251	'Myller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	252	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	253	'Müler'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	254	'Muehler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	255	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	256	'Müllerr'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	257	'Muehlherr'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	258	'Muellar'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	259	'Mueler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	260	'Mülleer'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	261	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	262	'Nüller'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	263	'Nyller'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	264	'Niler'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	265	'Czerny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	266	'Tscherny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	267	'Czernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	268	'Tschernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	269	'Schernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	270	'Scherny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	271	'Scherno'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	272	'Czerne'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	273	'Zerny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	274	'Tzernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	275	'Breschnew'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	276	) do:[:w \|
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	277	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	278	].
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	279	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	280
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	281	"
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	282	PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	283	PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	284	PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	285	PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	286	PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	287	PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	288	PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	289	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	290	!
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	291
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	292	miracodeCodeOf:aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	293	"Answer the miracode soundex phonetic code of aString.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	294	Miracode is a slightly modified soundex algorithm; the work is delegated to a new MiracodeStringComparator, and the first of the strings it returns from phoneticStringsFor: is answered.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	295	NOTE(review): the earlier comment promised 'or nil' - whether nil can come back depends on phoneticStringsFor:, which is not visible here; confirm. Notice that there are better algorithms around (doubleMetaphone) "
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	296
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	297	^ (MiracodeStringComparator new phoneticStringsFor:aString) first
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	298
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	299	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	300	PhoneticStringUtilities miracodeCodeOf:'claus'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	301	PhoneticStringUtilities miracodeCodeOf:'clause'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	302	PhoneticStringUtilities miracodeCodeOf:'close'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	303	PhoneticStringUtilities miracodeCodeOf:'smalltalk'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	304	PhoneticStringUtilities miracodeCodeOf:'smaltalk'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	305	PhoneticStringUtilities miracodeCodeOf:'smaltak'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	306	PhoneticStringUtilities miracodeCodeOf:'smaltok'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	307	PhoneticStringUtilities miracodeCodeOf:'smoltok'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	308	PhoneticStringUtilities miracodeCodeOf:'aa'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	309	PhoneticStringUtilities miracodeCodeOf:'by'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	310	PhoneticStringUtilities miracodeCodeOf:'bab'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	311	PhoneticStringUtilities miracodeCodeOf:'bob'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	312	PhoneticStringUtilities miracodeCodeOf:'bop'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	313	PhoneticStringUtilities miracodeCodeOf:'pub'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	314	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	315
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	316	"Created: / 28-07-2017 / 15:32:41 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	317	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	318
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	319	mySQLSoundexCodeOf:aString
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	320	"return the mySQL soundex code. The mysql soundex code is different from the miracode 'american' soundex
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	321	(no 4char limitation; different order of duplicate vowel vs. duplicate code elimination).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	322	The work is delegated to a new MySQLSoundexStringComparator; the first of the strings it returns from phoneticStringsFor: is answered. Notice that there are better algorithms around (doubleMetaphone) "
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	323
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	324	^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	325
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	326	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	327	#(
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	328	'Müller'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	329	'Miller'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	330	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	331	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	332	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	333	'Mülherr'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	334	'Myler'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	335	'Millar'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	336	'Myller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	337	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	338	'Müler'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	339	'Muehler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	340	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	341	'Müllerr'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	342	'Muehlherr'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	343	'Muellar'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	344	'Mueler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	345	'Mülleer'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	346	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	347	'Nüller'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	348	'Nyller'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	349	'Niler'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	350	'Czerny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	351	'Tscherny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	352	'Czernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	353	'Tschernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	354	'Schernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	355	'Scherny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	356	'Scherno'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	357	'Czerne'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	358	'Zerny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	359	'Tzernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	360	'Breschnew'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	361	) do:[:w \|
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	362	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	363	].
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	364	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	365
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	366	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	367	PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	368	PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	369	PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	370	PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	371	PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	372	PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'.
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	373	PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	374	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	375
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	376	"Modified (comment): / 28-07-2017 / 15:34:03 / cg"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	377	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	378
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	379	soundexCodeOf:aString
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	380	"return a soundex phonetic code or nil (NOTE(review): whether nil can still come back depends on SoundexStringComparator, which is not visible here - confirm).
2207 6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	381	Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	382	tool when searching for words where the correct spelling is unknown.
4194 12b5e3e2219b #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4184 diff changeset	383	(read Knuth or search the web if you don't know what a soundex code is).
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	384	Caveat: 'similar sounding words' means: 'similar sounding in English'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	385	The work is delegated to a new SoundexStringComparator; the first of the strings it returns from phoneticStringsFor: is answered. Notice that there are better algorithms around (doubleMetaphone) "
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	386
2210 9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	387	^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	388
2210 9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	389	"/ old code - kept for reference only, never executed; now use code in private class...
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	390	"/ \|inStream codeStream ch last lch codeLength codes code lastCode\|
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	391	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	392	"/ inStream := aString readStream.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	393	"/ inStream skipSeparators.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	394	"/ inStream atEnd ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	395	"/ ^ nil
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	396	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	397	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	398	"/ ch := inStream next.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	399	"/ ch isLetter ifFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	400	"/ ^ nil
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	401	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	402	"/ codeLength := 0.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	403	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	404	"/ codes := Dictionary new.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	405	"/ codes atAll:'bpfv' put:$1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	406	"/ codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	407	"/ codes atAll:'dt' put:$3.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	408	"/ codes atAll:'l' put:$4.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	409	"/ codes atAll:'nm' put:$5.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	410	"/ codes atAll:'r' put:$6.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	411	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	412	"/ codeStream := WriteStream on:(String new:4).
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	413	"/ codeStream nextPut:(ch asUppercase).
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	414	"/ last := ch asLowercase.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	415	"/ lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	416	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	417	"/ [inStream atEnd] whileFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	418	"/ ch := inStream next.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	419	"/ lch := ch asLowercase.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	420	"/ lch = last ifFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	421	"/ last := lch.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	422	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	423	"/ code := codes at:lch ifAbsent:nil.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	424	"/ (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	425	"/ codeLength < 3 ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	426	"/ codeStream nextPut:code.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	427	"/ codeLength := codeLength + 1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	428	"/ codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	429	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	430	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	431	"/ lastCode := code.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	432	"/ ]
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	433	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	434	"/ [ codeLength < 3 ] whileTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	435	"/ codeStream nextPut:$0.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	436	"/ codeLength := codeLength + 1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	437	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	438	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	439	"/ ^ codeStream contents
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	440
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	441	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	442	PhoneticStringUtilities soundexCodeOf:'claus'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	443	PhoneticStringUtilities soundexCodeOf:'clause'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	444	PhoneticStringUtilities soundexCodeOf:'close'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	445	PhoneticStringUtilities soundexCodeOf:'smalltalk'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	446	PhoneticStringUtilities soundexCodeOf:'smaltalk'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	447	PhoneticStringUtilities soundexCodeOf:'smaltak'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	448	PhoneticStringUtilities soundexCodeOf:'smaltok'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	449	PhoneticStringUtilities soundexCodeOf:'smoltok'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	450	PhoneticStringUtilities soundexCodeOf:'aa'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	451	PhoneticStringUtilities soundexCodeOf:'by'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	452	PhoneticStringUtilities soundexCodeOf:'bab'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	453	PhoneticStringUtilities soundexCodeOf:'bob'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	454	PhoneticStringUtilities soundexCodeOf:'bop'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	455	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	456
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	457	"Modified (comment): / 28-07-2017 / 15:33:53 / cg"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	458	! !
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	459
3648 fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	460	!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	461
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	462	isUtilityClass "answer true only when the receiver is PhoneticStringUtilities itself; because of the identity test, any subclass answers false"
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	463	^ self == PhoneticStringUtilities
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	464	! !
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	465
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	466	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	467
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	468	defaultClass "answer the class SoundexStringComparator; presumably the comparator to use when the caller has no preference - confirm against callers"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	469	^SoundexStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	470	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	471
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	472	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	473
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	474	documentation
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	475	"
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	476	Abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	477	They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	478	to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	479
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	480	Notice that some comparators are better suited to particular languages.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	481	"
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	482	!
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	483
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	484	examples
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	485	" Each expression below asks a concrete comparator whether two words sound alike (does:soundLike:); evaluate them one at a time to compare the comparators.
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	486	PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	487	does:'miller' soundLike:'miler'.
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	488
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	489	PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	490	does:'miller' soundLike:'milner'.
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	491
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	492	PhoneticStringUtilities::SoundexStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	493	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	494
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	495	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	496	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	497	"
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	498	! !
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	499
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	500	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	501
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	502	new "allocate an instance with basicNew, send it initialize, and answer whatever initialize returns"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	503	^ self basicNew initialize.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	504	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	505
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	506	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	507
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	508	isAbstract "answer true only when the receiver is PhoneticStringComparator itself; because of the identity test, subclasses answer false"
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	509	^ self == PhoneticStringUtilities::PhoneticStringComparator
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	510	! !
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	511
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	512	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'utilities'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	513
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	514	encode:word "convenience: create a new instance of the receiver class and answer the first of the strings it returns from phoneticStringsFor:word"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	515	^ (self new phoneticStringsFor:word) first
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	516
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	517	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	518	SoundexStringComparator encode:'Fischer' -> 'F260'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	519	Caverphone2StringComparator encode:'Fischer' -> 'FSKA111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	520	KoelnerPhoneticCodeStringComparator encode:'Fischer' -> '387'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	521	MRAStringComparator encode:'Fischer' -> 'FSCHR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	522	SpanishPhoneticCodeStringComparator encode:'Fischer' -> '24429'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	523	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	524
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	525	"Created: / 02-08-2017 / 01:15:50 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	526	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	527
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	528	!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	529
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	530	does:aString soundLike:anotherString
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	531	\|translations1 translations2\|
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	532	"Answer true if at least one phonetic string computed for aString equals at least one phonetic string computed for anotherString; both sets are obtained via #phoneticStringsFor:."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	533	translations1 := self phoneticStringsFor:aString.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	534	translations2 := self phoneticStringsFor:anotherString.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	535	"/ every alternative of the first string is compared against every alternative of the second; one equal pair is enough
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	536	^ translations1 contains:[:t1 \|
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	537	translations2 contains:[:t2 \| t1 = t2]]
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	538
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	539	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	540	PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	541	does:'miller' soundLike:'miler'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	542
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	543	PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	544	does:'miller' soundLike:'milner'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	545
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	546	PhoneticStringUtilities::SoundexStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	547	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	548
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	549	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	550	does:'müller' soundLike:'mueller'.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	551	"
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	552
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	553	"Modified (comment): / 13-07-2017 / 17:51:43 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	554	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	555
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	556	phoneticStringsFor: aString
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	557	"Should answer an array of alternative phonetic strings (codes) for the given input string; #does:soundLike: compares the elements of two such arrays for equality."
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	558	"Abstract in this class: the body only sends #subclassResponsibility, so concrete comparator subclasses are expected to redefine it."
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	559	self subclassResponsibility
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	560
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	561	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	562	(PhoneticStringUtilities::SoundexStringComparator new
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	563	phoneticStringsFor:'miller') first
735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	564
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	565	'miller' asSoundexCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	566	"
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	567
735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	568	"Modified (comment): / 27-07-2017 / 15:07:59 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	569	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	570
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	571	!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	572
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	573	initialize
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	574	"Invoked when a new instance is created."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	575
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	576	"/ intentionally empty: this method contains no statements, so no state is set up at this level
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	577
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	578	"/ super initialize. -- commented since inherited method does nothing
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	579	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	580
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	581	!PhoneticStringUtilities::DaitchMokotoffStringComparator class methodsFor:'documentation'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	582
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	583	documentation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	584	"
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	585	self encode:'AUERBACH' -> 097400, 097500
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	586
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	587	Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	588	The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	589	yielding greater accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	590	but differences in spelling.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	591
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	592	The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	593	- coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	594	- the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	595	- rules to encode multi-character n-grams
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	596	- multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	597
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	598	This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	599	encode:aString - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	600	phoneticStringsFor:String - branching enabled, all codes will be returned, separated by '\|'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	601
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	602	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	603	'Wikipedia - Daitch-Mokotoff Soundex'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	604	http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	605
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	606	'Avotaynu - Soundexing and Genealogy'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	607	http://www.avotaynu.com/soundex.htm
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	608	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	609	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	610
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	611	javaCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	612	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	613	/*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	614	* Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	615	* contributor license agreements. See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	616	* this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	617	* The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	618	* (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	619	* the License. You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	620	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	621	* http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	622	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	623	* Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	624	* distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	625	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	626	* See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	627	* limitations under the License.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	628	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	629	package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	630
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	631	import org.apache.commons.codec.CharEncoding;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	632	import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	633	import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	634
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	635	import java.io.InputStream;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	636	import java.util.*;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	637
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	638	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	639	* Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	640	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	641	* The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	642	* accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	643	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	644	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	645	* The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	646	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	647	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	648	* <li>coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	649	* <li>the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	650	* <li>rules to encoded multi-character n-grams
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	651	* <li>multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	652	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	653	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	654	* This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	655	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	656	* <li>{@link #encode(String)} - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	657	* <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '\|'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	658	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	659	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	660	* Note: this implementation has additional branching rules compared to the original description of the algorithm. The
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	661	* rules can be customized by overriding the default rules contained in the resource file
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	662	* {@code org/apache/commons/codec/language/dmrules.txt}.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	663	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	664	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	665	* This class is thread-safe.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	666	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	667	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	668	* @see Soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	669	* @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	670	* @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	671	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	672	* @version $Id$
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	673	* @since 1.10
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	674	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	675	public class DaitchMokotoffSoundex implements StringEncoder {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	676
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	677	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	678	* Inner class representing a branch during DM soundex encoding.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	679	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	680	private static final class Branch {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	681	private final StringBuilder builder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	682	private String cachedString;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	683	private String lastReplacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	684
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	685	private Branch() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	686	builder = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	687	lastReplacement = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	688	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	689	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	690
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	691	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	692	* Creates a new branch, identical to this branch.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	693	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	694	* @return a new, identical branch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	695	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	696	public Branch createBranch() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	697	final Branch branch = new Branch();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	698	branch.builder.append(toString());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	699	branch.lastReplacement = this.lastReplacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	700	return branch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	701	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	702
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	703	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	704	public boolean equals(final Object other) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	705	if (this == other) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	706	return true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	707	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	708	if (!!(other instanceof Branch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	709	return false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	710	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	711
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	712	return toString().equals(((Branch) other).toString());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	713	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	714
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	715	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	716	* Finish this branch by appending '0's until the maximum code length has been reached.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	717	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	718	public void finish() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	719	while (builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	720	builder.append('0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	721	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	722	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	723	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	724
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	725	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	726	public int hashCode() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	727	return toString().hashCode();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	728	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	729
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	730	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	731	* Process the next replacement to be added to this branch.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	732	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	733	* @param replacement
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	734	* the next replacement to append
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	735	* @param forceAppend
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	736	* indicates if the default processing shall be overridden
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	737	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	738	public void processNextReplacement(final String replacement, final boolean forceAppend) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	739	final boolean append = lastReplacement == null \|\| !!lastReplacement.endsWith(replacement) \|\| forceAppend;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	740
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	741	if (append && builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	742	builder.append(replacement);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	743	// remove all characters after the maximum length
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	744	if (builder.length() > MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	745	builder.delete(MAX_LENGTH, builder.length());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	746	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	747	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	748	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	749
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	750	lastReplacement = replacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	751	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	752
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	753	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	754	public String toString() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	755	if (cachedString == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	756	cachedString = builder.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	757	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	758	return cachedString;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	759	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	760	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	761
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	762	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	763	* Inner class for storing rules.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	764	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	765	private static final class Rule {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	766	private final String pattern;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	767	private final String[] replacementAtStart;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	768	private final String[] replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	769	private final String[] replacementDefault;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	770
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	771	protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	772	final String replacementDefault) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	773	this.pattern = pattern;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	774	this.replacementAtStart = replacementAtStart.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	775	this.replacementBeforeVowel = replacementBeforeVowel.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	776	this.replacementDefault = replacementDefault.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	777	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	778
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	779	public int getPatternLength() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	780	return pattern.length();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	781	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	782
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	783	public String[] getReplacements(final String context, final boolean atStart) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	784	if (atStart) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	785	return replacementAtStart;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	786	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	787
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	788	final int nextIndex = getPatternLength();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	789	final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	790	if (nextCharIsVowel) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	791	return replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	792	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	793
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	794	return replacementDefault;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	795	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	796
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	797	private boolean isVowel(final char ch) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	798	return ch == 'a' \|\| ch == 'e' \|\| ch == 'i' \|\| ch == 'o' \|\| ch == 'u';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	799	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	800
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	801	public boolean matches(final String context) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	802	return context.startsWith(pattern);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	803	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	804
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	805	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	806	public String toString() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	807	return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	808	Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	809	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	810	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	811
/** Marker that starts an end-of-line comment in the rules resource. */
private static final String COMMENT = "//";
/** Quote character that may enclose the tokens of a rule statement. */
private static final String DOUBLE_QUOTE = "\"";

/** Marker that ends a multi-line comment in the rules resource. */
private static final String MULTILINE_COMMENT_END = "*/";

/** Marker that starts a multi-line comment in the rules resource. */
private static final String MULTILINE_COMMENT_START = "/*";

/** The resource file containing the replacement and folding rules. */
private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";

/** The code length of a DM soundex value. */
private static final int MAX_LENGTH = 6;

/** Transformation rules indexed by the first character of their pattern. */
private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();

/** Folding rules: maps a character to the character it is replaced with during cleanup. */
private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	830
// Loads the rule resource once per class: fills RULES and FOLDINGS, then orders each
// rule list so that the longest pattern comes first.
static {
    final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
    if (rulesIS == null) {
        throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
    }

    final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
    try {
        parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
    } finally {
        // closing the scanner also closes the underlying stream; doing it in a finally
        // block releases the resource even when the rule file is malformed
        scanner.close();
    }

    // sort RULES by pattern length in descending order
    final Comparator<Rule> longestPatternFirst = new Comparator<Rule>() {
        @Override
        public int compare(final Rule rule1, final Rule rule2) {
            return rule2.getPatternLength() - rule1.getPatternLength();
        }
    };
    for (final List<Rule> ruleList : RULES.values()) {
        Collections.sort(ruleList, longestPatternFirst);
    }
}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	852
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	853	private static void parseRules(final Scanner scanner, final String location,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	854	final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	855	int currentLine = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	856	boolean inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	857
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	858	while (scanner.hasNextLine()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	859	currentLine++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	860	final String rawLine = scanner.nextLine();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	861	String line = rawLine;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	862
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	863	if (inMultilineComment) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	864	if (line.endsWith(MULTILINE_COMMENT_END)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	865	inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	866	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	867	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	868	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	869
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	870	if (line.startsWith(MULTILINE_COMMENT_START)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	871	inMultilineComment = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	872	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	873	// discard comments
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	874	final int cmtI = line.indexOf(COMMENT);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	875	if (cmtI >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	876	line = line.substring(0, cmtI);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	877	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	878
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	879	// trim leading-trailing whitespace
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	880	line = line.trim();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	881
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	882	if (line.length() == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	883	continue; // empty lines can be safely skipped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	884	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	885
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	886	if (line.contains("=")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	887	// folding
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	888	final String[] parts = line.split("=");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	889	if (parts.length !!= 2) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	890	throw new IllegalArgumentException("Malformed folding statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	891	" parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	892	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	893	final String leftCharacter = parts[0];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	894	final String rightCharacter = parts[1];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	895
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	896	if (leftCharacter.length() !!= 1 \|\| rightCharacter.length() !!= 1) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	897	throw new IllegalArgumentException("Malformed folding statement - " +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	898	"patterns are not single characters: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	899	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	900
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	901	asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0));
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	902	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	903	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	904	// rule
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	905	final String[] parts = line.split("\\s+");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	906	if (parts.length !!= 4) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	907	throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	908	" parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	909	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	910	try {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	911	final String pattern = stripQuotes(parts[0]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	912	final String replacement1 = stripQuotes(parts[1]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	913	final String replacement2 = stripQuotes(parts[2]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	914	final String replacement3 = stripQuotes(parts[3]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	915
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	916	final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	917	final char patternKey = r.pattern.charAt(0);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	918	List<Rule> rules = ruleMapping.get(patternKey);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	919	if (rules == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	920	rules = new ArrayList<Rule>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	921	ruleMapping.put(patternKey, rules);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	922	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	923	rules.add(r);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	924	} catch (final IllegalArgumentException e) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	925	throw new IllegalStateException(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	926	"Problem parsing line '" + currentLine + "' in " + location, e);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	927	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	928	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	929	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	930	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	931	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	932	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	933
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	934	private static String stripQuotes(String str) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	935	if (str.startsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	936	str = str.substring(1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	937	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	938
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	939	if (str.endsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	940	str = str.substring(0, str.length() - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	941	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	942
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	943	return str;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	944	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	945
/** Whether to use ASCII folding prior to encoding. */
private final boolean folding;

/**
 * Creates a new instance with ASCII-folding enabled.
 * <p>
 * Equivalent to calling the one-argument constructor with {@code true}.
 * </p>
 */
public DaitchMokotoffSoundex() {
    this(true);
}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	955
/**
 * Creates a new instance.
 * <p>
 * With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters,
 * e.g. {@code è} becomes {@code e}.
 * </p>
 *
 * @param folding
 *            if ASCII-folding shall be performed before encoding
 */
public DaitchMokotoffSoundex(final boolean folding) {
    this.folding = folding;
}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	969
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	970	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	971	* Performs a cleanup of the input string before the actual soundex transformation.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	972	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	973	* Removes all whitespace characters and performs ASCII folding if enabled.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	974	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	975	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	976	* @param input
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	977	* the input string to cleanup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	978	* @return a cleaned up string
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	979	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	980	private String cleanup(final String input) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	981	final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	982	for (char ch : input.toCharArray()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	983	if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	984	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	985	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	986
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	987	ch = Character.toLowerCase(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	988	if (folding && FOLDINGS.containsKey(ch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	989	ch = FOLDINGS.get(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	990	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	991	sb.append(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	992	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	993	return sb.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	994	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	995
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	996	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	997	* Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	998	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	999	* This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1000	* EncoderException if the supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1001	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1002	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1003	* @see #soundex(String)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1004	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1005	* @param obj
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1006	* Object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1007	* @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1008	* supplied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1009	* @throws EncoderException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1010	* if the parameter supplied is not of type java.lang.String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1011	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1012	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1013	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1014	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1015	public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1016	if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1017	throw new EncoderException(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1018	"Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1019	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1020	return encode((String) obj);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1021	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1022
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1023	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1024	* Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1025	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1026	* @see #soundex(String)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1027	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1028	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1029	* A String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1030	* @return A DM Soundex code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1031	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1032	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1033	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1034	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1035	public String encode(final String source) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1036	if (source == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1037	return null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1038	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1039	return soundex(source, false)[0];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1040	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1041
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1042	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1043	* Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1044	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1045	* In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1046	* separated by '\|'.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1047	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1048	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1049	* Example: the name "AUERBACH" is encoded as both
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1050	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1051	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1052	* <li>097400</li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1053	* <li>097500</li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1054	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1055	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1056	* Thus the result will be "097400\|097500".
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1057	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1058	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1059	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1060	* A String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1061	* @return A string containing a set of DM Soundex codes corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1062	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1063	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1064	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1065	public String soundex(final String source) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1066	final String[] branches = soundex(source, true);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1067	final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1068	int index = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1069	for (final String branch : branches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1070	sb.append(branch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1071	if (++index < branches.length) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1072	sb.append('|');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1073	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1074	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1075	return sb.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1076	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1077
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1078	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1079	* Perform the actual DM Soundex algorithm on the input string.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1080	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1081	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1082	* A String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1083	* @param branching
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1084	* If branching shall be performed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1085	* @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1086	* selected branching mode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1087	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1088	private String[] soundex(final String source, final boolean branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1089	if (source == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1090	return null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1091	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1092
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1093	final String input = cleanup(source);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1094
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1095	final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1096	currentBranches.add(new Branch());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1097
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1098	char lastChar = '\0';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1099	for (int index = 0; index < input.length(); index++) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1100	final char ch = input.charAt(index);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1101
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1102	// ignore whitespace inside a name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1103	if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1104	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1105	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1106
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1107	final String inputContext = input.substring(index);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1108	final List<Rule> rules = RULES.get(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1109	if (rules == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1110	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1111	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1112
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1113	// use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1114	@SuppressWarnings("unchecked")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1115	final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1116
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1117	for (final Rule rule : rules) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1118	if (rule.matches(inputContext)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1119	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1120	nextBranches.clear();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1121	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1122	final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1123	final boolean branchingRequired = replacements.length > 1 && branching;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1124
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1125	for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1126	for (final String nextReplacement : replacements) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1127	// if we have multiple replacements, always create a new branch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1128	final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1129
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1130	// special rule: occurrences of mn or nm are treated differently
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1131	final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1132
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1133	nextBranch.processNextReplacement(nextReplacement, force);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1134
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1135	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1136	nextBranches.add(nextBranch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1137	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1138	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1139	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1140	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1141	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1142
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1143	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1144	currentBranches.clear();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1145	currentBranches.addAll(nextBranches);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1146	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1147	index += rule.getPatternLength() - 1;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1148	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1149	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1150	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1151
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1152	lastChar = ch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1153	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1154
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1155	final String[] result = new String[currentBranches.size()];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1156	int index = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1157	for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1158	branch.finish();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1159	result[index++] = branch.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1160	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1161
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1162	return result;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1163	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1164	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1165	END>>"
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	1166	! !
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	1167
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1168	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1169
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1170	copyright
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1171	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1172	Copyright (c) 2002-2004 Robert Jarvis
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1173
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1174	Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1175	files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1176	copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1177	the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1178
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1179	The above copyright notice and this permission notice shall be included in all copies or substantial
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1180	portions of the Software.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1181
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1182	THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1183	INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1184	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1185	WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1186	USE OR OTHER DEALINGS IN THE SOFTWARE.'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1187	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1188	! !
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1189
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1190	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1191
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1192	isSlavoGermanic:aString
		"Answer true if aString contains at least one of the literal fragments
		 'w', 'k', 'cz', 'witz', 'ä', 'ö', 'ü' or 'ß' (tested with includesString:).
		 NOTE(review): the fragments are lowercase, so uppercase input presumably
		 does not match - confirm against includesString: before relying on it."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1193	^ #('w' 'k' 'cz' 'witz' 'ä' 'ö' 'ü' 'ß') contains:[:sub | aString includesString:sub]
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1194
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1195	"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1196	self isSlavoGermanic:'walter'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1197	self isSlavoGermanic:'horowitz'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1198	self isSlavoGermanic:'müller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1199	self isSlavoGermanic:'miller'
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1200	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1201
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1202	"Modified: / 28-07-2017 / 10:14:38 / cg"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1203	! !
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1204
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1205	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1206
3685 01ebbac96899 #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3648 diff changeset	1207	documentation
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1208	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1209	The Double Metaphone algorithm
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1210
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1211	see internet: https://en.wikipedia.org/wiki/Metaphone
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1212	"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1213	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1214
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1215	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1216
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1217	currentIndex
		"Answer the value of the currentIndex instance variable (initialize sets it to 1)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1218	^currentIndex
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1219	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1220
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1221	currentIndex: anInteger
		"Set the currentIndex instance variable to anInteger."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1222	currentIndex := anInteger
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1223	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1224
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1225	inputKey
		"Answer the string currently held in the inputKey instance variable."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1226	^inputKey
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1227	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1228
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1229	inputKey: aString
		"Store the uppercase form of aString (aString asUppercase) as the input key."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1230	inputKey := aString asUppercase
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1231	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1232
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1233	primaryTranslation
		"Answer the value of the primaryTranslation instance variable
		 (initialize sets it to the empty string)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1234	^primaryTranslation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1235	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1236
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1237	primaryTranslation: anObject
		"Replace the primaryTranslation instance variable with anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1238	primaryTranslation := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1239	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1240
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1241	secondaryTranslation
		"Answer the value of the secondaryTranslation instance variable
		 (initialize sets it to the empty string)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1242	^secondaryTranslation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1243	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1244
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1245	secondaryTranslation: anObject
		"Replace the secondaryTranslation instance variable with anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1246	secondaryTranslation := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1247	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1248
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1249	skipCount
		"Answer the value of the skipCount instance variable (initialize sets it to 0)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1250	^skipCount
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1251	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1252
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1253	skipCount: anInteger
		"Set the skipCount instance variable to anInteger."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1254	skipCount := anInteger
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1255	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1256
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1257	startIndex
		"Answer the value of the startIndex instance variable (initialize sets it to 1)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1258	^startIndex
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1259	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1260
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1261	startIndex: anObject
		"Set the startIndex instance variable to anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1262	startIndex := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1263	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1264
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1265	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1266
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1267	phoneticStringsFor:aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1268	"Private - Answers an array of alternate phonetic strings for the given input string."
		"The answer is a two-element Array: primaryTranslation followed by secondaryTranslation,
		 as left behind by performInitialProcessing and processRemainingCharacters."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1269
		"Normalize to uppercase exactly like the inputKey: setter does; the matching helpers
		 of this class (e.g. the instance-side isSlavoGermanic:) compare against uppercase
		 literals. asUppercase is harmless if the caller already passed an uppercase string."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1270	inputKey := aString asUppercase.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1271	self performInitialProcessing.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1272	self processRemainingCharacters.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1273	^ Array with:primaryTranslation with:secondaryTranslation
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1274
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1275	"Modified (format): / 28-07-2017 / 11:25:02 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1276	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1277
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1278	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1279
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1280	initialize
		"Put the receiver into its start state: both translations empty,
		 startIndex and currentIndex at 1, skipCount at 0."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1281	super initialize.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1282
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1283	primaryTranslation := ''.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1284	secondaryTranslation := ''.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1285	startIndex := currentIndex := 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1286	skipCount := 0.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1288
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1289	"Modified: / 28-07-2017 / 11:18:44 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1290	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1291
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1292	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1293
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1294	addPrimaryTranslation:aString
		"Append aString to primaryTranslation; the concatenation result replaces the old value."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1295	primaryTranslation := primaryTranslation , aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1296
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1297	"Modified: / 28-07-2017 / 11:19:09 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1298	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1299
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1300	addSecondaryTranslation:aString
		"Append aString to secondaryTranslation; the concatenation result replaces the old value."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1301	secondaryTranslation := secondaryTranslation , aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1302
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1303	"Modified: / 28-07-2017 / 11:17:11 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1304	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1305
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1306	isSlavoGermanic: aString
		"Answer true if aString contains the character W or K, or the substring
		 'CZ' or 'WITZ' (uppercase literals); answer false otherwise."
4521 cfe4f333794f #REFACTORING by stefan Stefan Vogel <sv@exept.de> parents: 4495 diff changeset	1307	(aString includesAny: 'WK') ifTrue:[ ^ true ].
cfe4f333794f #REFACTORING by stefan Stefan Vogel <sv@exept.de> parents: 4495 diff changeset	1308	(aString indexOfSubCollection: 'CZ' startingAt: 1) > 0 ifTrue:[ ^ true ].
cfe4f333794f #REFACTORING by stefan Stefan Vogel <sv@exept.de> parents: 4495 diff changeset	1309	^ (aString indexOfSubCollection: 'WITZ' startingAt: 1) > 0
cfe4f333794f #REFACTORING by stefan Stefan Vogel <sv@exept.de> parents: 4495 diff changeset	1310
cfe4f333794f #REFACTORING by stefan Stefan Vogel <sv@exept.de> parents: 4495 diff changeset	1311	"Modified: / 09-10-2017 / 17:10:46 / stefan"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1312	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1313
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1314	keyAt: anInteger
		"Answer the character of inputKey at index anInteger;
		 for any index outside 1 .. inputKey size answer a space instead."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1315	^ (anInteger between:1 and:inputKey size)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1316	ifTrue:[ inputKey at: anInteger ]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1317	ifFalse:[ Character space ]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1319
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1320	"Modified: / 28-07-2017 / 11:38:30 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1321	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1322
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1323	keyLeftString: lengthInteger
		"Answer what keyMidString:from: produces for lengthInteger characters starting at index 1,
		 i.e. the leading part of the input key."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1324	^self keyMidString: lengthInteger from: 1
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1325	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1326
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1327	keyMidString: lengthInteger from: fromInteger
		"Answer a string of exactly lengthInteger characters holding the characters of inputKey
		 at positions fromInteger .. fromInteger + lengthInteger - 1.
		 Positions before the start or after the end of inputKey are rendered as spaces.
		 A non-positive lengthInteger answers an empty string."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1328	| lastWanted firstAvailable lastAvailable leadingPad trailingPad |
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1329
		lengthInteger <= 0 ifTrue:[ ^ String new ].
		
		lastWanted := fromInteger + lengthInteger - 1.
		"clip the requested window to the part that really exists in inputKey"
		firstAvailable := fromInteger max: 1.
		lastAvailable := lastWanted min: inputKey size.
		
		"window lies completely outside the key: nothing to copy, only padding.
		 This also keeps copyFrom:to: from ever being called with an inverted range."
		firstAvailable > lastAvailable ifTrue:[
		    ^ String new: lengthInteger withAll: Character space
		].
		
		"one space for every requested position before index 1 ..."
		leadingPad := String new: (firstAvailable - fromInteger) withAll: Character space.
		"... and one for every requested position behind the last character"
		trailingPad := String new: (lastWanted - lastAvailable) withAll: Character space.
		
		^ leadingPad , (inputKey copyFrom: firstAvailable to: lastAvailable) , trailingPad
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1354
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1355	"Modified: / 28-07-2017 / 11:20:43 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1356	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1357
keyRightString: lengthInteger
    "Answer the last lengthInteger characters of inputKey.
     The work is delegated to keyMidString:from:, using a start position
     counted back from the end of the key."

    | firstPosition |

    firstPosition := inputKey size - lengthInteger + 1.
    ^ self keyMidString: lengthInteger from: firstPosition

    "Modified: / 28-07-2017 / 11:20:51 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1362	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1363
performInitialProcessing
    "Handle the special cases at the very beginning of inputKey before the
     per-letter rules run:
       - a leading GN, KN, PN, WR or PS advances startIndex by one;
       - a leading X is coded as S and advances startIndex by one;
       - a leading vowel is coded as A and advances startIndex by one."

    "the size test protects the two-character copy against one-letter keys"
    (inputKey size > 1
        and: [ #( 'GN' 'KN' 'PN' 'WR' 'PS' ) includes: (inputKey copyFrom: 1 to: 2) ])
            ifTrue: [ startIndex := startIndex + 1 ].

    (self keyAt: 1) = $X ifTrue: [
        self addPrimaryTranslation: 'S'.
        self addSecondaryTranslation: 'S'.
        startIndex := startIndex + 1
    ].

    (self keyAt: 1) isVowel ifTrue: [
        self addPrimaryTranslation: 'A'.
        self addSecondaryTranslation: 'A'.
        startIndex := startIndex + 1
    ]

    "Modified: / 01-08-2017 / 19:29:19 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1385	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1386
processB
    "Encode the letter B at currentIndex as P in both translations.
     When the following letter is another B, skipCount is incremented as well."

    self addPrimaryTranslation: 'P'.
    self addSecondaryTranslation: 'P'.

    "nothing more to do unless the B is doubled"
    (self keyAt: currentIndex + 1) == $B ifFalse: [ ^ self ].
    skipCount := skipCount + 1

    "Modified: / 28-07-2017 / 11:26:03 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1397	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1398
processC
    "Encode the letter C found at currentIndex of inputKey.
     The rules i .. xi below are tried in order; the first matching rule adds
     its primary and secondary codes and increments skipCount.

     Rules that look at the beginning of the key use keyMidString:from:
     (which pads with spaces) instead of copyFrom:to: on inputKey, so that
     keys shorter than the inspected prefix no longer run out of bounds.

     NOTE(review): rule i and the final branch of rule vii add 2 to skipCount
     where the reference implementation (dmetaph.cpp) advances by only one
     extra letter - presumably worth confirming against the main loop."

    "i - germanic ACH after a consonant: not followed by I, and not followed
     by E unless the word part is BACHER or MACHER"
    ((((currentIndex >= 3
        and: [ (self keyAt: currentIndex - 2) isVowel not ])
        and: [ (self keyMidString: 3 from: currentIndex - 1) = 'ACH' ])
        and: [ (self keyAt: currentIndex + 2) ~= $I ])
        and: [ (self keyAt: currentIndex + 2) ~= $E
                or: [ #('BACHER' 'MACHER')
                        includes: (self keyMidString: 6 from: currentIndex - 2) ] ])
            ifTrue: [
                self addPrimaryTranslation: 'K'.
                self addSecondaryTranslation: 'K'.
                skipCount := skipCount + 2.
                ^ self ].

    "ii - only the initial C of CAESAR is an S; later Cs in such a key
     must fall through to the other rules"
    (currentIndex = 1 and: [ inputKey beginsWith: 'CAESAR' ])
        ifTrue: [
            self addPrimaryTranslation: 'S'.
            self addSecondaryTranslation: 'S'.
            skipCount := skipCount + 1.
            ^ self ].

    "iii"
    (self keyMidString: 4 from: currentIndex) = 'CHIA'
        ifTrue: [
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self ].

    "iv"
    (self keyMidString: 2 from: currentIndex) = 'CH'
        ifTrue: [
            "a - CHAE inside a word"
            (currentIndex > 1
                and: [ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
                    ifTrue: [
                        self addPrimaryTranslation: 'K'.
                        self addSecondaryTranslation: 'X'.
                        skipCount := skipCount + 1.
                        ^ self ].

            "b - word-initial CHARAC / CHARIS, or CHOR / CHYM / CHIA / CHEM
             except CHORE (the exclusion needs a five letter comparison)"
            (currentIndex = 1
                and: [ (#('CHARAC' 'CHARIS') includes: (self keyMidString: 6 from: 1))
                        or: [ (#('CHOR' 'CHYM' 'CHIA' 'CHEM') includes: (self keyMidString: 4 from: 1))
                                and: [ (self keyMidString: 5 from: 1) ~= 'CHORE' ] ] ])
                    ifTrue: [
                        self addPrimaryTranslation: 'K'.
                        self addSecondaryTranslation: 'K'.
                        skipCount := skipCount + 1.
                        ^ self ].

            "c"
            (((((#('VAN ' 'VON ') includes: (self keyMidString: 4 from: 1))
                or: [ (self keyMidString: 3 from: 1) = 'SCH' ])
                or: [ #('ORCHES' 'ARCHIT' 'ORCHID')
                        includes: (self keyMidString: 6 from: currentIndex - 2) ])
                or: [ #($T $S) includes: (self keyAt: currentIndex + 2) ])
                or: [ ((currentIndex = 1)
                        or: [ #($A $O $U $E) includes: (self keyAt: currentIndex - 1) ])
                            and: [ #($L $R $N $M $B $H $F $V $W $ ) includes: (self keyAt: currentIndex + 2) ] ])
                    ifTrue: [
                        self addPrimaryTranslation: 'K'.
                        self addSecondaryTranslation: 'K'.
                        skipCount := skipCount + 1.
                        ^ self ].

            "remaining CH cases"
            currentIndex > 1
                ifTrue: [
                    (self keyMidString: 2 from: 1) = 'MC'
                        ifTrue: [
                            self addPrimaryTranslation: 'K'.
                            self addSecondaryTranslation: 'K' ]
                        ifFalse: [
                            self addPrimaryTranslation: 'X'.
                            self addSecondaryTranslation: 'K' ] ]
                ifFalse: [
                    self addPrimaryTranslation: 'X'.
                    self addSecondaryTranslation: 'X' ].
            skipCount := skipCount + 1.
            ^ self ].

    "v"
    (self keyAt: currentIndex + 1) = $Z
        ifTrue: [
            self addPrimaryTranslation: 'S'.
            self addSecondaryTranslation: 'X'.
            skipCount := skipCount + 1.
            ^ self ].

    "vi"
    (self keyMidString: 3 from: currentIndex + 1) = 'CIA'
        ifTrue: [
            self addPrimaryTranslation: 'X'.
            self addSecondaryTranslation: 'X'.
            skipCount := skipCount + 2.
            ^ self ].

    "vii - double C, but not in a key starting with MCC"
    ((self keyAt: currentIndex + 1) = $C
        and: [ (currentIndex = 2 and: [ (self keyAt: 1) = $M ]) not ])
            ifTrue: [
                ((#($I $E $H) includes: (self keyAt: currentIndex + 2))
                    and: [ (self keyMidString: 2 from: currentIndex + 2) ~= 'HU' ])
                        ifTrue: [
                            ((currentIndex = 2 and: [ (self keyAt: 1) = $A ])
                                or: [ #('UCCEE' 'UCCES')
                                        includes: (self keyMidString: 5 from: currentIndex - 1) ])
                                    ifTrue: [
                                        self addPrimaryTranslation: 'KS'.
                                        self addSecondaryTranslation: 'KS' ]
                                    ifFalse: [
                                        self addPrimaryTranslation: 'X'.
                                        self addSecondaryTranslation: 'X' ] ]
                        ifFalse: [
                            self addPrimaryTranslation: 'K'.
                            self addSecondaryTranslation: 'K' ].
                skipCount := skipCount + 2.
                ^ self ].

    "viii"
    (#($K $G $Q) includes: (self keyAt: currentIndex + 1))
        ifTrue: [
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self ].

    "ix"
    (#($I $E $Y) includes: (self keyAt: currentIndex + 1))
        ifTrue: [
            self addPrimaryTranslation: 'S'.
            (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
                ifTrue: [ self addSecondaryTranslation: 'X' ]
                ifFalse: [ self addSecondaryTranslation: 'S' ].
            skipCount := skipCount + 1.
            ^ self ].

    "x - default"
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "xi - after the default code, skip a following ' C', ' Q' or ' G',
     or a single following C, K or Q that does not start CE or CI"
    (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex + 1))
        ifTrue: [ skipCount := skipCount + 2 ]
        ifFalse: [
            ((#($C $K $Q) includes: (self keyAt: currentIndex + 1))
                and: [ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex + 1)) not ])
                    ifTrue: [ skipCount := skipCount + 1 ] ]

    "Modified: / 28-07-2017 / 11:29:11 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1574	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1575
processCedille
    "Encode a C with cedilla: S is added to both the primary and the
     secondary translation."

    self addPrimaryTranslation: 'S'.
    self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1580	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1581
processD
    "Encode the letter D found at currentIndex:
       DG followed by I, E or Y  -> J   (skipCount + 2)
       any other DG              -> TK  (skipCount + 1)
       DT or DD                  -> T   (skipCount + 1)
       otherwise                 -> T"

    | softG doubled |

    (self keyAt: currentIndex + 1) = $G ifTrue: [
        softG := #($I $E $Y) includes: (self keyAt: currentIndex + 2).
        softG
            ifTrue: [
                self addPrimaryTranslation: 'J'.
                self addSecondaryTranslation: 'J'.
                skipCount := skipCount + 2 ]
            ifFalse: [
                self addPrimaryTranslation: 'TK'.
                self addSecondaryTranslation: 'TK'.
                skipCount := skipCount + 1 ].
        ^ self ].

    "DT and DD share the plain T code; they only differ in the extra skip"
    doubled := #($T $D) includes: (self keyAt: currentIndex + 1).
    self addPrimaryTranslation: 'T'.
    self addSecondaryTranslation: 'T'.
    doubled ifTrue: [ skipCount := skipCount + 1 ]

    "Modified: / 28-07-2017 / 11:27:39 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1615	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1616
processF
    "Encode the letter F at currentIndex: 'F' is appended to both the primary
     and the secondary code. If the following letter is an F too ('FF'),
     skipCount is incremented so that the pair is encoded only once."

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    skipCount := skipCount
                    + ((self keyAt: currentIndex + 1) = $F
                            ifTrue: [1]
                            ifFalse: [0])

    "Modified (format): / 28-07-2017 / 11:29:21 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1626	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1627
processG
    "Encode the letter G at currentIndex; port of 'case G' in
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp
     Appends the codes for G (and for the groups GH, GN, GL, GG it forms with
     the following letter) to the primary and secondary translation, and
     increments skipCount whenever the following letter has been consumed too.

     Index mapping: the C code is 0-based ('current'), this code is 1-based,
     i.e. currentIndex = current + 1.

     All multi-character lookups go through the local substringAt block, which
     clips to the bounds of inputKey instead of raising an index error
     (like StringAt() of the reference, a lookup outside the key never matches)."

    | next leading tail substringAt |

    "answers the (at most len) characters of inputKey beginning at start;
     the range is clipped to the key, and the result is empty if start lies outside"
    substringAt := [:start :len |
        | stop |
        stop := (start + len - 1) min: inputKey size.
        (start < 1 or: [stop < start])
            ifTrue: ['']
            ifFalse: [inputKey copyFrom: start to: stop]].

    next := self keyAt: currentIndex + 1.

    "if(GetAt(current + 1) == 'H')"
    next = $H ifTrue: [
        "if((current > 0) AND !!IsVowel(current - 1))
            { MetaphAdd(K); current += 2; break; }"
        (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not]) ifTrue: [
            self addPrimaryTranslation: 'K';
                 addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "'ghislane', 'ghiradelli'
         if(current == 0) { if(GetAt(current + 2) == 'I') MetaphAdd(J); else MetaphAdd(K); current += 2; break; }"
        currentIndex = 1 ifTrue: [
            (self keyAt: currentIndex + 2) = $I
                ifTrue: [
                    self addPrimaryTranslation: 'J';
                         addSecondaryTranslation: 'J'.
                ] ifFalse: [
                    self addPrimaryTranslation: 'K';
                         addSecondaryTranslation: 'K'.
                ].
            skipCount := skipCount + 1.
            ^ self
        ].

        "Parker's rule (with some further refinements) - e.g., 'hugh'
         if(((current > 1) AND StringAt((current - 2), 1, B, H, D, ) )
            //e.g., 'bough'
            OR ((current > 2) AND StringAt((current - 3), 1, B, H, D, ) )
            //e.g., 'broughton'
            OR ((current > 3) AND StringAt((current - 4), 1, B, H, ) ) )
         -> GH is silent"
        (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)])
            or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])
            or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])
        ifTrue: [
            skipCount := skipCount + 1.
            ^ self
        ].

        "e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
         if((current > 2)
            AND (GetAt(current - 1) == 'U')
            AND StringAt((current - 3), 1, C, G, L, R, T, ) )"
        (currentIndex > 3
            and: [(self keyAt: currentIndex - 1) = $U
            and: [#($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)]])
        ifTrue: [
            self addPrimaryTranslation: 'F';
                 addSecondaryTranslation: 'F'.
        ] ifFalse: [
            "if((current > 0) AND GetAt(current - 1) !!= 'I') MetaphAdd(K);"
            (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I]) ifTrue: [
                self addPrimaryTranslation: 'K';
                     addSecondaryTranslation: 'K'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "if(GetAt(current + 1) == 'N')"
    next = $N ifTrue: [
        "if((current == 1) AND IsVowel(0) AND !!SlavoGermanic()) MetaphAdd(KN, N);"
        (currentIndex = 2
            and: [(inputKey at: 1) isVowel
            and: [(self isSlavoGermanic: inputKey) not]])
        ifTrue: [
            self addPrimaryTranslation: 'KN';
                 addSecondaryTranslation: 'N'.
        ] ifFalse: [
            "not e.g. 'cagney'
             if(!!StringAt((current + 2), 2, EY, )
                AND (GetAt(current + 1) !!= 'Y')
                AND !!SlavoGermanic())
             The two letters after the N are compared (exactly two, wherever
             they are in the key); a key ending in GN has no such letters and
             therefore takes this branch as well.
             The 'Y' test of the reference is always true here, because the
             next letter is known to be N."
            ((substringAt value: currentIndex + 2 value: 2) ~= 'EY'
                and: [(self isSlavoGermanic: inputKey) not])
            ifTrue: [
                self addPrimaryTranslation: 'N';
                     addSecondaryTranslation: 'KN'.
            ] ifFalse: [
                self addPrimaryTranslation: 'KN';
                     addSecondaryTranslation: 'KN'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "'tagliaro'
     if(StringAt((current + 1), 2, LI, ) AND !!SlavoGermanic())"
    ((substringAt value: currentIndex + 1 value: 2) = 'LI'
        and: [(self isSlavoGermanic: inputKey) not])
    ifTrue: [
        self addPrimaryTranslation: 'KL';
             addSecondaryTranslation: 'L'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "-ges-,-gep-,-gel-, -gie- at beginning
     if((current == 0)
        AND ((GetAt(current + 1) == 'Y')
             OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER, )) )"
    (currentIndex = 1
        and: [next = $Y
                or: [#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER')
                        includes: (substringAt value: currentIndex + 1 value: 2)]])
    ifTrue: [
        self addPrimaryTranslation: 'K';
             addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "-ger-, -gy-
     if((StringAt((current + 1), 2, ER, ) OR (GetAt(current + 1) == 'Y'))
        AND !!StringAt(0, 6, DANGER, RANGER, MANGER, )
        AND !!StringAt((current - 1), 1, E, I, )
        AND !!StringAt((current - 1), 3, RGY, OGY, ) )"
    (((substringAt value: currentIndex + 1 value: 2) = 'ER' or: [next = $Y])
        and: [(#('DANGER' 'RANGER' 'MANGER') includes: (substringAt value: 1 value: 6)) not
        and: [(#('E' 'I') includes: (substringAt value: currentIndex - 1 value: 1)) not
        and: [(#('RGY' 'OGY') includes: (substringAt value: currentIndex - 1 value: 3)) not]]])
    ifTrue: [
        self addPrimaryTranslation: 'K';
             addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "italian e.g, 'biaggi'
     if(StringAt((current + 1), 1, E, I, Y, ) OR StringAt((current - 1), 4, AGGI, OGGI, ))"
    ((#($E $I $Y) includes: next)
        or: [#('AGGI' 'OGGI') includes: (substringAt value: currentIndex - 1 value: 4)])
    ifTrue: [
        "obvious germanic
         if((StringAt(0, 4, VAN , VON , ) OR StringAt(0, 3, SCH, ))
            OR StringAt((current + 1), 2, ET, )) MetaphAdd(K);
         Note that ET is looked up right after the G, not at the start of the key."
        leading := substringAt value: 1 value: 4.
        ((#('VAN ' 'VON ') includes: leading)
            or: [(substringAt value: 1 value: 3) = 'SCH'
            or: [(substringAt value: currentIndex + 1 value: 2) = 'ET']])
        ifTrue: [
            self addPrimaryTranslation: 'K';
                 addSecondaryTranslation: 'K'.
        ] ifFalse: [
            "always soft if french ending
             if(StringAt((current + 1), 4, IER , )) MetaphAdd(J); else MetaphAdd(J, K);
             As in the previous version of this method, an IER at the very end
             of the key is accepted like IER followed by a blank."
            tail := substringAt value: currentIndex + 1 value: 4.
            (tail = 'IER ' or: [tail = 'IER'])
            ifTrue: [
                self addPrimaryTranslation: 'J';
                     addSecondaryTranslation: 'J'.
            ] ifFalse: [
                self addPrimaryTranslation: 'J';
                     addSecondaryTranslation: 'K'.
            ].
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "if(GetAt(current + 1) == 'G') current += 2; else current += 1;
     MetaphAdd(K);"
    next = $G ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'K';
         addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:31:33 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1848	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1849
processH
    "Encode the letter H at currentIndex; port of 'case H' in
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp

        //only keep if first & before vowel or btw. 2 vowels
        if(((current == 0) OR IsVowel(current - 1)) AND IsVowel(current + 1))
        {
            MetaphAdd(H);
            current += 2;
        }else //also takes care of 'HH'
            current += 1;

     When the H is kept, 'H' is appended to both codes and skipCount is
     incremented; otherwise nothing is added."

    | keepsH |

    keepsH := (currentIndex = 1 or: [(self keyAt: currentIndex - 1) isVowel])
                and: [(self keyAt: currentIndex + 1) isVowel].
    keepsH ifFalse: [^ self].

    self addPrimaryTranslation: 'H'.
    self addSecondaryTranslation: 'H'.
    skipCount := skipCount + 1

    "Modified: / 28-07-2017 / 11:29:52 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1875	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1876
processJ
    "Encode the letter J found at currentIndex and append the result to the
     primary/secondary translations.

     Summary of the rules implemented below (primary / secondary):
       - 'JOSE' at the current position, or a key starting with 'SAN ':
           H / H   if JOSE is a word of its own at the start of the key,
                   or the key starts with 'SAN ';
           J / H   otherwise.
       - J as the very first letter (and not JOSE):     J / A
       - vowel before, A or O after, not slavo-germanic: J / H
       - J as the last letter:                           J / ' '
       - otherwise J / J, unless the next letter is one of L T K S N M B Z
         or the previous letter is one of S K L (then nothing is added).
     Except for the JOSE/SAN case, a directly following second J bumps
     skipCount (presumably consumed by the caller's scan loop to step over
     the doubled letter - verify against the caller).

     Reference implementation:
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'J':
            //obvious spanish, 'jose', 'san jacinto'
            if(StringAt(current, 4, JOSE, ) OR StringAt(0, 4, SAN , ) )
            {
                if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, SAN , ) )
                    MetaphAdd(H);
                else
                {
                    MetaphAdd(J, H);
                }
                current +=1;
                break;
            }

            if((current == 0) AND !!StringAt(current, 4, JOSE, ))
                MetaphAdd(J, A);//Yankelovich/Jankelowicz
            else
                //spanish pron. of e.g. 'bajador'
                if(IsVowel(current - 1)
                        AND !!SlavoGermanic()
                        AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
                    MetaphAdd(J, H);
                else
                    if(current == last)
                        MetaphAdd(J, );
                    else
                        if(!!StringAt((current + 1), 1, L, T, K, S, N, M, B, Z, )
                                AND !!StringAt((current - 1), 1, S, K, L, ))
                            MetaphAdd(J);

            if(GetAt(current + 1) == 'J')//it could happen!!
                current += 2;
            else
                current += 1;
            break;
    "

    | currentWord firstWord nextLetter |

    currentWord := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size).
    firstWord := inputKey copyFrom: 1 to: (4 min: inputKey size).
    nextLetter := self keyAt: currentIndex + 1.

    (currentWord = 'JOSE' or: [firstWord = 'SAN ']) ifTrue: [
        "The keyAt: send must be parenthesized: binary = binds tighter than a
         keyword message, so without the parentheses keyAt: would be sent
         the Boolean result of (currentIndex + 4) = space."
        ((currentIndex = 1
            and: [inputKey size = 4
                or: [inputKey size >= 5
                    and: [(self keyAt: currentIndex + 4) = Character space]]])
          or: [firstWord = 'SAN '])
            ifTrue: [
                self addPrimaryTranslation: 'H';
                     addSecondaryTranslation: 'H'.
            ] ifFalse: [
                self addPrimaryTranslation: 'J';
                     addSecondaryTranslation: 'H'.
            ].
        ^ self.
    ].

    (currentIndex = 1 and: [firstWord ~= 'JOSE'])
        ifTrue: [
            "e.g. Yankelovich/Jankelowicz"
            self addPrimaryTranslation: 'J';
                 addSecondaryTranslation: 'A'.
        ] ifFalse: [
            "spanish pronunciation of e.g. 'bajador'"
            ((currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel])
              and: [(self isSlavoGermanic: inputKey) not
                and: [nextLetter == $A or: [nextLetter == $O]]])
                ifTrue: [
                    self addPrimaryTranslation: 'J';
                         addSecondaryTranslation: 'H'.
                ] ifFalse: [
                    currentIndex = inputKey size
                        ifTrue: [
                            self addPrimaryTranslation: 'J';
                                 addSecondaryTranslation: ' '.
                        ] ifFalse: [
                            ((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not
                              and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
                                ifTrue: [
                                    self addPrimaryTranslation: 'J';
                                         addSecondaryTranslation: 'J'.
                                ].
                        ].
                ].
        ].

    "a doubled J - it could happen"
    nextLetter == $J ifTrue: [
        skipCount := skipCount + 1.
    ].

    "Modified: / 28-07-2017 / 11:31:41 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1962	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1963
processK
    "Encode the letter K at currentIndex: always K / K.
     When the following letter is a K as well, skipCount is incremented
     first (a doubled K yields a single code).

     Reference implementation:
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'K':
            if(GetAt(current + 1) == 'K')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $K ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:31:46 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1983	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1984
processL
    "Encode the letter L at currentIndex.

     A single L yields L / L.
     A doubled L bumps skipCount and normally also yields L / L; in the
     spanish-looking cases ('cabrillo', 'gallegos') it yields L / ' ' instead:
       - the four letters starting one before the current position are one of
         ILLO, ILLA, ALLE and end the key, or
       - those four letters are ALLE and the key ends in AS, OS, A or O.

     Reference implementation:
        case 'L':
            if(GetAt(current + 1) == 'L')
            {
                //spanish e.g. 'cabrillo', 'gallegos'
                if(((current == (length - 3))
                        AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
                     OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, ))
                        AND StringAt((current - 1), 4, ALLE, )) )
                {
                    MetaphAdd(L, );
                    current += 2;
                    break;
                }
                current += 2;
            }else
                current += 1;
            MetaphAdd(L);
            break;
    "

    | currentWord keySize |

    (self keyAt: currentIndex + 1) = $L ifTrue: [
        keySize := inputKey size.

        "Fetch the 4-letter window (one before .. two after) up front.
         It is needed by BOTH alternatives of the test below; assigning it
         inside the first alternative left it nil whenever that alternative
         was short-circuited, so the ALLE + AS/OS/A/O rule could never match
         for an LL that is not at the end of the key."
        currentWord := currentIndex > 1
                        ifTrue: [inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: keySize)]
                        ifFalse: [''].

        ((currentIndex = (keySize - 2)
            and: [#('ILLO' 'ILLA' 'ALLE') includes: currentWord])
          or: [currentWord = 'ALLE'
            and: [(#('AS' 'OS') includes: (inputKey copyFrom: keySize - 1 to: keySize))
                or: [#($A $O) includes: (self keyAt: keySize)]]])
            ifTrue: [
                self addPrimaryTranslation: 'L';
                     addSecondaryTranslation: ' '.
                skipCount := skipCount + 1.
                ^ self.
            ].
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'L';
         addSecondaryTranslation: 'L'.

    "Modified: / 28-07-2017 / 11:32:03 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2025	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2026
processM
    "Encode the letter M at currentIndex: always M / M.
     skipCount is incremented when
       - the M is the middle of 'UMB' and that B either ends the key or is
         followed by 'ER' ('dumb', 'thumb'), or
       - the following letter is an M as well.

     Reference implementation:
        case 'M':
            if((StringAt((current - 1), 3, UMB, )
                    AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
                    //'dumb','thumb'
                 OR (GetAt(current + 1) == 'M') )
                current += 2;
            else
                current += 1;
            MetaphAdd(M);
            break;
    "

    | keySize |

    keySize := inputKey size.

    "The 'ER' look-ahead is exactly two characters wide
     (currentIndex + 2 .. currentIndex + 3). A three character window can
     only ever equal 'ER' when it is clipped by the end of the key, which
     made the rule miss every UMBER that is followed by further letters."
    (((currentIndex > 1
        and: [(inputKey copyFrom: currentIndex - 1 to: (currentIndex + 1 min: keySize)) = 'UMB'])
      and: [currentIndex + 1 = keySize
        or: [(inputKey
                copyFrom: (currentIndex + 2 min: keySize)
                to: (currentIndex + 3 min: keySize)) = 'ER']])
      or: [(self keyAt: currentIndex + 1) = $M])
        ifTrue: [
            skipCount := skipCount + 1.
        ].
    self addPrimaryTranslation: 'M';
         addSecondaryTranslation: 'M'.

    "Modified: / 28-07-2017 / 11:32:08 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2050	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2051
processN
    "Encode the letter N at currentIndex: always N / N.
     When the following letter is an N as well, skipCount is incremented
     first (a doubled N yields a single code).

     Reference implementation:
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'N':
            if(GetAt(current + 1) == 'N')
                current += 2;
            else
                current += 1;
            MetaphAdd(N);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $N ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.

    "Modified: / 28-07-2017 / 11:32:14 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2072	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2073
processNtilde
    "Encode the letter N-with-tilde (Ñ) at currentIndex: it is treated like
     a plain N, i.e. N / N, and never touches skipCount.

     Reference implementation:
        case 'Ñ':
            current += 1;
            MetaphAdd(N);
            break;
    "

    self addPrimaryTranslation: 'N'.
    self addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2082	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2083
processP
    "Encode the letter P at currentIndex.

       - PH yields F / F, and skipCount is incremented for the H.
       - otherwise P / P; if the following letter is a P or a B
         ('campbell', 'raspberry') skipCount is incremented so that only
         one code is produced for the pair.

     Reference implementation:
        case 'P':
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(F);
                current += 2;
                break;
            }

            //also account for campbell, raspberry
            if(StringAt((current + 1), 1, P, B, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(P);
            break;
    "

    | nextLetter |

    (nextLetter := self keyAt: currentIndex + 1) = $H ifTrue: [
        self addPrimaryTranslation: 'F';
             addSecondaryTranslation: 'F'.
        skipCount := skipCount + 1.
        ^ self.
    ].

    (#($P $B) includes: nextLetter) ifTrue: [
        skipCount := skipCount + 1.
    ].

    "The P code is emitted in every non-PH case, as in the reference above.
     Emitting it only when no P/B follows dropped the sound completely
     for PP and PB."
    self addPrimaryTranslation: 'P';
         addSecondaryTranslation: 'P'.

    "Modified: / 28-07-2017 / 11:32:28 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2118	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2119
processQ
    "Encode the letter Q at currentIndex: always K / K.
     When the following letter is a Q as well, skipCount is incremented
     first (a doubled Q yields a single code).

     Reference implementation:
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Q':
            if(GetAt(current + 1) == 'Q')
                current += 2;
            else
                current += 1;
            MetaphAdd(K);
            break;
    "

    | followingLetter |

    followingLetter := self keyAt: currentIndex + 1.
    followingLetter = $Q ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:32:32 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2140	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2141
processR
    "Encode the letter R found at currentIndex.

     A final R is left out of the primary translation (but kept in the
     secondary one) when all of the following hold:
        - the R is the last character of inputKey,
        - inputKey is not slavo-germanic,
        - the two characters before the R are 'IE' (french, e.g. 'rogier'),
        - the two characters before that 'IE' are neither 'ME' nor 'MA'
          (excludes e.g. 'hochmeier').
     In every other case both translations get an 'R'.
     A directly following second R is consumed as well, by bumping skipCount.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp

        case 'R':
            //french e.g. 'rogier', but exclude 'hochmeier'
            if((current == last)
                AND !!SlavoGermanic()
                AND StringAt((current - 2), 2, IE, )
                AND !!StringAt((current - 4), 2, ME, MA, ))
                MetaphAdd(, R);
            else
                MetaphAdd(R);

            if(GetAt(current + 1) == 'R')
                current += 2;
            else
                current += 1;
            break;
    "

    |dropFromPrimary pairBefore pairBeforeThat|

    dropFromPrimary := false.
    (currentIndex = inputKey size and:[(self isSlavoGermanic:inputKey) not]) ifTrue:[
        pairBefore := inputKey
                        copyFrom:((currentIndex - 2) max:1)
                        to:((currentIndex - 1) max:1).
        pairBefore = 'IE' ifTrue:[
            pairBeforeThat := inputKey
                                copyFrom:((currentIndex - 4) max:1)
                                to:((currentIndex - 3) max:1).
            dropFromPrimary := (#('ME' 'MA') includes:pairBeforeThat) not.
        ].
    ].

    self addPrimaryTranslation:(dropFromPrimary ifTrue:[''] ifFalse:['R']).
    self addSecondaryTranslation:'R'.

    "a doubled R counts only once"
    (self keyAt:currentIndex + 1) = $R ifTrue:[
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:32:37 / cg"
!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2181
processRemainingCharacters
    "Walk inputKey from startIndex to its end and let every consonant be
     encoded by its own method.

     For each position i:
        - if skipCount is non-zero, the character has already been consumed
          by a previous process method; only decrement skipCount.
        - otherwise stop as soon as both primaryTranslation and
          secondaryTranslation are longer than 4 characters.
        - otherwise set currentIndex to i and, unless the character is a
          vowel or $Y, dispatch to #processCedille (for $Ç), #processNtilde
          (for $Ñ) or #process<Char> (e.g. #processR).

     Characters for which no process method is implemented (blanks, digits,
     punctuation, ...) are silently skipped instead of raising a
     doesNotUnderstand error; the reference implementation (dmetaph.cpp)
     does the same in the default: branch of its main switch."

    startIndex to:inputKey size do:[:i |
        |c methodSelector|

        skipCount = 0 ifTrue:[
            ((primaryTranslation size > 4) and:[secondaryTranslation size > 4]) ifTrue:[
                ^ self
            ].

            currentIndex := i.
            c := self keyAt:i.

            (c isVowel not and:[c ~= $Y]) ifTrue:[
                c = $Ç ifTrue:[
                    methodSelector := #processCedille
                ] ifFalse:[
                    c = $Ñ ifTrue:[
                        methodSelector := #processNtilde
                    ] ifFalse:[
                        methodSelector := ('process', c asString) asSymbol
                    ]
                ].
                "only dispatch if there really is an encoder for this character"
                (self respondsTo:methodSelector) ifTrue:[
                    self perform:methodSelector
                ]
            ]
        ] ifFalse:[
            skipCount := skipCount - 1
        ]
    ]

    "Modified: / 28-07-2017 / 11:24:15 / cg"
!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2210
processS
    "Encode the letter S found at currentIndex.

     The rules are tried in this order; the first one that applies wins:
        1. 'ISL' / 'YSL' around the S (island, carlisle): S is silent.
        2. key starts with 'SUGAR': primary 'X', secondary 'S'.
        3. 'SH': 'S' if followed by HEIM/HOEK/HOLM/HOLZ (germanic), else 'X';
           the H is consumed.
        4. 'SIO' / 'SIA' / 'SIAN': 'S', with secondary 'X' unless the key is
           slavo-germanic; two more characters are consumed.
        5. initial S followed by M/N/L/W, or any 'SZ': primary 'S',
           secondary 'X'; a Z is consumed.
        6. 'SC...': see the comments in the code; two more characters are
           consumed in every case.
        7. otherwise 'S'; a final S after 'AI' / 'OI' (resnais, artois) is
           dropped from the primary translation. A following S or Z is
           consumed.

     Characters are consumed by incrementing skipCount.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp

        case 'S':
            //special cases 'island', 'isle', 'carlisle', 'carlysle'
            if(StringAt((current - 1), 3, ISL, YSL, ))
            {
                current += 1;
                break;
            }

            //special case 'sugar-'
            if((current == 0) AND StringAt(current, 5, SUGAR, ))
            {
                MetaphAdd(X, S);
                current += 1;
                break;
            }

            if(StringAt(current, 2, SH, ))
            {
                //germanic
                if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
                    MetaphAdd(S);
                else
                    MetaphAdd(X);
                current += 2;
                break;
            }

            //italian & armenian
            if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
            {
                if(!!SlavoGermanic())
                    MetaphAdd(S, X);
                else
                    MetaphAdd(S);
                current += 3;
                break;
            }

            //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
            //also, -sz- in slavic language altho in hungarian it is pronounced 's'
            if(((current == 0)
                    AND StringAt((current + 1), 1, M, N, L, W, ))
                    OR StringAt((current + 1), 1, Z, ))
            {
                MetaphAdd(S, X);
                if(StringAt((current + 1), 1, Z, ))
                    current += 2;
                else
                    current += 1;
                break;
            }

            if(StringAt(current, 2, SC, ))
            {
                //Schlesinger's rule
                if(GetAt(current + 2) == 'H')
                    //dutch origin, e.g. 'school', 'schooner'
                    if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
                    {
                        //'schermerhorn', 'schenker'
                        if(StringAt((current + 3), 2, ER, EN, ))
                        {
                            MetaphAdd(X, SK);
                        }else
                            MetaphAdd(SK);
                        current += 3;
                        break;
                    }else{
                        if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
                            MetaphAdd(X, S);
                        else
                            MetaphAdd(X);
                        current += 3;
                        break;
                    }

                if(StringAt((current + 2), 1, I, E, Y, ))
                {
                    MetaphAdd(S);
                    current += 3;
                    break;
                }
                //else
                MetaphAdd(SK);
                current += 3;
                break;
            }

            //french e.g. 'resnais', 'artois'
            if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
                MetaphAdd(, S);
            else
                MetaphAdd(S);

            if(StringAt((current + 1), 1, S, Z, ))
                current += 2;
            else
                current += 1;
            break;
    "

    |keySize nextChar thirdChar followers fourthChar|

    keySize := inputKey size.

    "1. special cases 'island', 'isle', 'carlisle', 'carlysle': silent S"
    (#('ISL' 'YSL') includes:(inputKey copyFrom:((currentIndex - 1) max:1) to:((currentIndex + 1) min:keySize))) ifTrue:[
        ^ self
    ].

    "2. special case 'sugar-'"
    (currentIndex = 1 and:[(inputKey copyFrom:1 to:(5 min:keySize)) = 'SUGAR']) ifTrue:[
        self addPrimaryTranslation:'X';
             addSecondaryTranslation:'S'.
        ^ self
    ].

    "3. SH"
    (inputKey copyFrom:currentIndex to:((currentIndex + 1) min:keySize)) = 'SH' ifTrue:[
        "germanic: look at exactly the FOUR characters after the S
         (this used to copy five characters, so the test could only
          succeed when the key ended right behind HEIM/HOEK/HOLM/HOLZ)"
        followers := inputKey
                        copyFrom:((currentIndex + 1) min:keySize)
                        to:((currentIndex + 4) min:keySize).
        (#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes:followers) ifTrue:[
            self addPrimaryTranslation:'S';
                 addSecondaryTranslation:'S'.
        ] ifFalse:[
            self addPrimaryTranslation:'X';
                 addSecondaryTranslation:'X'.
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "4. italian & armenian"
    ((#('SIO' 'SIA') includes:(inputKey copyFrom:currentIndex to:((currentIndex + 2) min:keySize)))
      or:[(inputKey copyFrom:currentIndex to:((currentIndex + 3) min:keySize)) = 'SIAN'])
    ifTrue:[
        (self isSlavoGermanic:inputKey) ifTrue:[
            self addPrimaryTranslation:'S';
                 addSecondaryTranslation:'S'.
        ] ifFalse:[
            self addPrimaryTranslation:'S';
                 addSecondaryTranslation:'X'.
        ].
        skipCount := skipCount + 2.
        ^ self
    ].

    "5. german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider';
        also -sz- in slavic languages"
    nextChar := self keyAt:currentIndex + 1.
    ((currentIndex = 1 and:[#($M $N $L $W) includes:nextChar]) or:[nextChar = $Z]) ifTrue:[
        self addPrimaryTranslation:'S';
             addSecondaryTranslation:'X'.
        nextChar = $Z ifTrue:[
            skipCount := skipCount + 1.
        ].
        ^ self
    ].

    "6. SC"
    (inputKey copyFrom:currentIndex to:((currentIndex + 1) min:keySize)) = 'SC' ifTrue:[
        thirdChar := self keyAt:currentIndex + 2.
        thirdChar = $H ifTrue:[
            "Schlesinger's rule"
            followers := inputKey
                            copyFrom:((currentIndex + 3) min:keySize)
                            to:((currentIndex + 4) min:keySize).
            (#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes:followers) ifTrue:[
                "dutch origin, e.g. 'school', 'schooner'"
                (#('ER' 'EN') includes:followers) ifTrue:[
                    "'schermerhorn', 'schenker'"
                    self addPrimaryTranslation:'X';
                         addSecondaryTranslation:'SK'.
                ] ifFalse:[
                    self addPrimaryTranslation:'SK';
                         addSecondaryTranslation:'SK'.
                ].
            ] ifFalse:[
                "initial SCH followed by a consonant other than W;
                 a missing 4th character counts as consonant"
                ((currentIndex = 1 and:[(fourthChar := inputKey at:4 ifAbsent:[$b]) isVowel not])
                  and:[fourthChar ~= $W])
                ifTrue:[
                    self addPrimaryTranslation:'X';
                         addSecondaryTranslation:'S'.
                ] ifFalse:[
                    self addPrimaryTranslation:'X';
                         addSecondaryTranslation:'X'.
                ].
            ].
        ] ifFalse:[
            (#($I $E $Y) includes:thirdChar) ifTrue:[
                self addPrimaryTranslation:'S';
                     addSecondaryTranslation:'S'.
            ] ifFalse:[
                self addPrimaryTranslation:'SK';
                     addSecondaryTranslation:'SK'.
            ].
        ].
        "every SC variant consumes the two characters after the S"
        skipCount := skipCount + 2.
        ^ self
    ].

    "7. french e.g. 'resnais', 'artois': final S is silent in the primary translation"
    (currentIndex = keySize
      and:[#('AI' 'OI') includes:(inputKey copyFrom:((currentIndex - 2) max:1) to:((currentIndex - 1) max:1))])
    ifTrue:[
        self addPrimaryTranslation:'';
             addSecondaryTranslation:'S'.
    ] ifFalse:[
        self addPrimaryTranslation:'S';
             addSecondaryTranslation:'S'.
    ].

    "SS and SZ count only once"
    (#($S $Z) includes:(self keyAt:currentIndex + 1)) ifTrue:[
        skipCount := skipCount + 1.
    ].

    "Modified: / 28-07-2017 / 11:34:18 / cg"
!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2423
processT
    "Encode the letter T found at currentIndex of inputKey.

     Rules, tried in this order:
       TION          -> 'X' for both codes, two extra letters skipped
       TIA, TCH      -> 'X' for both codes, two extra letters skipped
       TH, TTH       -> primary '0', secondary 'T', one extra letter skipped;
                        both codes are 'T' when the letters after the TH are
                        OM or AM (thomas, thames), or when the key starts
                        with 'VAN ', 'VON ' or 'SCH'
       anything else -> 'T' for both codes; a directly following T or D
                        is skipped as well

     skipCount is bumped by one less than the reference implementation
     advances 'current', i.e. it appears to count the letters consumed
     in addition to the current one.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'T':
            if(StringAt(current, 4, TION, ))
            {
                MetaphAdd(X);
                current += 3;
                break;
            }

            if(StringAt(current, 3, TIA, TCH, ))
            {
                MetaphAdd(X);
                current += 3;
                break;
            }

            if(StringAt(current, 2, TH, )
               OR StringAt(current, 3, TTH, ))
            {
                //special case 'thomas', 'thames' or germanic
                if(StringAt((current + 2), 2, OM, AM, )
                   OR StringAt(0, 4, VAN , VON , )
                   OR StringAt(0, 3, SCH, ))
                {
                    MetaphAdd(T);
                }else{
                    MetaphAdd(0, T);
                }
                current += 2;
                break;
            }

            if(StringAt((current + 1), 1, T, D, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(T);
            break;
    "

    | last next2 next3 next4 plainT follower |

    last := inputKey size.

    "the two, three and four letter windows starting at the current T,
     clipped at the end of the key"
    next4 := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: last).
    next3 := inputKey copyFrom: currentIndex to: (currentIndex + 2 min: last).
    next2 := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: last).

    (next4 = 'TION' or: [#('TIA' 'TCH') includes: next3]) ifTrue: [
        self addPrimaryTranslation: 'X';
             addSecondaryTranslation: 'X'.
        skipCount := skipCount + 2.
        ^ self
    ].

    (next2 = 'TH' or: [next3 = 'TTH']) ifTrue: [
        "special case 'thomas', 'thames' or germanic"
        plainT := (#('OM' 'AM') includes: (inputKey copyFrom: currentIndex + 2 to: (currentIndex + 3 min: last)))
                    or: [(#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: (4 min: last)))
                    or: [(inputKey copyFrom: 1 to: (3 min: last)) = 'SCH']].
        self addPrimaryTranslation: (plainT ifTrue: ['T'] ifFalse: ['0']);
             addSecondaryTranslation: 'T'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "TT and TD collapse into a single T"
    follower := self keyAt: currentIndex + 1.
    (follower = $T or: [follower = $D]) ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'T';
         addSecondaryTranslation: 'T'.

    "Modified: / 28-07-2017 / 11:33:33 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2504	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2505
processV
    "Encode the letter V found at currentIndex: always 'F' for both the
     primary and the secondary code. When the following letter is a V too,
     skipCount is incremented so that the doubled letter is encoded once.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'V':
            if(GetAt(current + 1) == 'V')
                current += 2;
            else
                current += 1;
            MetaphAdd(F);
            break;
    "

    | doubled |

    doubled := (self keyAt: currentIndex + 1) = $V.
    doubled ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'F';
         addSecondaryTranslation: 'F'.

    "Modified: / 28-07-2017 / 11:34:27 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2527	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2528
processW
    "Encode the letter W found at currentIndex of inputKey.

     Rules, following the reference implementation quoted below:
       WR                       -> 'R' for both codes, one extra letter skipped
       W as first letter, followed by a vowel
                                -> primary 'A', secondary 'F'
                                   (Wasserman should match Vasserman)
       WH at the start of the key
                                -> 'A' for both codes
       W as last letter after a vowel, or inside EWSKI/EWSKY/OWSKI/OWSKY,
       or when the key starts with SCH
                                -> primary '', secondary 'F'
                                   (Arnow should match Arnoff)
       WICZ, WITZ               -> primary 'TS', secondary 'FX',
                                   three extra letters skipped
       anything else            -> the W is silently skipped

     As in the reference, the two start-of-key rules do not end processing;
     the remaining rules are still tried afterwards.

     Vowel tests are guarded against a missing neighbour letter
     (assumes keyAt: answers nil for an index outside the key - TODO confirm).

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'W':
            //can also be in middle of word
            if(StringAt(current, 2, WR, ))
            {
                MetaphAdd(R);
                current += 2;
                break;
            }

            if((current == 0)
               AND (IsVowel(current + 1) OR StringAt(current, 2, WH, )))
            {
                //Wasserman should match Vasserman
                if(IsVowel(current + 1))
                    MetaphAdd(A, F);
                else
                    //need Uomo to match Womo
                    MetaphAdd(A);
            }

            //Arnow should match Arnoff
            if(((current == last) AND IsVowel(current - 1))
               OR StringAt((current - 1), 5, EWSKI, EWSKY, OWSKI, OWSKY, )
               OR StringAt(0, 3, SCH, ))
            {
                MetaphAdd(, F);
                current +=1;
                break;
            }

            //polish e.g. 'filipowicz'
            if(StringAt(current, 4, WICZ, WITZ, ))
            {
                MetaphAdd(TS, FX);
                current +=4;
                break;
            }

            //else skip it
            current +=1;
            break;
    "

    | last word nextLetter nextIsVowel prevLetter lastAfterVowel |

    last := inputKey size.
    word := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: last).

    word = 'WR' ifTrue: [
        self addPrimaryTranslation: 'R';
             addSecondaryTranslation: 'R'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "fetch the follower unconditionally, so that it is never left
     unassigned when the vowel test below is reached"
    nextLetter := self keyAt: currentIndex + 1.
    nextIsVowel := nextLetter notNil and: [nextLetter isVowel].

    "both alternatives apply to a W at the very start of the key only"
    (currentIndex = 1 and: [nextIsVowel or: [word = 'WH']]) ifTrue: [
        nextIsVowel
            ifTrue: [
                "Wasserman should match Vasserman"
                self addPrimaryTranslation: 'A';
                     addSecondaryTranslation: 'F'.
            ] ifFalse: [
                "need Uomo to match Womo"
                self addPrimaryTranslation: 'A';
                     addSecondaryTranslation: 'A'.
            ]
    ].

    "Arnow should match Arnoff"
    prevLetter := currentIndex > 1
                    ifTrue: [self keyAt: currentIndex - 1]
                    ifFalse: [nil].
    lastAfterVowel := currentIndex = last
                        and: [prevLetter notNil and: [prevLetter isVowel]].
    (lastAfterVowel
        or: [(#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY')
                includes: (inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 3 min: last)))
        or: [inputKey startsWith: 'SCH']])
    ifTrue: [
        self addPrimaryTranslation: '';
             addSecondaryTranslation: 'F'.
        ^ self
    ].

    "polish e.g. 'filipowicz' - compare exactly four letters,
     so that the pattern is also found before the end of the key"
    (#('WICZ' 'WITZ') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 3 min: last)))
    ifTrue: [
        self addPrimaryTranslation: 'TS';
             addSecondaryTranslation: 'FX'.
        skipCount := skipCount + 3.
        ^ self
    ].

    "else: the W contributes nothing"

    "Modified: / 28-07-2017 / 11:34:51 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2611	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2612
processX
    "Encode the letter X found at currentIndex of inputKey.

     X is encoded as 'KS' for both codes, except for a final X that is
     preceded by IAU, EAU, AU or OU (french, e.g. breaux), which
     contributes nothing. A directly following C or X is skipped.

     The letters compared are the ones BEFORE the X (up to currentIndex - 1);
     each test is made only when enough letters precede the X, so no
     index below 1 is ever used.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'X':
            //french e.g. breaux
            if(!!((current == last)
                  AND (StringAt((current - 3), 3, IAU, EAU, )
                       OR StringAt((current - 2), 2, AU, OU, ))) )
                MetaphAdd(KS);

            if(StringAt((current + 1), 1, C, X, ))
                current += 2;
            else
                current += 1;
            break;
    "

    | afterIAU afterAU silent |

    afterIAU := currentIndex > 3
                    and: [#('IAU' 'EAU') includes: (inputKey copyFrom: currentIndex - 3 to: currentIndex - 1)].
    afterAU := currentIndex > 2
                    and: [#('AU' 'OU') includes: (inputKey copyFrom: currentIndex - 2 to: currentIndex - 1)].
    silent := currentIndex = inputKey size and: [afterIAU or: [afterAU]].

    silent ifFalse: [
        self addPrimaryTranslation: 'KS';
             addSecondaryTranslation: 'KS'.
    ].

    "XC and XX collapse into a single X"
    (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue: [
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:34:44 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2644	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2645
processZ
    "Encode the letter Z found at currentIndex of inputKey.

     Rules:
       ZH (chinese pinyin, e.g. 'zhao')
                        -> 'J' for both codes, one extra letter skipped
       Z followed by ZO, ZI or ZA, or Z in a slavo-germanic key when it is
       not the first letter and not preceded by T
                        -> primary 'S', secondary 'TS'
       anything else    -> 'S' for both codes
     In the last two cases a directly following Z is skipped.

     The preceding letter is compared with the Character $T; a comparison
     with the String 'T' would always answer 'not equal'.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Z':
            //chinese pinyin e.g. 'zhao'
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(J);
                current += 2;
                break;
            }else
                if(StringAt((current + 1), 2, ZO, ZI, ZA, )
                   OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
                {
                    MetaphAdd(S, TS);
                }
                else
                    MetaphAdd(S);

            if(GetAt(current + 1) == 'Z')
                current += 2;
            else
                current += 1;
            break;
    "

    | last nextLetter tsVariant |

    last := inputKey size.
    nextLetter := self keyAt: currentIndex + 1.

    nextLetter = $H ifTrue: [
        self addPrimaryTranslation: 'J';
             addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    tsVariant := (#('ZO' 'ZI' 'ZA')
                    includes: (inputKey copyFrom: (currentIndex + 1 min: last) to: (currentIndex + 2 min: last)))
                 or: [(self isSlavoGermanic: inputKey)
                        and: [currentIndex > 1
                        and: [(self keyAt: currentIndex - 1) ~= $T]]].

    self addPrimaryTranslation: 'S';
         addSecondaryTranslation: (tsVariant ifTrue: ['TS'] ifFalse: ['S']).

    "ZZ collapses into a single Z"
    nextLetter = $Z ifTrue: [
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:35:12 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2696	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2697
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2698	!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2699
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2700	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2701	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2702	There are many extended and enhanced soundex variants around;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2703	here is one, called 'extended soundex'. It is described for example in
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2704	http://www.epidata.dk/documentation.php.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2705	An author or origin is unknown.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2706
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2707	The number of digits is increased to 5 or 8;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2708	The first character is not used literally; instead it is encoded like the rest.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2709	This might have a negative effect on names starting with a vowel, though.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2710
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2711	Overall, it can be doubted if this is really an enhancement after all.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2712	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2713	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2714
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2715	!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2716
phoneticStringsFor:aString
    "Answer a two-element Array with the extended soundex keys of aString:
     the first element is the key of length 5, the second the key of length 8.

     The string is uppercased and every character is mapped via #translate:.
     Characters without a code (nil) and vowels ('0') are not added to the key,
     but they do separate consonants: a code is only dropped as a duplicate
     if the immediately preceding character had the same code.
     Keys shorter than their nominal length are right-padded with '0';
     characters beyond the 8th collected code are ignored.

     The 5-digit key is always cut to exactly 5 characters, also when
     6 or 7 codes have been collected."

    |codes code prevCode padded|

    codes := ''.
    aString asUppercase do:[:eachChar |
        code := self translate:eachChar.
        (code notNil and:[ code ~= '0' and:[ code ~= prevCode ]]) ifTrue:[
            codes := codes , code.
            "the long key is complete - no need to look at the rest"
            codes size >= 8 ifTrue:[
                ^ Array with:(codes copyTo:5) with:codes
            ].
        ].
        "remember the code of every character (even nil or '0'),
         so that only directly adjacent duplicates are suppressed"
        prevCode := code
    ].

    "fewer than 8 codes here; pad generously, then cut both keys to size"
    padded := codes , '00000000'.
    ^ Array with:(padded copyTo:5) with:(padded copyTo:8)

    "
     self basicNew phoneticStringsFor:'müller'      #('87900' '87900000')
     self basicNew phoneticStringsFor:'miller'      #('87900' '87900000')
     self basicNew phoneticStringsFor:'muller'      #('87900' '87900000')
     self basicNew phoneticStringsFor:'muler'       #('87900' '87900000')
     self basicNew phoneticStringsFor:'schmidt'     #('38600' '38600000')
     self basicNew phoneticStringsFor:'schneider'   #('38690' '38690000')
     self basicNew phoneticStringsFor:'fischer'     #('23900' '23900000')
     self basicNew phoneticStringsFor:'weber'       #('19000' '19000000')
     self basicNew phoneticStringsFor:'meyer'       #('89000' '89000000')
     self basicNew phoneticStringsFor:'wagner'      #('48900' '48900000')
     self basicNew phoneticStringsFor:'schulz'      #('37500' '37500000')
     self basicNew phoneticStringsFor:'becker'      #('13900' '13900000')
     self basicNew phoneticStringsFor:'hoffmann'    #('28800' '28800000')
     self basicNew phoneticStringsFor:'schäfer'     #('32900' '32900000')
     self basicNew phoneticStringsFor:'blumenthal'  #('17886' '17886700')
    "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2760	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2761
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2762	!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2763
translate:aCharacter
    "Answer the extended soundex code for aCharacter as a one-character
     string, or nil if the character has no code.
     Only uppercase letters are recognized (#phoneticStringsFor: passes
     the characters of an uppercased string):
        A E I O U Y -> '0'   (vowels; they only serve as separators)
        B P         -> '1'
        F V         -> '2'
        C S K       -> '3'
        G J         -> '4'
        Q X Z       -> '5'
        D T         -> '6'
        L           -> '7'
        M N         -> '8'
        R           -> '9'
     Any other character (e.g. H, W, digits, umlauts) answers nil.

     Simple if's are used for more speed when compiled."

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ].
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ].
    aCharacter == $V ifTrue:[^ '2' ].

    aCharacter == $C ifTrue:[^ '3' ].
    aCharacter == $S ifTrue:[^ '3' ].
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ].
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ].
    aCharacter == $X ifTrue:[^ '5' ].
    aCharacter == $Z ifTrue:[^ '5' ].

    "G used to be tested here a second time; that test could never be
     reached, because G is already answered as '4' above"
    aCharacter == $D ifTrue:[^ '6' ].
    aCharacter == $T ifTrue:[^ '6' ].

    aCharacter == $L ifTrue:[^ '7' ].

    aCharacter == $M ifTrue:[^ '8' ].
    aCharacter == $N ifTrue:[^ '8' ].

    aCharacter == $R ifTrue:[^ '9' ].
    ^ nil
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2803	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2804
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2805	!PhoneticStringUtilities::SingleResultPhoneticStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2806
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2807	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2808	"
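d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2809	Abstract superclass for phonetic comparators which generate a single key per word: subclasses implement #encode:, and #phoneticStringsFor: answers that key wrapped in a one-element Array.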
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2810
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2811	[author:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2812	cg
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2813
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2814	[instance variables:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2815
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2816	[class variables:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2817
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2818	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2819
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2820	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2821	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2822
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2823	!PhoneticStringUtilities::SingleResultPhoneticStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2824
encode:word
    "Answer the phonetic key for word.
     Not implemented here - this method reports a subclass responsibility;
     #phoneticStringsFor: relies on it to obtain the single key."

    ^ self subclassResponsibility

    "Created: / 28-07-2017 / 15:20:49 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2829	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2830
phoneticStringsFor:word
    "Answer a one-element Array holding the key which #encode: answers for word."

    |key|

    key := self encode:word.
    ^ Array with:key

    "Created: / 28-07-2017 / 15:20:38 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2835	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2836
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2837	!PhoneticStringUtilities::MRAStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2838
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2839	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2840	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2841	Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2842
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2843	The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2844
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2845	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2846	https://en.wikipedia.org/wiki/Match_Rating_Approach
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2847
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2848	G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2849	''Accessing Individual Records from Personal Data Files Using Nonunique Identifiers''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2850	US National Institute of Standards and Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2851	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2852	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2853
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2854	rCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2855	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2856	## Copyright (c) 2015, James P. Howard, II <jh@jameshoward.us>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2857	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2858	## Redistribution and use in source and binary forms, with or without
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2859	## modification, are permitted provided that the following conditions are
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2860	## met:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2861	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2862	## Redistributions of source code must retain the above copyright
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2863	## notice, this list of conditions and the following disclaimer.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2864	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2865	## Redistributions in binary form must reproduce the above copyright
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2866	## notice, this list of conditions and the following disclaimer in
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2867	## the documentation and/or other materials provided with the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2868	## distribution.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2869	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2870	## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2871	## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2872	## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2873	## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2874	## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2875	## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2876	## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2877	## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2878	## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2879	## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2880	## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2881
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2882	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2883	#' @title Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2884	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2885	#' @description
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2886	#' The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2887	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2888	#' @param word string or vector of strings to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2889	#' @param x MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2890	#' @param y MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2891	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2892	#' @details
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2893	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2894	#' The variable \code{word} is the name to be encoded. The variable
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2895	#' \code{maxCodeLen} is \emph{not} supported in this algorithm encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2896	#' because the algorithm itself is dependent upon its six-character
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2897	#' length. The variables \code{x} and \code{y} are MRA-encoded and are
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2898	#' compared to each other using the MRA comparison specification.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2899	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2900	#' @return The \code{mra_encode} function returns match rating approach
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2901	#' encoded character vector. The \code{mra_compare} returns a boolean
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2902	#' vector which is \code{TRUE} if \code{x} and \code{y} pass the MRA
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2903	#' comparison test.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2904	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2905	#' @references
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2906	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2907	#' G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2908	#' \emph{Accessing Individual Records from Personal Data Files Using
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2909	#' Nonunique Identifiers,} US National Institute of Standards and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2910	#' Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2911	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2912	#' @family phonics
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2913	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2914	#' @examples
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2915	#' mra_encode("William")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2916	#' mra_encode(c("Peter", "Peady"))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2917	#' mra_encode("Stevenson")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2918
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2919	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2920	#' @name mra_encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2921	#' @export
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2922	mra_encode <- function(word) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2923
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2924	## First, remove any nonalphabetical characters and uppercase it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2925	word <- gsub("[^[:alpha:]]*", "", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2926	word <- toupper(word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2927
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2928	## First character of key = first character of name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2929	first <- substr(word, 1, 1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2930	word <- substr(word, 2, nchar(word))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2931
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2932	## Delete vowels not at the start of the word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2933	word <- gsub("[AEIOU]", "", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2934	word <- paste(first, word, sep = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2935
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2936	## Remove duplicate consecutive characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2937	word <- gsub("([A-Z])\\1+", "\\1", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2938
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2939	## If longer than 6 characters, take first and last 3...and we have
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2940	## to vectorize it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2941	for(i in 1:length(word)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2942	if((l = nchar(word[i])) > 6) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2943	first <- substr(word[i], 1, 3)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2944	last <- substr(word[i], l - 2, l)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2945	word[i] <- paste(first, last, sep = "");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2946	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2947	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2948
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2949	return(word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2950	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2951
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2952	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2953	#' @name mra_compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2954	#' @export
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2955	mra_compare <- function(x, y) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2956	mra <- data.frame(x = x, y = y, sim = 0, min = 100, stringsAsFactors = FALSE)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2957
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2958	## Obtain the minimum rating value by calculating the length sum of
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2959	## the encoded strings and using table A (from Wikipedia). We start
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2960	## by setting the minimum to be the sum and move from there.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2961	mra$lensum <- nchar(mra$x) + nchar(mra$y)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2962	mra$min[mra$lensum == 12] <- 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2963	mra$min[mra$lensum > 7 && mra$lensum <= 11] <- 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2964	mra$min[mra$lensum > 4 && mra$lensum <= 7] <- 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2965	mra$min[mra$lensum <= 4] <- 5
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2966
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2967	## If the length difference between the encoded strings is 3 or
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2968	## greater, then no similarity comparison is done. For us, we
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2969	## continue the similarity comparison out of laziness and ensure the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2970	## minimum is impossibly high to meet.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2971	mra$min[abs(nchar(mra$x) - nchar(mra$y)) >= 3] <- 100
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2972
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2973	## Start the comparison.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2974	x <- strsplit(mra$x, split = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2975	y <- strsplit(mra$y, split = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2976	rows <- nrow(mra)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2977	for(i in 1:rows) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2978	## Process the encoded strings from left to right and remove any
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2979	## identical characters found from both strings respectively.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2980	j <- 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2981	while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2982	if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2983	x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2984	y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2985	} else
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2986	j <- j + 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2987	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2988
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2989	## Process the unmatched characters from right to left and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2990	## remove any identical characters found from both names
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2991	## respectively.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2992	x[[i]] <- rev(x[[i]])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2993	y[[i]] <- rev(y[[i]])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2994	j <- 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2995	while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2996	if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2997	x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2998	y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2999	} else
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3000	j <- j + 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3001	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3002	## Subtract the number of unmatched characters from 6 in the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3003	## longer string. This is the similarity rating.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3004	len <- min(length(x[[i]]), length(y[[i]]))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3005	mra$sim[i] <- 6 - len
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3006	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3007
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3008	## If the similarity is greater than or equal to the minimum
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3009	## required, it is a successful match.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3010	mra$match <- (mra$sim >= mra$min)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3011	return(mra$match)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3012	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3013
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3014	END>>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3015	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3016
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3017	!PhoneticStringUtilities::MRAStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3018
encode:wordIn
    "Answer the Match Rating Approach (MRA) codex of wordIn.
     see https://en.wikipedia.org/wiki/Match_Rating_Approach

     The codex is built in four steps:
        1. keep only the letters of wordIn and convert them to uppercase
        2. delete every vowel, except a vowel at the start of the word
        3. collapse runs of identical consecutive characters into one
        4. if more than 6 characters remain, keep only the first 3 and the last 3

     Answers an empty string if wordIn does not contain any letter."

    |word prev|

    word := wordIn.

    "/ First, remove any nonalphabetical characters and uppercase it

    word := word select:#isLetter thenCollect:#asUppercase.

    "/ Nothing left to encode (empty input, or only digits/punctuation):
    "/ answer an empty codex, because 'word first' below needs at least one element.

    word isEmpty ifTrue:[^ ''].

    "/ Delete vowels not at the start of the word

    word := word first asString , ((word from:2) reject:#isVowel).

    "/ Remove duplicate consecutive characters.
    "/ A repeated character is first replaced by the marker $*
    "/ (which cannot occur in word, as only letters are left at this point);
    "/ the markers are filtered out afterwards.
    "/ Characters are compared by value (= and ~=) instead of by identity,
    "/ so the result does not depend on equal characters being the same object.

    prev := nil.
    word := word
                collect:[:char |
                    char = prev ifTrue:[
                        $*
                    ] ifFalse:[
                        prev := char.
                        char.
                    ].
                ]
                thenSelect:[:char | char ~= $*].

    "/ If longer than 6 characters, take first and last 3
    word size > 6 ifTrue:[
        word := (word copyFirst:3),(word copyLast:3)
    ].
    ^ word.

    "
     self new encode:'Catherine'          -> 'CTHRN'
     self new encode:'CatherineCatherine' -> 'CTHHRN'
     self new encode:'Butter'             -> 'BTR'
     self new encode:'Byrne'              -> 'BYRN'
     self new encode:'Boern'              -> 'BRN'
     self new encode:'Smith'              -> 'SMTH'
     self new encode:'Smyth'              -> 'SMYTH'
     self new encode:'Kathryn'            -> 'KTHRYN'
     self new encode:''                   -> ''
     self new encode:'123'                -> ''
    "

    "Created: / 28-07-2017 / 15:19:22 / cg"
    "Modified (comment): / 31-07-2017 / 15:14:31 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3067	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3068
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3069	!PhoneticStringUtilities::MetaphoneStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3070
documentation
"
    Ongoing work - do not use at the moment

    Encodes a string into a Metaphone value.

    Initial Java implementation by William B. Brogden, December 1997.
    Permission given by wbrogden for code to be used anywhere.
    (the Java reference implementation is kept in the #javaCode method)

    See 'Hanging on the Metaphone' by Lawrence Philips
    in Computer Language of Dec. 1990, p 39.

    Note, that this does not match the algorithm that ships with PHP,
    or the algorithm found in the Perl implementations:
        https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm

    They have had undocumented changes from the originally published algorithm.
    For more information, see https://issues.apache.org/jira/browse/CODEC-57

    Metaphone uses the following rules:

        Doubled letters except 'c' -> drop 2nd letter.
        Vowels are only kept when they are the first letter.

        B -> B      unless at the end of a word after 'm' as in 'dumb'
        C -> X      (sh) if -cia- or -ch-
             S      if -ci-, -ce- or -cy-
             K      otherwise, including -sch-
        D -> J      if in -dge-, -dgy- or -dgi-
             T      otherwise
        F -> F
        G -> silent if in -gh- and not at end or before a vowel,
                    in -gn- or -gned- (also see dge etc. above)
             J      if before i or e or y if not double gg
             K      otherwise
        H -> silent if after vowel and no vowel follows
             H      otherwise
        J -> J
        K -> silent if after 'c'
             K      otherwise
        L -> L
        M -> M
        N -> N
        P -> F      if before 'h'
             P      otherwise
        Q -> K
        R -> R
        S -> X      (sh) if before 'h' or in -sio- or -sia-
             S      otherwise
        T -> X      (sh) if -tia- or -tio-
             0      (th) if before 'h'
             silent if in -tch-
             T      otherwise
        V -> F
        W -> silent if not followed by a vowel
             W      if followed by a vowel
        X -> KS
        Y -> silent if not followed by a vowel
             Y      if followed by a vowel
        Z -> S

    Initial Letter Exceptions:

        Initial kn-, gn-, pn-, ae- or wr-   -> drop first letter
        Initial x-                          -> change to 's'
        Initial wh-                         -> change to 'w'

    Examples (to be evaluated on the class side):

        self new encode:'a'
        self new encode:'dumb'
        self new encode:'MILLER'
        self new encode:'schmidt'
        self new encode:'schneider'
        self new encode:'FISCHER'
        self new encode:'HEDGY'
        self new encode:'weber'
        self new encode:'wagner'
        self new encode:'van gogh'
"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3134	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3135
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3136	javaCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3137	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3138	/*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3139	* Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3140	* contributor license agreements. See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3141	* this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3142	* The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3143	* (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3144	* the License. You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3145	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3146	* http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3147	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3148	* Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3149	* distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3150	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3151	* See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3152	* limitations under the License.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3153	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3154
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3155	package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3156
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3157	import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3158	import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3159
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3160	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3161	* Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3162	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3163	* Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3164	* Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3165	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3166	* <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3167	* p 39.</CITE>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3168	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3169	* Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3170	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3171	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3172	* <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3173	* (broken link 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3174	* <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3175	* (link checked 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3176	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3177	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3178	* They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3179	* For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3180	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3181	* This class is conditionally thread-safe.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3182	* The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3183	* but is not volatile, and accesses are not synchronized.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3184	* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3185	* is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3186	* after initial setup.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3187	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3188	* @version $Id$
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3189	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3190	public class Metaphone implements StringEncoder {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3191
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3192	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3193	* Five values in the English language
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3194	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3195	private static final String VOWELS = "AEIOU";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3196
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3197	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3198	* Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3199	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3200	private static final String FRONTV = "EIY";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3201
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3202	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3203	* Variable used in Metaphone algorithm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3204	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3205	private static final String VARSON = "CSPTG";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3206
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3207	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3208	* The max code length for metaphone is 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3209	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3210	private int maxCodeLen = 4;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3211
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3212	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3213	* Creates an instance of the Metaphone encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3214	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3215	public Metaphone() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3216	super();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3217	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3218
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3219	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3220	* Find the metaphone value of a String. This is similar to the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3221	* soundex algorithm, but better at finding similar sounding words.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3222	* All input is converted to upper case.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3223	* Limitations: Input format is expected to be a single ASCII word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3224	* with only characters in the A - Z range, no punctuation or numbers.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3225	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3226	* @param txt String to find the metaphone code for
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3227	* @return A metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3228	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3229	public String metaphone(final String txt) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3230	boolean hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3231	int txtLength;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3232	if (txt == null \|\| (txtLength = txt.length()) == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3233	return "";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3234	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3235	// single character is itself
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3236	if (txtLength == 1) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3237	return txt.toUpperCase(java.util.Locale.ENGLISH);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3238	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3239
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3240	final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3241
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3242	final StringBuilder local = new StringBuilder(40); // manipulate
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3243	final StringBuilder code = new StringBuilder(10); // output
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3244	// handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3245	switch(inwd[0]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3246	case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3247	case 'G':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3248	case 'P': /* looking for KN, etc*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3249	if (inwd[1] == 'N') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3250	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3251	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3252	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3253	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3254	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3255	case 'A': /* looking for AE */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3256	if (inwd[1] == 'E') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3257	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3258	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3259	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3260	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3261	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3262	case 'W': /* looking for WR or WH */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3263	if (inwd[1] == 'R') { // WR -> R
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3264	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3265	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3266	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3267	if (inwd[1] == 'H') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3268	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3269	local.setCharAt(0, 'W'); // WH -> W
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3270	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3271	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3272	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3273	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3274	case 'X': /* initial X becomes S */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3275	inwd[0] = 'S';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3276	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3277	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3278	default:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3279	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3280	} // now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3281
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3282	final int wdsz = local.length();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3283	int n = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3284
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3285	while (code.length() < this.getMaxCodeLen() &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3286	n < wdsz ) { // max code size of 4 works well
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3287	final char symb = local.charAt(n);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3288	// remove duplicate letters except C
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3289	if (symb !!= 'C' && isPreviousChar( local, n, symb ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3290	n++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3291	} else { // not dup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3292	switch(symb) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3293	case 'A':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3294	case 'E':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3295	case 'I':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3296	case 'O':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3297	case 'U':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3298	if (n == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3299	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3300	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3301	break; // only use vowel if leading char
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3302	case 'B':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3303	if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3304	isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3305	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3306	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3307	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3308	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3309	case 'C': // lots of C special cases
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3310	/* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3311	if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3312	!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3313	FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3314	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3315	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3316	if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3317	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3318	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3319	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3320	if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3321	FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3322	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3323	break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3324	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3325	if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3326	isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3327	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3328	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3329	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3330	if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3331	if (n == 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3332	wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3333	isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3334	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3335	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3336	code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3337	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3338	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3339	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3340	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3341	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3342	case 'D':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3343	if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3344	isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3345	FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3346	code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3347	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3348	code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3349	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3350	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3351	case 'G': // GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3352	if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3353	isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3354	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3355	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3356	if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3357	isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3358	!!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3359	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3360	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3361	if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3362	( regionMatch(local, n, "GN") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3363	regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3364	break; // silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3365	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3366	if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3367	// NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3368	hard = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3369	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3370	hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3371	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3372	if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3373	FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3374	!!hard) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3375	code.append('J');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3376	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3377	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3378	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3379	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3380	case 'H':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3381	if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3382	break; // terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3383	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3384	if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3385	VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3386	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3387	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3388	if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3389	code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3390	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3391	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3392	case 'F':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3393	case 'J':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3394	case 'L':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3395	case 'M':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3396	case 'N':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3397	case 'R':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3398	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3399	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3400	case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3401	if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3402	if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3403	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3404	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3405	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3406	code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3407	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3408	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3409	case 'P':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3410	if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3411	// PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3412	code.append('F');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3413	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3414	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3415	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3416	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3417	case 'Q':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3418	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3419	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3420	case 'S':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3421	if (regionMatch(local,n,"SH") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3422	regionMatch(local,n,"SIO") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3423	regionMatch(local,n,"SIA")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3424	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3425	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3426	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3427	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3428	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3429	case 'T':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3430	if (regionMatch(local,n,"TIA") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3431	regionMatch(local,n,"TIO")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3432	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3433	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3434	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3435	if (regionMatch(local,n,"TCH")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3436	// Silent if in "TCH"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3437	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3438	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3439	// substitute numeral 0 for TH (resembles theta after all)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3440	if (regionMatch(local,n,"TH")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3441	code.append('0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3442	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3443	code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3444	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3445	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3446	case 'V':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3447	code.append('F'); break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3448	case 'W':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3449	case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3450	if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3451	isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3452	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3453	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3454	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3455	case 'X':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3456	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3457	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3458	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3459	case 'Z':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3460	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3461	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3462	default:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3463	// do nothing
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3464	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3465	} // end switch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3466	n++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3467	} // end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3468	if (code.length() > this.getMaxCodeLen()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3469	code.setLength(this.getMaxCodeLen());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3470	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3471	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3472	return code.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3473	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3474
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3475	private boolean isVowel(final StringBuilder string, final int index) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3476	return VOWELS.indexOf(string.charAt(index)) >= 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3477	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3478
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3479	private boolean isPreviousChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3480	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3481	if( index > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3482	index < string.length() ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3483	matches = string.charAt(index - 1) == c;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3484	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3485	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3486	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3487
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3488	private boolean isNextChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3489	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3490	if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3491	index < string.length() - 1 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3492	matches = string.charAt(index + 1) == c;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3493	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3494	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3495	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3496
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3497	private boolean regionMatch(final StringBuilder string, final int index, final String test) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3498	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3499	if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3500	index + test.length() - 1 < string.length() ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3501	final String substring = string.substring( index, index + test.length());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3502	matches = substring.equals( test );
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3503	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3504	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3505	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3506
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3507	private boolean isLastChar(final int wdsz, final int n) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3508	return n + 1 == wdsz;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3509	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3510
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3511
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3512	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3513	* Encodes an Object using the metaphone algorithm. This method
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3514	* is provided in order to satisfy the requirements of the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3515	* Encoder interface, and will throw an EncoderException if the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3516	* supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3517	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3518	* @param obj Object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3519	* @return An object (of type java.lang.String) containing the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3520	* metaphone code which corresponds to the String supplied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3521	* @throws EncoderException if the parameter supplied is not
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3522	* of type java.lang.String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3523	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3524	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3525	public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3526	if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3527	throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3528	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3529	return metaphone((String) obj);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3530	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3531
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3532	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3533	* Encodes a String using the Metaphone algorithm.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3534	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3535	* @param str String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3536	* @return The metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3537	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3538	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3539	public String encode(final String str) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3540	return metaphone(str);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3541	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3542
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3543	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3544	* Tests if the metaphones of two strings are identical.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3545	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3546	* @param str1 First of two strings to compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3547	* @param str2 Second of two strings to compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3548	* @return <code>true</code> if the metaphones of these strings are identical,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3549	* <code>false</code> otherwise.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3550	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3551	public boolean isMetaphoneEqual(final String str1, final String str2) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3552	return metaphone(str1).equals(metaphone(str2));
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3553	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3554
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3555	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3556	* Returns the maxCodeLen.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3557	* @return int
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3558	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3559	public int getMaxCodeLen() { return this.maxCodeLen; }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3560
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3561	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3562	* Sets the maxCodeLen.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3563	* @param maxCodeLen The maxCodeLen to set
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3564	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3565	public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3566
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3567	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3568	END>>"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3569	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3570
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3571	!PhoneticStringUtilities::MetaphoneStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3572
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3573	encode:txt
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3574	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3575	self new encode:'a'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3576	self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3577	self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3578	self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3579	self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3580	self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3581	self new encode:'weber'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3582	self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3583	self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3584	self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3585	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3586
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3587	\|hard txtLength local code inwd ch ch2 wdsz n\|
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3588
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3589	inwd := txt.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3590	hard := false.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3591	txtLength := 0.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3592
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3593	(txtLength := txt size) == 0 ifTrue:[^ ''].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3594
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3595	inwd := txt asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3596	"/ single character is itself
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3597	(txtLength == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3598	^ inwd
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3599	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3600
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3601	code := '' writeStream.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3602	local := inwd.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3603
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3604	"/ handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3605	ch := inwd at:(0+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3606	ch2 := inwd at:(1+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3607	('KGP' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3608	"/ looking for KN, etc
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3609	"/ KNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3610	"/ GNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3611	"/ PNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3612	(ch2 == $N) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3613	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3614	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3615	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3616	('A' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3617	"/ looking for AE
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3618	"/ AEx -> Ex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3619	(ch2 == $E) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3620	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3621	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3622	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3623	('W' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3624	"/ looking for WR or WH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3625	(ch2 == $R) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3626	"/ WRx -> Wx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3627	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3628	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3629	(ch2 == $H) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3630	"/ // WH -> W
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3631	local := 'W',(inwd from:2+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3632	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3633	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3634	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3635	('X' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3636	"/ initial X becomes S */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3637	"/ Xx -> Sx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3638	local := 'S',(inwd from:1+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3639	]]]].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3640
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3641	"/ now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3642
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3643	wdsz := local size.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3644	n := 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3645
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3646	[ n <= wdsz ] whileTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3647	"/ max code size of 4 works well
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3648
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3649	\|symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3650
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3651	symb := local at:n.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3652	(n > 1) ifTrue:[ prevChar := local at:(n-1) ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3653	(isLastChar := (n == wdsz)) ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3654	nextChar := local at:(n+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3655	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3656	isPrevToLastChar := (n == (wdsz-1)).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3657	(n+2) <= wdsz ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3658	nextNextChar := local at:(n+2)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3659	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3660
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3661	"/ remove duplicate letters except C and except first
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3662	(symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3663	"/ not dup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3664	('AEIOU' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3665	"/ only use vowel if leading char
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3666	(n == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3667	code nextPut:symb
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3668	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3669	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3670	('B' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3671	"/ if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3672	"/ isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3673	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3674	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3675	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3676	"/ break;
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3677	(isLastChar and:[ prevChar == $M]) ifTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3678	"/ B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3679	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3680	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3681	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3682	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3683	('C' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3684	"/ lots of C special cases
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3685	"/ /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3686	"/ if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3687	"/ !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3688	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3689	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3690	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3691	"/ if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3692	"/ code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3693	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3694	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3695	"/ if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3696	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3697	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3698	"/ break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3699	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3700	"/ if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3701	"/ isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3702	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3703	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3704	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3705	"/ if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3706	"/ if (n == 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3707	"/ wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3708	"/ isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3709	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3710	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3711	"/ code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3712	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3713	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3714	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3715	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3716	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3717	(prevChar == $S and:[ 'EIY' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3718	"/ discard if SCI, SCE or SCY
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3719	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3720	((nextChar == $I) and:[ nextNextChar == $A ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3721	"/ "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3722	code nextPut:$X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3723	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3724	('IEY' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3725	"/ CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3726	code nextPut:$S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3727	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3728	((prevChar == $S) and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3729	"/ SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3730	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3731	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3732	nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3733	"/ CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3734	('AEIOU' includes:nextNextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3735	code nextPut:$K "/ CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3736	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3737	code nextPut:$X "/ CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3738	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3739	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3740	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3741	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3742	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3743	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3744	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3745	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3746
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3747	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3748	('D' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3749	"/ if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3750	"/ isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3751	"/ FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3752	"/ code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3753	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3754	"/ code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3755	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3756	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3757	((nextChar == $G)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3758	and:[ (local from:n) startsWithAnyOf:#('DGE' 'DGI' 'DGY') ])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3759	ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3760	code nextPut:$J.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3761	n := n + 2.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3762	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3763	code nextPut:$T.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3764	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3765	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3766	('G' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3767	"/ GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3768	"/ if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3769	"/ isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3770	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3771	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3772	"/ if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3773	"/ isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3774	"/ !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3775	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3776	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3777	"/ if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3778	"/ ( regionMatch(local, n, "GN") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3779	"/ regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3780	"/ break; // silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3781	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3782	"/ if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3783	"/ // NOTE: Given that duplicated chars are removed, I dont see how this can ever be true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3784	"/ hard = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3785	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3786	"/ hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3787	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3788	"/ if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3789	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3790	"/ !!hard) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3791	"/ code.append('J');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3792	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3793	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3794	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3795	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3796	(isPrevToLastChar and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3797	"/ GH silent at end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3798	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3799	(isPrevToLastChar not and:[ nextChar == $H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3800	and:[ ('AEIOU' includes:nextNextChar) not ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3801	"/ GH silent before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3802	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3803	(n > 1 and:[ nextChar == $N ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3804	"/ GN -> silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3805	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3806	hard := (prevChar == $G).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3807	(isLastChar not and:[ hard not and:[ ('EIY' includes:nextChar) ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3808	code nextPut:$J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3809	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3810	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3811	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3812	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3813	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3814	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3815	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3816	('H' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3817	"/ case 'H':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3818	"/ if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3819	"/ break; // terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3820	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3821	"/ if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3822	"/ VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3823	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3824	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3825	"/ if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3826	"/ code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3827	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3828	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3829	isLastChar ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3830	"/ ignore terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3831	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3832	('CSPTG' includes:prevChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3833	"/ ignore CH, SH, PH, TH, GH (H treated there)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3834	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3835	('AEIOU' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3836	"/ Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3837	code nextPut:$H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3838	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3839	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3840	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3841	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3842	('FJLMNR' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3843	"/ case 'F':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3844	"/ case 'J':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3845	"/ case 'L':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3846	"/ case 'M':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3847	"/ case 'N':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3848	"/ case 'R':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3849	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3850	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3851	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3852	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3853	('K' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3854	"/ case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3855	"/ if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3856	"/ if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3857	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3858	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3859	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3860	"/ code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3861	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3862	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3863	n > 1 ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3864	"/ not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3865	prevChar ~~ $C ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3866	code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3867	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3868	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3869	code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3870	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3871	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3872	('P' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3873	"/ case 'P':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3874	"/ if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3875	"/ // PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3876	"/ code.append('F');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3877	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3878	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3879	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3880	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3881	nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3882	"/ PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3883	code nextPut:$F.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3884	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3885	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3886	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3887	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3888	('Q' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3889	"/ case 'Q':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3890	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3891	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3892	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3893
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3894	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3895	('S' includes:symb) ifTrue:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3896	"/ case 'S':
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3897	"/ if (regionMatch(local,n,"SH") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3898	"/ regionMatch(local,n,"SIO") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3899	"/ regionMatch(local,n,"SIA")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3900	"/ code.append('X');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3901	"/ } else {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3902	"/ code.append('S');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3903	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3904	"/ break;
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3905	"/ SH -> X (as in shave or ashton)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3906	"/ SIO -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3907	"/ SIA -> X (as in ASIA)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3908	((nextChar == $H)
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3909	or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3910	) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3911	code nextPut:$X
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3912	] ifFalse:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3913	code nextPut:$S
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3914	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3915	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3916	('T' includes:symb) ifTrue:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3917	"/ case 'T':
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3918	"/ if (regionMatch(local,n,"TIA") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3919	"/ regionMatch(local,n,"TIO")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3920	"/ code.append('X');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3921	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3922	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3923	"/ if (regionMatch(local,n,"TCH")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3924	"/ // Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3925	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3926	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3927	"/ // substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3928	"/ if (regionMatch(local,n,"TH")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3929	"/ code.append('0');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3930	"/ } else {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3931	"/ code.append('T');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3932	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3933	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3934	(nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3935	code nextPut:$X.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3936	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3937	(nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3938	"/ Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3939	"/ cg - huh; hutch - methinksthereisat
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3940	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3941	"/ substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3942	nextChar == $H ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3943	code nextPut:$0.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3944	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3945	code nextPut:$T.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3946	].
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3947	].
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3948	].
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3949	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3950	('V' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3951	"/ case 'V':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3952	"/ code.append('F'); break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3953	code nextPut:$F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3954
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3955	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3956	('WY' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3957	"/ case 'W':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3958	"/ case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3959	"/ if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3960	"/ isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3961	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3962	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3963	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3964
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3965	"/ silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3966	(isLastChar not and:[ 'AEIOU' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3967	code nextPut:symb
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3968	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3969	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3970	('X' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3971	"/ case 'X':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3972	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3973	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3974	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3975	code nextPutAll:'KS'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3976	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3977	('Z' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3978	"/ case 'Z':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3979	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3980	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3981	code nextPut:$S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3982	] ifFalse:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3983	"/ default:
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3984	"/ // do nothing
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3985	"/ break;
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3986	]]]]]]]]]]]]]]]]. "/ end switch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3987	]. "/ end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3988	n := n + 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3989	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3990	^ code contents
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3991
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3992	"Created: / 02-08-2017 / 09:51:31 / cg"
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3993	"Modified: / 03-08-2017 / 14:55:22 / cg"
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3994	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3995
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3996	!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3997
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3998	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3999	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4000	WARNING: this is the so called 'simplified soundex' algorithm;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4001	there are more variants like miracode (american soundex) or
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4002	mysqlSoundex around.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4003
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4004	Be sure to use the correct algorithm if the generated codes must be compatible with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4005	those of another program (the differences between the variants are small and easily
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4006	overlooked, but they do change which words match in a search).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4007
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4008	The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4009
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4010	SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4011	components of names, but by doing so reports more matches.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4012
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4013	There are some variations around in the literature;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4014	the following is called 'simplified soundex', and the rules for coding a name are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4015
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4016	1. The first letter of the name is used in its un-coded form to serve as the prefix
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4017	character of the code. (The rest of the code is numerical).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4018
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4019	2. Thereafter, W and H are ignored entirely.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4020
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4021	3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4022
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4023	4. Other letters of the name are converted to a numerical equivalent:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4024	B, P, F, V 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4025	C, G, J, K, Q, S, X, Z 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4026	D, T 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4027	L 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4028	M, N 5
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4029	R 6
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4030
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4031	5. There are two exceptions:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4032	1. Letters that follow prefix letters which would, if coded, have the same
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4033	numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4034
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4035	2. The second letter of any pair of consonants having the same code number is likewise ignored,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4036	i.e. unless there is a ''separator'' between them in the name.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4037
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4038	6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4039	Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4040
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4041	Notice, that in another variant, w and h are treated slightly differently.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4042	This is only of relevance, if you need to reconstruct original soundex codes of other programs
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4043	or for the original 1880 US census data.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4044	SoundexStringComparator new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4045	vs.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4046	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4047
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4048	Also notice that soundex deals better with English.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4049	For German and other languages, other algorithms may provide better results.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4050	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4051	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4052
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4053	!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4054
encode:word
    "Answer the simplified-soundex code of word: its first character (uppercased)
     followed by code digits, padded with '0' and cut to 4 characters.
     An empty word has no prefix character; it is answered as an empty string
     instead of failing in 'first'.

     Every character after the first is mapped via #translate:.
     A character contributes its code unless translate: answers nil or '0',
     or unless its code equals the code of the immediately preceding character.
     The remembered previous code is updated for every character (also for those
     coded nil or '0'), so such a character in between allows an equal code to be
     emitted again."

    |upper result code lastCode|

    word isEmpty ifTrue:[^ ''].

    upper := word asUppercase.
    result := upper first asString.
    "/ the prefix character is not emitted as a digit,
    "/ but its code still suppresses an equal code directly following it
    lastCode := self translate:upper first.
    2 to:upper size do:[:idx |
        code := self translate:(upper at:idx).
        (code isNil or:[ code = '0' or:[ code = lastCode ]]) ifFalse:[
            result := result , code.
            "/ prefix plus three digits collected - done
            result size == 4 ifTrue:[^ result ].
        ].
        lastCode := code
    ].
    "/ short code: extend with zeros up to 4 characters
    [ result size < 4 ] whileTrue:[
        result := result , '0'
    ].
    ^ result copyFrom:1 to:4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4073
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4074	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4075	self new encode:'washington' -> 'W252'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4076	self new encode:'lee' -> 'L000'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4077	self new encode:'Gutierrez' -> 'G362'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4078	self new encode:'Pfister' -> 'P236'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4079	self new encode:'Jackson' -> 'J250'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4080	self new encode:'Tymczak' -> 'T522'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4081	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4082
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4083	"notice:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4084	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4085	self new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4086	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4087
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4088	"Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4089	"Modified (comment): / 01-08-2017 / 19:01:43 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4090	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4091
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4092	!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4093
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4094	translate:aCharacter
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4095	"use simple if's for more speed when compiled"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4096	"Answer the soundex code of aCharacter as a one-character string: '0' for A E I O U Y, '1' to '6' for the consonant groups below, or nil for every other character. Only uppercase letters are matched, so H, W, lowercase letters and non-letters all answer nil."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4097	"vowels serve as separators"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4098	aCharacter == $A ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4099	aCharacter == $E ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4100	aCharacter == $I ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4101	aCharacter == $O ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4102	aCharacter == $U ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4103	aCharacter == $Y ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4104
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4105	aCharacter == $B ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4106	aCharacter == $P ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4107	aCharacter == $F ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4108	aCharacter == $V ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4109
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4110	aCharacter == $C ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4111	aCharacter == $S ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4112	aCharacter == $K ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4113	aCharacter == $G ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4114	aCharacter == $J ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4115	aCharacter == $Q ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4116	aCharacter == $X ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4117	aCharacter == $Z ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4118
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4119	aCharacter == $D ifTrue:[^ '3' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4120	aCharacter == $T ifTrue:[^ '3' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4121
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4122	aCharacter == $L ifTrue:[^ '4' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4123
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4124	aCharacter == $M ifTrue:[^ '5' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4125	aCharacter == $N ifTrue:[^ '5' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4126
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4127	aCharacter == $R ifTrue:[^ '6' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4128	^ nil
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4129
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4130	"Modified: / 02-08-2017 / 01:35:40 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4131	"Modified (comment): / 02-08-2017 / 14:30:11 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4132	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4133
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4134	!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4135
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4136	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4137	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4138	MySQL soundex is like american Soundex (i.e. miracode) without the 4 character limitation,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4139	and also removing vowels first, then removing duplicate codes
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4140	(whereas the soundex code does this in reverse order).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4141
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4142	These variations are important, if you need the miracode soundex codes to be generated.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4143	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4144	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4145
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4146	!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4147
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4148	encode:word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4149	"same as inherited, but cares for 0, W and H"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4150	"Differences to SoundexStringComparator>>encode: as coded here: the result is only padded with '0' up to 4 characters and never cut, and prevCode is left unchanged by characters coded '0' and by W and H, so equal codes separated only by those are emitted once. Any other character without a code still resets prevCode to nil."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4151	\|u p t prevCode\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4152
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4153	u := word asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4154	p := u first asString.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4155	prevCode := self translate:u first.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4156	u from:2 to:u size do:[:c \|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4157	t := self translate:c.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4158	(t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4159	p := p , t.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4160	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4161	(t ~= '0' and:[ c ~= $W and:[c ~= $H]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4162	prevCode := t.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4163	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4164	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4165	[ p size < 4 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4166	p := p , '0'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4167	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4168	^ p
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4169
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4170	"Created: / 28-07-2017 / 15:23:41 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4171	"Modified: / 31-07-2017 / 17:53:51 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4172	"Modified (comment): / 02-08-2017 / 14:31:15 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4173	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4174
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4175	!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4176
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4177	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4178	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4179	NYSIIS Algorithm:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4180
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4181	1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4182	remove all ''S'' and ''Z'' chars from the end of the surname
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4183
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4184	2.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4185	transcode initial strings
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4186	MAC => MC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4187	PF => F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4188
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4189	3.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4190	Transcode trailing strings as follows,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4191
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4192	IX => IC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4193	EX => EC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4194	YE,EE,IE => Y
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4195	NT,ND => D
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4196
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4197	4.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4198	transcode ''EV'' to ''EF'' if not at start of name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4199
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4200	5.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4201	use first character of name as first character of key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4202
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4203	6.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4204	remove any ''W'' that follows a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4205
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4206	7.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4207	replace all vowels with ''A''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4208
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4209	8.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4210	transcode ''GHT'' to ''GT''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4211
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4212	9.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4213	transcode ''DG'' to ''G''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4214
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4215	10.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4216	transcode ''PH'' to ''F''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4217
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4218	11.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4219	if not first character, eliminate all ''H'' preceded or followed by a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4220
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4221	12.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4222	change ''KN'' to ''N'', else ''K'' to ''C''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4223
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4224	13.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4225	if not first character, change ''M'' to ''N''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4226
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4227	14.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4228	if not first character, change ''Q'' to ''G''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4229
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4230	15.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4231	transcode ''SH'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4232
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4233	16.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4234	transcode ''SCH'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4235
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4236	17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4237	transcode ''YW'' to ''Y''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4238
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4239	18.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4240	if not first or last character, change ''Y'' to ''A''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4241
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4242	19.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4243	transcode ''WR'' to ''R''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4244
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4245	20.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4246	if not first character, change ''Z'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4247
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4248	21.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4249	transcode terminal ''AY'' to ''Y''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4250
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4251	22.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4252	remove trailing vowels
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4253
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4254	23.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4255	collapse all strings of repeated characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4256
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4257	24.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4258	if first char of original surname was a vowel, append it to the code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4259	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4260	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4261
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4262	!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4263
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4264	encode:aString
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4265	\|k\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4266	"Answer the NYSIIS key of aString. The uppercased string is passed through rule1: to rule23: in numeric order, each rule receiving the result of the previous one; rule24:originalKey: finally also gets the original (not uppercased) aString."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4267	k := self rule1:(aString asUppercase).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4268	"2. Transcode initial strings: MAC => MC PF => F"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4269	k := self rule2:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4270	k := self rule3:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4271	k := self rule4:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4272	k := self rule5:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4273	k := self rule6:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4274	k := self rule7:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4275	k := self rule8:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4276	k := self rule9:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4277	k := self rule10:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4278	k := self rule11:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4279	k := self rule12:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4280	k := self rule13:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4281	k := self rule14:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4282	k := self rule15:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4283	k := self rule16:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4284	k := self rule17:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4285	k := self rule18:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4286	k := self rule19:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4287	k := self rule20:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4288	k := self rule21:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4289	k := self rule22:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4290	k := self rule23:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4291	k := self rule24:k originalKey:aString.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4292	^ k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4293
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4294	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4295	self new encode:'hello'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4296	self new encode:'bliss'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4297	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4298	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4299	self new phoneticStringsFor:'hello'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4300	self new phoneticStringsFor:'bliss'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4301	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4302
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4303	"Created: / 28-07-2017 / 15:34:52 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4304	"Modified (comment): / 02-08-2017 / 14:31:47 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4305	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4306
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4307	!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4308
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4309	rule10:key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4310	"10. transcode 'PH' to 'F' "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4311	"startingAt:1 - a 'PH' at the very beginning of key is included (presumably transcodeAll:of:to:startingAt: replaces every occurrence from that index on - confirm against its definition)"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4312	^ self transcodeAll:'PH' of:key to:'F' startingAt:1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4313
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4314	"Modified (format): / 02-08-2017 / 14:34:27 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4315	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4316
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4317	rule11:key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4318	\|k c\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4319	"Answers a copy of key; key itself is not modified. The indices of the H's to drop are collected in descending order (the scan starts at index 2, so a leading H is kept) and then removed from the copy back to front, so that a removal never shifts an index which is still to be processed."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4320	"11. if not first character, eliminate all 'H' preceded or followed by a vowel "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4321	k := key copy.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4322	c := SortedCollection sortBlock:[:a :b \| b < a ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4323	2 to:key size do:[:i \|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4324	(key at:i) = $H ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4325	((key at:i - 1) isVowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4326	or:[ (i < key size) and:[ (key at:i + 1) isVowel ] ]) ifTrue:[ c add:i ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4327	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4328	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4329	c do:[:n \|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4330	k := (k copyFrom:1 to:n - 1) , (k copyFrom:n + 1 to:k size)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4331	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4332	^ k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4333	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4334
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4335	rule12:key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4336	\|k\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4337	"Answer key with every 'KN' replaced by 'N' and every remaining 'K' replaced by 'C'. The KN step must produce 'N' (not 'K'), otherwise the following K => C step would turn each former 'KN' into 'C'."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4338	"12. change 'KN' to 'N', else 'K' to 'C' "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4339	k := self transcodeAll:'KN' of:key to:'N' startingAt:1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4340	k := self transcodeAll:'K' of:k to:'C' startingAt:1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4341	^ k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4342
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4343	"Modified (format): / 02-08-2017 / 14:34:48 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4344	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4345
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4346	rule13:key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4347	"13. if not first character, change 'M' to 'N' "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4348	"startingAt:2 is what implements the 'not first character' part: presumably the replacement only looks at index 2 and beyond - confirm against transcodeAll:of:to:startingAt:"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4349	^ self transcodeAll:'M' of:key to:'N' startingAt:2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4350
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4351	"Modified (format): / 02-08-2017 / 14:35:00 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4352	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4353
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4354	rule14:key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4355	"14. if not first character, change 'Q' to 'G' "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4356	"startingAt:2 is what implements the 'not first character' part: presumably the replacement only looks at index 2 and beyond - confirm against transcodeAll:of:to:startingAt:"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4357	^ self transcodeAll:'Q' of:key to:'G' startingAt:2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4358
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4359	"Modified (format): / 02-08-2017 / 14:35:08 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4360	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4361
rule15:key
    "Rule 15: replace each occurrence of 'SH' by 'S', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'SH'
                of:key
                to:'S'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:35:18 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4369
rule16:key
    "Rule 16: replace each occurrence of 'SCH' by 'S', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'SCH'
                of:key
                to:'S'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:35:25 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4377
rule17:key
    "Rule 17: replace each occurrence of 'YW' by 'Y', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'YW'
                of:key
                to:'Y'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:35:33 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4385
rule18:key
    "Rule 18: change 'Y' to 'A' unless it is the first or the last character.
     Answers a new string; key is not modified.
     An empty key is answered as an empty copy (asking it for its last
     character would fail)."

    |recoded|

    key isEmpty ifTrue:[ ^ key copy ].

    "recode every 'Y' from the second position on ..."
    recoded := self transcodeAll:'Y' of:key to:'A' startingAt:2.

    "... and restore a trailing 'Y', which this rule must leave alone"
    key last = $Y ifTrue:[
        recoded at:recoded size put:$Y
    ].
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:35:44 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4398
rule19:key
    "Rule 19: replace each occurrence of 'WR' by 'R', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'WR'
                of:key
                to:'R'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:35:52 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4406
rule1:key
    "Rule 1: remove all 'S' and 'Z' characters from the end of the name.
     Answers a new string; key is not modified.
     A key that is empty, or consists of 'S' and 'Z' only, yields an empty
     string."

    |end|

    "walk backwards over the trailing S/Z run; the end > 0 test stops the scan
     at the start of the string, so nothing is ever read from an empty string"
    end := key size.
    [ end > 0 and:[ 'SZ' includes:(key at:end) ] ] whileTrue:[
        end := end - 1
    ].
    ^ key copyFrom:1 to:end
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4417
rule20:key
    "Rule 20: every 'Z' found at the second position or later becomes 'S';
     a 'Z' in the first position is kept. Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'Z'
                of:key
                to:'S'
                startingAt:2.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:36:00 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4425
rule21:key
    "Rule 21: transcode a terminal 'AY' to 'Y'.
     Answers a new string; key is not modified.
     Keys with fewer than two characters cannot end in 'AY'; they are answered
     as a copy, so that no start index below 1 is ever passed on to the search."

    |size|

    size := key size.
    size < 2 ifTrue:[ ^ key copy ].

    "searching from the second-to-last position restricts the match to the very end"
    ^ self transcodeAll:'AY' of:key to:'Y' startingAt:size - 1

    "Modified (format): / 02-08-2017 / 14:36:08 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4433
rule22:key
    "Rule 22: remove trailing vowels.
     Answers a new string; key is not modified.
     A key that is empty, or consists of vowels only, yields an empty string."

    |end|

    "walk backwards over the trailing vowel run; the end > 0 test stops the scan
     at the start of the string, so nothing is ever read from an empty string"
    end := key size.
    [ end > 0 and:[ (key at:end) isVowel ] ] whileTrue:[
        end := end - 1
    ].
    ^ key copyFrom:1 to:end

    "Modified: / 02-08-2017 / 14:36:42 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4446
rule23:key
    "Rule 23: collapse all strings of repeated characters, i.e. keep only the
     first character of every run of equal adjacent characters.
     Answers a new string of the same species as key; key is not modified.
     Empty and single-character keys are answered unchanged (as a copy)."

    |out previous|

    out := WriteStream on:(key species new).
    previous := nil.
    "single forward pass: a character is written only if it differs from its
     predecessor; for the first character previous is nil, so it is always kept"
    key do:[:ch |
        ch = previous ifFalse:[ out nextPut:ch ].
        previous := ch
    ].
    ^ out contents
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4463
rule24:key originalKey:originalKey
    "Rule 24: if the first character of the original surname was a vowel,
     append it (in uppercase) to the code.
     Answers a new string; key is not modified.
     An empty originalKey has no first character; the code is then answered
     as an unchanged copy."

    (originalKey notEmpty and:[ originalKey first isVowel ]) ifTrue:[
        ^ key , originalKey first asString asUppercase
    ].
    ^ key copy
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4474
rule2:key
    "Rule 2: transcode the initial strings
        MAC => MC
        PF  => F
     Answers a new string; key is not modified."

    "the two prefixes exclude each other (a name rewritten to 'MC...' never
     starts with 'PF'), so each case can answer directly"
    (key startsWith:'MAC') ifTrue:[ ^ 'MC' , (key copyFrom:4) ].
    (key startsWith:'PF') ifTrue:[ ^ 'F' , (key copyFrom:3) ].
    ^ key copy

    "Modified (format): / 02-08-2017 / 14:31:40 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4491
rule3:key
    "Rule 3: transcode trailing strings as follows:
        IX         => IC
        EX         => EC
        YE, EE, IE => Y
        NT, ND     => D
     The groups are applied one after the other, in the order listed.
     Answers the recoded string."

    |recoded|

    recoded := key.
    #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
    ) do:[:suffixesAndReplacement |
        recoded := self
                    transcodeTrailing:(suffixesAndReplacement at:1)
                    of:recoded
                    to:(suffixesAndReplacement at:2)
    ].
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:32:24 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4510
rule4:key
    "Rule 4: replace 'EV' by 'EF', except for an 'EV' at the very start of the
     name (the search begins at the second position). Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'EV'
                of:key
                to:'EF'
                startingAt:2.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:32:35 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4518
rule5:key
    "Rule 5: use the first character of the name as first character of the key.
     Nothing to do here, because the conversion is done in place and the first
     character is therefore already where it belongs.
     Answers key itself (not a copy)."

    ^ key

    "Modified (comment): / 02-08-2017 / 14:32:45 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4527
rule6:key
    "Rule 6: remove any 'W' that follows a vowel.
     Answers a new string of the same species as key; key is not modified.
     The test is made against the last character that was kept, so a run of
     'W's after a vowel is removed completely ('AWW' => 'A').
     A 'W' in the first position, or after a non-vowel, is kept."

    |out lastKept|

    out := WriteStream on:(key species new).
    lastKept := nil.
    "single forward pass; every character is visited exactly once, so the
     method terminates for any input"
    key do:[:ch |
        (ch = $W and:[ lastKept notNil and:[ lastKept isVowel ] ]) ifFalse:[
            out nextPut:ch.
            lastKept := ch
        ]
    ].
    ^ out contents
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4544
rule7:key
    "Rule 7: replace all vowels with 'A'.
     Answers a new string; key is not modified."

    |recoded|

    recoded := key copy.
    1 to:recoded size do:[:index |
        (recoded at:index) isVowel ifTrue:[
            recoded at:index put:$A
        ]
    ].
    ^ recoded

    "Modified: / 02-08-2017 / 14:33:56 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4551
rule8:key
    "Rule 8: replace each occurrence of 'GHT' by 'GT', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'GHT'
                of:key
                to:'GT'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:34:05 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4559
rule9:key
    "Rule 9: replace each occurrence of 'DG' by 'G', anywhere in the key.
     Answers the recoded string."

    |recoded|

    recoded := self
                transcodeAll:'DG'
                of:key
                to:'G'
                startingAt:1.
    ^ recoded

    "Modified (format): / 02-08-2017 / 14:34:15 / cg"
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4567
transcodeAll:aString of:key to:replacementString startingAt:start
    "Answer a copy of key in which every occurrence of aString that begins at
     index start or later has been replaced by replacementString.
     key itself is not modified.

     After each replacement the search is repeated from the same start index,
     so an occurrence that is created by a replacement is replaced as well
     (with 'DG' => 'G', 'DDG' becomes 'G').
     As a consequence, replacementString must not contain aString, otherwise
     the loop would not terminate; none of the rule methods visible here does that.

     A start index below 1 (as computed by callers for keys which are shorter
     than the search string) is treated as 1.
     An empty aString matches nothing; a plain copy is answered."

    |result firstIndex matchIndex|

    result := key copy.
    aString isEmpty ifTrue:[ ^ result ].

    firstIndex := start max:1.
    [
        (matchIndex := result indexOfSubCollection:aString startingAt:firstIndex) > 0
    ] whileTrue:[
        result := (result copyFrom:1 to:matchIndex - 1)
                  , replacementString
                  , (result copyFrom:matchIndex + aString size to:result size)
    ].
    ^ result
!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4580
transcodeTrailing:anArrayOfStrings of:key to:replacementString
    "Answer a copy of key in which a trailing occurrence of any of the strings
     in anArrayOfStrings has been replaced by replacementString.
     The strings are tried one after the other, each against the result of the
     previous one. The search for each string starts at the position where it
     would have to begin in order to end at the last character."

    ^ anArrayOfStrings
        inject:key copy
        into:[:soFar :suffix |
            self
                transcodeAll:suffix
                of:soFar
                to:replacementString
                startingAt:(soFar size - suffix size) + 1
        ]
! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4594
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4595	!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4596
documentation
"
    Implementation of the PHONEM algorithm, as described in
      'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
       Ein Programm fuer kontextsensitive phonetische Textumwandlung
       ct Magazin fuer Computer & Technik 25/1998'

    This algorithm copes better with the German language
    (it takes umlauts into account).
"
! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4607
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4608	!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4609
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4610	encode:aString
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4611	\|s idx t t2\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4612
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4613	s := aString asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4614
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4615	idx := 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4616	[idx < (s size-1)] whileTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4617	t2 := nil.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4618	t := s copyFrom:idx to:idx+1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4619	t = 'SC' ifTrue:[ t2 := 'C' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4620	ifFalse:[ t = 'SZ' ifTrue:[ t2 := 'C' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4621	ifFalse:[ t = 'CZ' ifTrue:[ t2 := 'C' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4622	ifFalse:[ t = 'TZ' ifTrue:[ t2 := 'C' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4623	ifFalse:[ t = 'TS' ifTrue:[ t2 := 'C' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4624	ifFalse:[ t = 'KS' ifTrue:[ t2 := 'X' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4625	ifFalse:[ t = 'PF' ifTrue:[ t2 := 'V' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4626	ifFalse:[ t = 'QU' ifTrue:[ t2 := 'KW' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4627	ifFalse:[ t = 'PH' ifTrue:[ t2 := 'V' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4628	ifFalse:[ t = 'UE' ifTrue:[ t2 := 'Y' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4629	ifFalse:[ t = 'AE' ifTrue:[ t2 := 'E' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4630	ifFalse:[ t = 'OE' ifTrue:[ t2 := 'Ö' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4631	ifFalse:[ t = 'EI' ifTrue:[ t2 := 'AY' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4632	ifFalse:[ t = 'EY' ifTrue:[ t2 := 'AY' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4633	ifFalse:[ t = 'EU' ifTrue:[ t2 := 'OY' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4634	ifFalse:[ t = 'AU' ifTrue:[ t2 := 'A§' ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4635	ifFalse:[ t = 'OU' ifTrue:[ t2 := '§ ' ]]]]]]]]]]]]]]]]].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4636	t2 notNil ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4637	s := (s copyTo:idx-1),t2,(s copyFrom:idx+2)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4638	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4639	idx := idx + 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4640	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4641	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4642
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4643	"/ single character substitutions via tr
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4644	s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVDDUA'.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4645	s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4646	s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4647	^ s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4648
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4649	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4650	self basicNew encode:'müller' -> 'MYLR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4651	self basicNew encode:'mueller' -> 'MYLR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4652	self basicNew encode:'möller' -> 'MYLR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4653	self basicNew encode:'miller' -> 'MYLR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4654	self basicNew encode:'muller' -> 'MULR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4655	self basicNew encode:'muler' -> 'MULR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4656
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4657	self basicNew phoneticStringsFor:'müller' #('MYLR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4658	self basicNew phoneticStringsFor:'mueller' #('MYLR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4659	self basicNew phoneticStringsFor:'möller' #('MYLR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4660	self basicNew phoneticStringsFor:'miller' #('MYLR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4661	self basicNew phoneticStringsFor:'muller' #('MULR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4662	self basicNew phoneticStringsFor:'muler' #('MULR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4663
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4664	self basicNew phoneticStringsFor:'schmidt' #('CMYD')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4665	self basicNew phoneticStringsFor:'schneider' #('CNAYDR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4666	self basicNew phoneticStringsFor:'fischer' #('VYCR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4667	self basicNew phoneticStringsFor:'weber' #('VBR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4668	self basicNew phoneticStringsFor:'weeber' #('VBR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4669	self basicNew phoneticStringsFor:'webber' #('VBR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4670	self basicNew phoneticStringsFor:'wepper' #('VBR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4671
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4672	self basicNew phoneticStringsFor:'meyer' #('MAYR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4673	self basicNew phoneticStringsFor:'maier' #('MAYR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4674	self basicNew phoneticStringsFor:'mayer' #('MAYR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4675	self basicNew phoneticStringsFor:'mayr' #('MAYR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4676	self basicNew phoneticStringsFor:'meir' #('MAYR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4677
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4678	self basicNew phoneticStringsFor:'wagner' #('VACNR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4679	self basicNew phoneticStringsFor:'schulz' #('CULC')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4680	self basicNew phoneticStringsFor:'becker' #('BCR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4681	self basicNew phoneticStringsFor:'hoffmann' #('OVMAN')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4682	self basicNew phoneticStringsFor:'haus' #('AUS')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4683
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4684	self basicNew phoneticStringsFor:'schäfer' #('CVR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4685	self basicNew phoneticStringsFor:'scheffer' #('CVR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4686	self basicNew phoneticStringsFor:'schaeffer' #('CVR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4687	self basicNew phoneticStringsFor:'schaefer' #('CVR')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4688	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4689
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4690	"Created: / 28-07-2017 / 15:38:08 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4691	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4692
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4693	!PhoneticStringUtilities::Caverphone2StringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4694
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4695	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4696	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4697	Caverphone (2) Algorithm:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4698
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4699	see http://caversham.otago.ac.nz/files/working/ctp150804.pdf
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4700
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4701	Caverphone 2.0 is being made available for free use for the benefit of anyone who has a use for it,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4702	with the proviso that the Caversham Project at the University of Otago should be acknowledged as the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4703	original source (which is hereby done ;-).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4704
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4705	• Start with a Surname or Firstname
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4706	• Convert to lowercase
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4707	This coding system is case sensitive, implementations should acknowledge that a is not the same as A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4708	• Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4709	The main intention of this is to remove spaces, hyphens, and apostrophes.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4710	example: o'brian becomes obrian
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4711	• If the name starts with cough make it cou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4712	2 is being used as a temporary placeholder to indicate a consonant which we are no longer interested in.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4713	• If the name starts with rough make it rou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4714	• If the name starts with tough make it tou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4715	• If the name starts with enough make it enou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4716	• If the name starts with gn make it 2n
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4717	• If the name ends with mb make it m2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4718	• replace cq with 2q
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4719	• replace ci with si
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4720	• replace ce with se
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4721	• replace cy with sy
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4722	• replace tch with 2ch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4723	• replace c with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4724	• replace q with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4725	• replace x with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4726	• replace v with f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4727	• replace dg with 2g
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4728	• replace tio with sio
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4729	• replace tia with sia
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4730	• replace d with t
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4731	• replace ph with fh
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4732	• replace b with p
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4733	• replace sh with s2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4734	• replace z with s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4735	• replace an initial vowel with an A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4736	• replace all other vowels with a 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4737	3 is a temporary placeholder marking a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4738	• replace 3gh3 with 3kh3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4739	Exceptions are dealt with before the general case. gh between vowels is an exception to the more general gh rule.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4740	• replace gh with 22
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4741	• replace g with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4742	• replace groups of the letter s with a S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4743	Continuous strings of s are replaced by a single S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4744	• replace groups of the letter t with a T
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4745	• replace groups of the letter p with a P
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4746	• replace groups of the letter k with a K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4747	• replace groups of the letter f with a F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4748	• replace groups of the letter m with a M
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4749	• replace groups of the letter n with a N
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4750	• replace w3 with W3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4751	• replace wy with Wy
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4752	• replace wh3 with Wh3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4753	• replace why with Why
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4754	• replace w with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4755	• replace an initial h with an A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4756	• replace all other occurrences of h with a 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4757	• replace r3 with R3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4758	• replace ry with Ry
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4759	• replace r with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4760	• replace l3 with L3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4761	• replace ly with Ly
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4762	• replace l with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4763	• replace j with y
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4764	• replace y3 with Y3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4765	• replace y with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4766	• remove all 2s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4767	• remove all 3s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4768	• put six (v1) / ten (v2) 1s on the end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4769	• take the first six characters as the code (caverphone 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4770	/ take the first ten characters as the code (caverphone 2);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4771
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4772	self new encode:'david' -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4773	self new encode:'whittle' -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4774
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4775	self new encode:'Stevenson' -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4776	self new encode:'Peter' -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4777
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4778	self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4779	self new encode:'lee' -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4780	self new encode:'Gutierrez' -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4781	self new encode:'Pfister' -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4782	self new encode:'Jackson' -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4783	self new encode:'Tymczak' -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4784
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4785	self new encode:'add' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4786	self new encode:'aid' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4787	self new encode:'at' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4788	self new encode:'art' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4789	self new encode:'earth' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4790	self new encode:'head' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4791	self new encode:'old' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4792
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4793	self new encode:'ready' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4794	self new encode:'rather' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4795	self new encode:'able' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4796	self new encode:'appear' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4797
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4798	self new encode:'Deedee' -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4799	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4800	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4801
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4802	!PhoneticStringUtilities::Caverphone2StringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4803
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4804	encode:word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4805	\|txt\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4806
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4807	word size == 0 ifTrue:[^ '1111111111' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4808
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4809	"/ 1. Convert to lowercase
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4810	txt := word asLowercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4811
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4812	"/ 2. Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4813	txt := txt select:#isLetter.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4814
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4815	#(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4816	"/ oldSeq newSeq repeat
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4817
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4818	"/ 2.5. Remove final e
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4819	'e$' '' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4820	"/ 3. Handle various start options
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4821	'^cough' 'cou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4822	'^rough' 'rou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4823	'^tough' 'tou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4824	'^enough' 'enou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4825	'^trough' 'trou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4826
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4827	'^gn' '2n' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4828	'mb$' 'm2' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4829
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4830	"/ 4. Handle replacements
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4831	'cq' '2q' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4832	'ci' 'si' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4833	'ce' 'se' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4834	'cy' 'sy' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4835	'tch' '2ch' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4836	'c' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4837	'q' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4838	'x' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4839	'v' 'f' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4840	'dg' '2g' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4841	'tio' 'sio' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4842	'tia' 'sia' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4843	'd' 't' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4844	'ph' 'fh' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4845	'b' 'p' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4846	'sh' 's2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4847	'z' 's' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4848
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4849	'^a' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4850	'^e' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4851	'^i' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4852	'^o' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4853	'^u' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4854
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4855	'a' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4856	'e' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4857	'i' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4858	'o' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4859	'u' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4860	'j' 'y' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4861
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4862	'^y3' 'Y3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4863	'^y' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4864
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4865	'y' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4866	'3gh3' '3kh3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4867	'gh' '22' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4868	'g' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4869	's' 'S' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4870	'SS' 'S' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4871	't' 'T' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4872	'TT' 'T' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4873	'p' 'P' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4874	'PP' 'P' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4875	'k' 'K' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4876	'KK' 'K' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4877	'f' 'F' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4878	'FF' 'F' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4879	'm' 'M' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4880	'MM' 'M' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4881	'n' 'N' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4882	'NN' 'N' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4883	'w3' 'W3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4884	'wh3' 'Wh3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4885	'w$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4886	'w' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4887	'^h' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4888	'h' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4889	'r3' 'R3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4890	'r$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4891	'r' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4892	'l3' 'L3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4893	'l$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4894	'l' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4895
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4896	"/ 5. removals
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4897
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4898	'2' '' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4899	'3$' 'A' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4900	'3' '' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4901	) inGroupsOf:3 do:[:pat :repl :repeat\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4902	\|s txtBefore\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4903
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4904	txtBefore := txt.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4905	(pat startsWith:$^) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4906	s := pat copyButFirst.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4907	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4908	[txt startsWith:s] whileTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4909	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4910	(txt startsWith:s) ifTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4911	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4912	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4913	(pat endsWith:$$) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4914	s := pat copyButLast.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4915	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4916	[txt endsWith:s] whileTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4917	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4918	(txt endsWith:s) ifTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4919	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4920	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4921	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4922	txt := txt copyReplaceAllSubcollections:pat with:repl
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4923	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4924	txt := txt copyReplaceSubcollection:pat with:repl
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4925	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4926	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4927	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4928	"/ txt ~= txtBefore ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4929	"/ Transcript showCR:(pat,' \| ',repl,' -> ',txt).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4930	"/ ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4931	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4932
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4933	"/ 6. put ten 1s on the end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4934	txt := txt,'1111111111'.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4935
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4936	"/ 7. take the first ten characters as the code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4937	^ txt copyTo:10
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4938
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4939	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4940	self new encode:'david' -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4941	self new encode:'whittle' -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4942
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4943	self new encode:'Stevenson' -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4944	self new encode:'Peter' -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4945
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4946	self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4947	self new encode:'lee' -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4948	self new encode:'Gutierrez' -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4949	self new encode:'Pfister' -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4950	self new encode:'Jackson' -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4951	self new encode:'Tymczak' -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4952
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4953	self new encode:'add' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4954	self new encode:'aid' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4955	self new encode:'at' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4956	self new encode:'art' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4957	self new encode:'earth' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4958	self new encode:'head' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4959	self new encode:'old' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4960
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4961	self new encode:'ready' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4962	self new encode:'rather' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4963	self new encode:'able' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4964	self new encode:'appear' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4965
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4966	self new encode:'Deedee' -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4967	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4968
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4969	"Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4970	"Modified: / 02-08-2017 / 01:42:35 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4971	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4972
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4973	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4974
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4975	documentation
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4976	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4977	The 'Kölner Phonetik' (Cologne phonetics) code is for the German language
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4978	what the soundex code is for English:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4979	it returns similar strings for similar sounding words
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4980	(but is specifically aware of the pronunciation of German and eastern languages).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4981
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4982	There are some other differences to soundex, though:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4983	its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4984	it does not start with the first character of the input, but returns a purely numeric string.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4985
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4986	This algorithm was described by Postel in 1969.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4987	See http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4988
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4989	self new phoneticStringsFor:'Müller-Lüdenscheidt' -> #('65752682')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4990	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4991	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4992
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4993	examples
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4994	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4995	Words that sound similar (German pronunciation) yield the same or a similar code; evaluate the expression below to print each word and its code on the Transcript:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4996
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4997	#(
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4998	'Müller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	4999	'Miller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5000	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5001	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5002	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5003	'Mülherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5004	'Myler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5005	'Millar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5006	'Myller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5007	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5008	'Müler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5009	'Muehler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5010	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5011	'Müllerr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5012	'Muehlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5013	'Muellar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5014	'Mueler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5015	'Mülleer'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5016	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5017	'Nüller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5018	'Nyller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5019	'Niler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5020	'Czerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5021	'Tscherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5022	'Czernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5023	'Tschernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5024	'Schernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5025	'Scherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5026	'Scherno'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5027	'Czerne'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5028	'Zerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5029	'Tzernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5030	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5031	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5032	'Breschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5033	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5034	'Braeschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5035	'Braessneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5036	'Pressneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5037	'Presznäph'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5038	'Präschnäf'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5039	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5040	'Breschnijeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5041	'Breschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5042	'Bräschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5043	'Braschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5044	'Broschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5045	) do:[:w |
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5046	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5047	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5048	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5049	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5050
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5051	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5052
encode: aString
    "Return the Kölner Phonetik (Cologne phonetics) code of aString as a string of digits.
     The Kölner Phonetik code is for the German language what the soundex code is for English:
     it returns similar strings for similar sounding words.
     There are some differences to soundex, though:
        its length is not limited to 4, but depends on the length of the original string;
        it does not start with the first character of the input.
     This algorithm is described by Postel 1969.

     Processing steps:
        0. normalize the input: strip surrounding whitespace, lowercase, replace 'ph' by 'f',
           transliterate ü/ä/ö/ß to u/a/o/ss and drop everything which is not a letter;
        1. map every letter to its code, looking at the letter together with its left and
           right neighbour (convertFirst: for the first letter, convertRest: for all others);
        2. collapse runs of identical adjacent codes into one;
        3. remove all '0' codes, except for a leading one.
     Steps 2 and 3 have to be performed in this order: if the zeros were removed first,
     equal codes which are only separated by a vowel would wrongly be merged
     (i.e. 'hoffmann' would give '036' instead of '0366').
     An empty (or letter-less) input gives an empty string."

    |in codes deduped|

    in := aString withoutSeparators asLowercase.
    in := in copyReplaceString:'ph' withString:'f'.
    (in includesAny:'öäüß') ifTrue:[
        in := in copyReplaceAll:$ü withAll:'u'.
        in := in copyReplaceAll:$ä withAll:'a'.
        in := in copyReplaceAll:$ö withAll:'o'.
        in := in copyReplaceAll:$ß withAll:'ss'.
    ].
    in := in select:[:ch | ch isLetter].

    "the '#' padding gives the first and the last letter a neighbour too,
     so that every window passed to the converters has exactly three characters"
    in := '#',in,'#'.

    "step 1: letter-wise encoding"
    codes := String streamContents:[:s |
        1 to:in size-2 do:[:i |
            |sub|

            sub := in copyFrom:i to:i+2.
            s nextPutAll:((i == 1)
                            ifTrue:[ self convertFirst:sub ]
                            ifFalse:[ self convertRest:sub ])
        ].
    ].

    "drop the '-' placeholders delivered by the converters
     (presumably used for letters which have no code of their own, such as 'h')"
    codes := codes select:[:ch | ch ~= $-].

    "step 2: collapse adjacent duplicates (prev is nil for the first code, which is therefore always kept)"
    deduped := String streamContents:[:s |
        |prev|

        codes do:[:ch |
            ch ~= prev ifTrue:[
                s nextPut:ch
            ].
            prev := ch.
        ].
    ].

    "step 3: remove the zeros, but keep a leading one"
    (deduped startsWith:'0') ifTrue:[
        ^ '0',(deduped select:[:ch | ch ~= $0])
    ].
    ^ deduped select:[:ch | ch ~= $0]

    "
     #(
        'Müller' 'Miller' 'Mueller' 'Mühler' 'Mühlherr' 'Mülherr' 'Myler' 'Millar'
        'Myller' 'Müllar' 'Müler' 'Muehler' 'Mülller' 'Müllerr' 'Muehlherr' 'Muellar'
        'Mueler' 'Mülleer' 'Mueller' 'Nüller' 'Nyller' 'Niler'
        'Czerny' 'Tscherny' 'Czernie' 'Tschernie' 'Schernie' 'Scherny' 'Scherno'
        'Czerne' 'Zerny' 'Tzernie'
        'Breschnew' 'Breschnew' 'Breschneff' 'Breschnjeff' 'Braeschneff' 'Braessneff'
        'Pressneff' 'Presznäph' 'Präschnäf' 'Breschnjeff' 'Breschnijeff' 'Breschnieff'
     ) do:[:w |
         Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
     ].
    "

    "
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnew'    -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschneff'   -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braeschneff'  -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braessneff'   -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Pressneff'    -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Presznäph'    -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Präschnäf'    -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnjeff'  -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnijeff' -> '17863'
     PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnieff'  -> '17863'
    "
    "
     self basicNew encode:'müller'    -> '657'
     self basicNew encode:'möller'    -> '657'
     self basicNew encode:'miller'    -> '657'
     self basicNew encode:'muller'    -> '657'
     self basicNew encode:'muler'     -> '657'
     self basicNew encode:'schmidt'   -> '862'
     self basicNew encode:'schneider' -> '8627'
     self basicNew encode:'fischer'   -> '387'
     self basicNew encode:'weber'     -> '317'
     self basicNew encode:'meyer'     -> '67'
     self basicNew encode:'wagner'    -> '3467'
     self basicNew encode:'schulz'    -> '858'
     self basicNew encode:'becker'    -> '147'
     self basicNew encode:'hoffmann'  -> '0366'
     self basicNew encode:'schäfer'   -> '837'
    "

    "Created: / 28-07-2017 / 15:24:33 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5188	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5189
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5190	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5191
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5192	convertFirst:chars
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5193	\|c2 c3\|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5194	"if chars has 3 elements, answer the code string for its middle element (chars at:2) without consulting (chars at:1): a e i j y o u -> '0'; c -> '4' or '8', depending on (chars at:3). Everything else is passed on to convertRest:"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5195	chars size == 3 ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5196	c2 := (chars at:2).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5197	c2 == $a ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5198	c2 == $e ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5199	c2 == $i ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5200	c2 == $j ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5201	c2 == $y ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5202	c2 == $o ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5203	c2 == $u ifTrue:[^ '0'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5204	"c is coded as '4' if followed by one of a h k l o q r u x, and as '8' otherwise"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5205	c2 == $c ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5206	c3 := (chars at:3).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5207	(c3 == $a) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5208	(c3 == $h) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5209	(c3 == $k) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5210	(c3 == $l) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5211	(c3 == $o) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5212	(c3 == $q) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5213	(c3 == $r) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5214	(c3 == $u) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5215	(c3 == $x) ifTrue:[^ '4'].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5216	^ '8'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5217	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5218	"the disabled code below is apparently the former match:-based (glob pattern) version of the tests above; kept for reference"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5219	"/ #(
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5220	"/ ('#a#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5221	"/ ('#e#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5222	"/ ('#i#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5223	"/ ('#j#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5224	"/ ('#y#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5225	"/ ('#o#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5226	"/ ('#u#' '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5227	"/
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5228	"/ ('#ca' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5229	"/ ('#ch' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5230	"/ ('#ck' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5231	"/ ('#cl' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5232	"/ ('#co' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5233	"/ ('#cq' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5234	"/ ('#cr' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5235	"/ ('#cu' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5236	"/ ('#cx' '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5237	"/
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5238	"/ ('#c#' '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5239	"/ ) do:[:pair \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5240	"/ (pair first match:chars) ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5241	"/ ^ pair second
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5242	"/ ]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5243	"/ ].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5244	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5245
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5246	^ self convertRest:chars
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5247
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5248	"Modified: / 29-07-2017 / 14:22:20 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5249	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5250
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5251	convertRest:chars
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5252	chars size == 3 ifFalse:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5253	self error:'cannot happen'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5254	^ '?'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5255	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5256	"chars must have 3 elements (anything else is reported as an error above). The table below is searched in order and the code string of the first matching entry is answered, so the more specific entries must come first"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5257	#(
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5258	"/ this used to be glob-pattern (match:) code,
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5259	"/ but doing these glob-matches was too slow.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5260	"/ each entry now has one of the forms:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5261	"/ (prefixString nil code) - matches if chars starts with prefixString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5262	"/ (nil suffixString code) - matches if chars ends with suffixString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5263	"/ (nil character code) - matches if the middle element (chars at:2) is that character
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5264	"/ (the former glob pattern is kept as a comment inside each entry)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5265	(nil 'ds' " '#ds' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5266	(nil 'dc' " '#dc' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5267	(nil 'dz' " '#dz' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5268	(nil 'ts' " '#ts' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5269	(nil 'tc' " '#tc' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5270	(nil 'tz' " '#tz' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5271	(nil $d " '#d#' " '2')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5272	(nil $t " '#t#' " '2')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5273	('cx' nil " 'cx#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5274	('kx' nil " 'kx#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5275	('qx' nil " 'qx#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5276	(nil $x " '#x#' " '48')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5277	('sc' nil " 'sc#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5278	('sz' nil " 'sz#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5279	(nil 'ca' " '#ca' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5280	(nil 'co' " '#co' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5281	(nil 'cu' " '#cu' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5282	(nil 'ch' " '#ch' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5283	(nil 'ck' " '#ck' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5284	(nil 'cx' " '#cx' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5285	(nil 'cq' " '#cq' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5286	(nil $c " '#c#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5287	(nil $a " '#a#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5288	(nil $e " '#e#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5289	(nil $i " '#i#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5290	(nil $j " '#j#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5291	(nil $y " '#y#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5292	(nil $o " '#o#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5293	(nil $u " '#u#' " '0')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5294	(nil $h " '#h#' " '-')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5295	(nil $l " '#l#' " '5')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5296	(nil $r " '#r#' " '7')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5297	(nil $m " '#m#' " '6')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5298	(nil $n " '#n#' " '6')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5299	(nil $s " '#s#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5300	(nil $z " '#z#' " '8')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5301	(nil $b " '#b#' " '1')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5302	(nil $p " '#p#' " '1')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5303	(nil $f " '#f#' " '3')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5304	(nil $v " '#v#' " '3')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5305	(nil $w " '#w#' " '3')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5306	(nil $g " '#g#' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5307	(nil $k " '#k#' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5308	(nil $q " '#q#' " '4')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5309	(nil nil " '###' " '?')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5310	) do:[:vector \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5311	\|v1 v2\|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5312
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5313	(v1 := vector at:1) notNil ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5314	"/ prefix
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5315	(chars startsWith:v1) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5316	] ifFalse:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5317	(v2 := vector at:2) isCharacter ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5318	"/ middle character compare
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5319	(chars at:2) == v2 ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5320	] ifFalse:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5321	v2 isString ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5322	"/ suffix
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5323	(chars endsWith:v2) ifTrue:[^ (vector at:3) ].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5324	] ifFalse:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5325	^ '?'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5326	]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5327	]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5328	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5329
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5330	"/ (vector first match:chars) ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5331	"/ ^ vector second
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5332	"/ ]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5333	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5334	"not expected to be reached: the last table entry (nil nil '?') is neither prefix, character nor string and therefore always answers '?' from inside the loop"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5335	self error:'cannot happen'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5336
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5337	"Modified: / 29-07-2017 / 14:17:38 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5338	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5339
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5340	!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5341
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5342	documentation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5343	"
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5344	Miracode (also called << American Soundex >>) is like Soundex with the
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5345	addition that h and w are discarded if they separate consonants.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5346
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5347	These variants may be of particular importance, because they were used in
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5348	the U.S. National Archives.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5349	Most archive data were encoded with Miracode,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5350	but there are some (older) entries encoded with Simplified Soundex.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5351
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5352	The HW-rule was documented as a standard in 1910,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5353	but actually the data of the 1880, 1900 and 1910
3185 9833bbba2050 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 2580 diff changeset	5354	censuses were encoded with mixed methods.
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5355
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5356	self new encode:'washington' -> 'W252'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5357	self new encode:'lee' -> 'L000'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5358	self new encode:'Gutierrez' -> 'G362'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5359	self new encode:'Pfister' -> 'P236'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5360	self new encode:'Jackson' -> 'J250'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5361	self new encode:'Tymczak' -> 'T522'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5362
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5363	notice the difference to plain Soundex (here, the H between S and C does not separate them):
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5364	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5365	SoundexStringComparator new encode:'Ashcraft' -> 'A226'
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5366
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5367	see also:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5368	https://www.archives.gov/research/census/soundex.html
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5369	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5370	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5371
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5372	!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'private'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5373
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5374	encode:word
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5375	"answer the 4-element code for word: its uppercased first character, followed by up to 3 codes as answered by translate:, padded with '0'."
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5376	"same as inherited, but cares for W and H: these do not change prevCode, so equal codes which are only separated by W or H are not repeated"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5377	\|u p t prevCode\|
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5378	"NOTE(review): u first presumably raises an error for an empty word - confirm whether callers can pass one"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5379	u := word asUppercase.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5380	p := u first asString.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5381	prevCode := self translate:u first.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5382	u from:2 to:u size do:[:c \|
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5383	t := self translate:c.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5384	(t notNil
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5385	and:[ t ~= '0'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5386	and:[ t ~= prevCode ]]) ifTrue:[
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5387	p := p , t.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5388	p size == 4 ifTrue:[^ p ].
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5389	].
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5390	(c ~= $W and:[c ~= $H]) ifTrue:[ "W and H leave prevCode untouched"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5391	prevCode := t.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5392	].
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5393	].
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5394	[ p size < 4 ] whileTrue:[
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5395	p := p , '0'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5396	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5397	^ (p copyFrom:1 to:4)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5398
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5399	"Created: / 02-08-2017 / 00:19:47 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5400	"Modified (comment): / 02-08-2017 / 14:30:47 / cg"
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5401	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5402
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5403	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator class methodsFor:'documentation'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5404
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5405	documentation
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5406	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5407	The 'Spanish Phonetic' code is for the spanish language
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5408	what the soundex code is for english:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5409	it returns similar strings for similar sounding words
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5410	(but is specifically aware of the spanish pronunciation).
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5411
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5412	There are some other differences to soundex, though:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5413	its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5414	it does not start with the first character of the input,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5415	but returns a pure numeric string;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5416	it uses different character groups.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5417
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5418	This algorithm was described by María del Pilar Angeles, Adrian Espino-Gamez,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5419	and Jonathan Gil-Moncada, in
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5420	'Comparison of a Modified Spanish phonetic,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5421	Soundex, and Phonex coding functions during data matching process'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5422	See https://www.researchgate.net/publication/285589803_Comparison_of_a_Modified_Spanish_Phonetic_Soundex_and_Phonex_coding_functions_during_data_matching_process
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5423
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5424	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5425	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5426
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5427	examples
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5428	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5429	words sounding similar (spanish pronunciation) will deliver a similar code:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5430
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5431	#(
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5432	'Marıa'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5433	'Pilar'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5434	'Angeles'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5435	'Adrian'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5436	'Gamez'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5437	) do:[:w \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5438	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::SpanishPhoneticCodeStringComparator new encode:w)
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5439	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5440	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5441	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5442
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5443	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'api'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5444
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5445	encode: aString
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5446	"return a spanish phonetic code.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5447	The spanishPhonetic code is for the spanish language what the soundex code is for english;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5448	it returns simular strings for similar sounding words.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5449	There are some differences to soundex, though:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5450	its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5451	it does not start with the first character of the input,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5452	it uses different character groups.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5453	This algorithm is described by Marıa del Pilar Angeles, Adrian Espino-Gamez,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5454	Jonathan Gil-Moncada."
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5455
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5456	\|in\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5457
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5458	in := aString withoutSeparators asUppercase.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5459
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5460	^ String streamContents:[:out \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5461	\|prev\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5462
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5463	in do:[:ch \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5464	ch == prev ifFalse:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5465	ch == $P ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5466	out nextPut:$0.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5467	] ifFalse:[ ('BV' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5468	out nextPut:$1.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5469	] ifFalse:[ ('FH' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5470	out nextPut:$2.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5471	] ifFalse:[ ('DT' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5472	out nextPut:$3.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5473	] ifFalse:[ ('SZCX' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5474	out nextPut:$4.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5475	] ifFalse:[ ('YL' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5476	out nextPut:$5.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5477	] ifFalse:[ ('NŃM' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5478	out nextPut:$6.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5479	] ifFalse:[ ('QK' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5480	out nextPut:$7.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5481	] ifFalse:[ ('GJ' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5482	out nextPut:$8.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5483	] ifFalse:[ ('R' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5484	out nextPut:$9.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5485	]]]]]]]]]].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5486	prev := ch.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5487	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5488	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5489	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5490
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5491	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5492	self new encode:'Jose'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5493	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5494
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5495	"Created: / 28-07-2017 / 15:24:33 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5496	"Modified: / 01-08-2017 / 18:48:50 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5497	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5498
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5499	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'private'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5500
convertFirst:chars
    "Answer the code (a String) for chars, judging by the selector the leading
     group of a word (NOTE(review): confirm against the caller).
     Only a 3-element chars is treated specially here; written as the glob
     patterns this code replaced (# standing for any single character):
        '#a#' '#e#' '#i#' '#j#' '#y#' '#o#' '#u#'        -> '0'
        '#ca' '#ch' '#ck' '#cl' '#co' '#cq' '#cr' '#cu' '#cx' -> '4'
        '#c#' (any other element after the c)            -> '8'
     Every other input is handed on to #convertRest:."

    |middle follower|

    "/ anything but a 3-element group is left to the general rule table
    chars size == 3 ifFalse:[
        ^ self convertRest:chars
    ].

    middle := chars at:2.
    ('aeijyou' includes:middle) ifTrue:[
        ^ '0'
    ].
    middle == $c ifFalse:[
        ^ self convertRest:chars
    ].

    "/ middle is $c: the element following it selects between '4' and '8'
    follower := chars at:3.
    ^ ('ahkloqrux' includes:follower) ifTrue:['4'] ifFalse:['8']

    "Modified: / 29-07-2017 / 14:22:20 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5558	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5559
convertRest:chars
    "Answer the code (a String) for the 3-element group chars by scanning an
     ordered rule table; the first rule that matches wins, so the order of
     the table entries is significant.

     Each rule is a triple (prefix suffixOrMiddle code):
        (aString nil code)     - matches if chars starts with aString
        (nil aString code)     - matches if chars ends with aString
        (nil aCharacter code)  - matches if the middle element (chars at:2)
                                 is aCharacter
        (nil nil code)         - catch-all; answers '?'
     The quoted pattern beside each rule is the glob pattern it replaced
     (# standing for any single character). The rules used to be glob match
     patterns, but doing these glob-matches was too slow.

     If chars does not have exactly 3 elements, an error is raised; should
     that error return, '?' is answered."

    chars size == 3 ifFalse:[
        self error:'cannot happen'.
        ^ '?'
    ].

    #(
        (nil  'ds'      " '#ds' "   '8')
        (nil  'dc'      " '#dc' "   '8')
        (nil  'dz'      " '#dz' "   '8')
        (nil  'ts'      " '#ts' "   '8')
        (nil  'tc'      " '#tc' "   '8')
        (nil  'tz'      " '#tz' "   '8')
        (nil  $d        " '#d#' "   '2')
        (nil  $t        " '#t#' "   '2')
        ('cx' nil       " 'cx#' "   '8')
        ('kx' nil       " 'kx#' "   '8')
        ('qx' nil       " 'qx#' "   '8')
        (nil  $x        " '#x#' "   '48')
        ('sc' nil       " 'sc#' "   '8')
        ('sz' nil       " 'sz#' "   '8')
        (nil  'ca'      " '#ca' "   '4')
        (nil  'co'      " '#co' "   '4')
        (nil  'cu'      " '#cu' "   '4')
        (nil  'ch'      " '#ch' "   '4')
        (nil  'ck'      " '#ck' "   '4')
        (nil  'cx'      " '#cx' "   '4')
        (nil  'cq'      " '#cq' "   '4')
        (nil  $c        " '#c#' "   '8')
        (nil  $a        " '#a#' "   '0')
        (nil  $e        " '#e#' "   '0')
        (nil  $i        " '#i#' "   '0')
        (nil  $j        " '#j#' "   '0')
        (nil  $y        " '#y#' "   '0')
        (nil  $o        " '#o#' "   '0')
        (nil  $u        " '#u#' "   '0')
        (nil  $h        " '#h#' "   '-')
        (nil  $l        " '#l#' "   '5')
        (nil  $r        " '#r#' "   '7')
        (nil  $m        " '#m#' "   '6')
        (nil  $n        " '#n#' "   '6')
        (nil  $s        " '#s#' "   '8')
        (nil  $z        " '#z#' "   '8')
        (nil  $b        " '#b#' "   '1')
        (nil  $p        " '#p#' "   '1')
        (nil  $f        " '#f#' "   '3')
        (nil  $v        " '#v#' "   '3')
        (nil  $w        " '#w#' "   '3')
        (nil  $g        " '#g#' "   '4')
        (nil  $k        " '#k#' "   '4')
        (nil  $q        " '#q#' "   '4')
        (nil  nil       " '###' "   '?')
    ) do:[:rule |
        |prefix tail code|

        prefix := rule at:1.
        tail := rule at:2.
        code := rule at:3.

        prefix notNil ifTrue:[
            "/ prefix rule
            (chars startsWith:prefix) ifTrue:[^ code].
        ] ifFalse:[
            tail isCharacter ifTrue:[
                "/ middle character compare
                (chars at:2) == tail ifTrue:[^ code].
            ] ifFalse:[
                tail isString ifTrue:[
                    "/ suffix rule
                    (chars endsWith:tail) ifTrue:[^ code].
                ] ifFalse:[
                    "/ catch-all rule (both slots nil)
                    ^ '?'
                ]
            ]
        ].
    ].

    "/ not reached as long as the table ends with the catch-all rule;
    "/ answer a String anyway (like the size check above does), so that a
    "/ returning error never makes this method answer the receiver.
    self error:'cannot happen'.
    ^ '?'

    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5647	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5648
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5649	!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5650
version
    "Answer the revision header string recorded for this source file.
     The literal is a version-control keyword placeholder
     (presumably expanded by the source code manager - confirm)."

    ^ '$Header$'
2285 0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	5653	!
0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	5654
version_CVS
    "Answer the CVS revision header string recorded for this source file.
     The literal is a version-control keyword placeholder
     (presumably expanded by CVS - confirm)."

    ^ '$Header$'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5657	! !
3185 9833bbba2050 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 2580 diff changeset	5658

author	Claus Gittinger <cg@exept.de>
	Tue, 25 Jun 2019 14:28:51 +0200
changeset 5050	44fa8672d102
parent 4521	cfe4f333794f
child 5212	76ae0b6f061e
permissions	-rw-r--r--