hg/stx-libbasic2: PhoneticStringUtilities.st@7355a4b11cb6 (annotated)

4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1	"{ Encoding: utf8 }"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	3	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	4	COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5	COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	6	All Rights Reserved
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	7
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	8	This software is furnished under a license and may be used
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	9	only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	10	inclusion of the above copyright notice. This software may not
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	11	be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	12	other person. No title to or ownership of the software is
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	13	hereby transferred.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	14	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	15	"{ Package: 'stx:libbasic2' }"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	16
3488 5a69e672d7f8 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3185 diff changeset	17	"{ NameSpace: Smalltalk }"
5a69e672d7f8 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3185 diff changeset	18
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	19	Object subclass:#PhoneticStringUtilities
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	20	instanceVariableNames:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	21	classVariableNames:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	22	poolDictionaries:''
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	23	category:'Collections-Text-Support'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	24	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	25
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	26	Object subclass:#PhoneticStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	27	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	28	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	29	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	30	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	31	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	32
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	33	PhoneticStringUtilities::PhoneticStringComparator subclass:#DaitchMokotoffStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	34	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	35	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	36	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	37	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	38	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	39	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	40
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	41	PhoneticStringUtilities::PhoneticStringComparator subclass:#DoubleMetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	42	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	43	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	44	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	45	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	46	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	47	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	48
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	49	PhoneticStringUtilities::PhoneticStringComparator subclass:#ExtendedSoundexStringComparator
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	50	instanceVariableNames:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	51	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	52	poolDictionaries:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	53	privateIn:PhoneticStringUtilities
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	54	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	55
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	56	PhoneticStringUtilities::PhoneticStringComparator subclass:#SingleResultPhoneticStringComparator
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	57	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	58	classVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	59	poolDictionaries:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	60	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	61	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	62
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	63	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MRAStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	64	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	65	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	66	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	67	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	68	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	69
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	70	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#MetaphoneStringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	71	instanceVariableNames:'inputKey primaryTranslation secondaryTranslation startIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	72	currentIndex skipCount'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	73	classVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	74	poolDictionaries:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	75	privateIn:PhoneticStringUtilities
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	76	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	77
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	78	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SoundexStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	79	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	80	classVariableNames:'CharacterTranslationDict'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	81	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	82	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	83	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	84
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	85	PhoneticStringUtilities::SoundexStringComparator subclass:#MySQLSoundexStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	86	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	87	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	88	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	89	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	90	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	91
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	92	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#NYSIISStringComparator
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	93	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	94	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	95	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	96	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	97	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	98
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	99	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#PhonemStringComparator
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	100	instanceVariableNames:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	101	classVariableNames:'CharacterTranslationDict'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	102	poolDictionaries:''
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	103	privateIn:PhoneticStringUtilities
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	104	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	105
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	106	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#Caverphone2StringComparator
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	107	instanceVariableNames:''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	108	classVariableNames:'CharacterTranslationDict'
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	109	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	110	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	111	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	112
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	113	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#KoelnerPhoneticCodeStringComparator
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	114	instanceVariableNames:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	115	classVariableNames:'CharacterTranslationDict'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	116	poolDictionaries:''
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	117	privateIn:PhoneticStringUtilities
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	118	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	119
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	120	PhoneticStringUtilities::SoundexStringComparator subclass:#MiracodeStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	121	instanceVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	122	classVariableNames:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	123	poolDictionaries:''
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	124	privateIn:PhoneticStringUtilities
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	125	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	126
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	127	PhoneticStringUtilities::SingleResultPhoneticStringComparator subclass:#SpanishPhoneticCodeStringComparator
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	128	instanceVariableNames:''
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	129	classVariableNames:'CharacterTranslationDict'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	130	poolDictionaries:''
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	131	privateIn:PhoneticStringUtilities
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	132	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	133
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	134	!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	135
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	136	copyright
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	137	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	138	COPYRIGHT (c) 1994 by Claus Gittinger
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	139	COPYRIGHT (c) 2009 by eXept Software AG
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	140	All Rights Reserved
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	141
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	142	This software is furnished under a license and may be used
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	143	only in accordance with the terms of that license and with the
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	144	inclusion of the above copyright notice. This software may not
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	145	be provided or otherwise made available to, or used by, any
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	146	other person. No title to or ownership of the software is
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	147	hereby transferred.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	148	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	149	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	150
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	151	documentation
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	152	"
2445 d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	153	Utilities which are helpful to perform phonetic string searches or comparisons.
d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	154	These are all variations or improvements of the soundex algorithm, which usually fails
d55a3b1e8791 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2285 diff changeset	155	to provide good results for non-English languages.
2285 0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	156
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	157	soundexCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	158	this algorithm was originally contained in the CharacterArray class;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	159
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	160	nysiis
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	161	a modified soundex algorithm
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	162
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	163	miracode
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	164	another modified soundex algorithm ('american soundex') used in the 1880 census.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	165
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	166	mySQLSoundex
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	167	another modified soundex algorithm used in mySQL.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	168
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	169	koelner phoneticCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	170	provides a functionality similar to soundex, but much more tuned towards the German language
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	171
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	172	Double metaphone
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	173	works with most European languages.
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	174
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	175	phonem
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	176	described in Georg Wilde and Carsten Meyer, 'Doppelgaenger gesucht - Ein Programm fuer kontextsensitive phonetische Textumwandlung'
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	177	from 'ct Magazin fuer Computer & Technik 25/1999'.
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	178
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	179	mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	180	Match Rating Approach Phonetic Algorithm Developed by Western Airlines in 1977.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	181
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	182	caverphone2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	183	better than soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	184
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	185	spanish phonetic code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	186	an algorithm slightly adjusted to Spanish names
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	187
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	188	More info for German readers is found in:
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	189	http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	190	"
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	191	!
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	192
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	193	sampleData
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	194	"
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	195	for the 50 most common German names, we get:
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	196
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	197	ext.
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	198	name soundex soundex metaphone phonet phonet2 phonix daitch phonem koeln caverphone2 mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	199
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	200	müller M460 54600000 MLR MÜLA NILA M4000000 689000 MYLR 657 MLA1111111 MLR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	201	schmidt S530 25300000 SKMTT SHMIT ZNIT S5300000 463000 CMYD 862 SKMT111111 SCHMDT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	202	schneider S536 25360000 SKNTR SHNEIDA ZNEITA S5300000 463900 CNAYDR 8627 SKNTA11111 SCHNDR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	203	fischer F260 12600000 FSKR FISHA FIZA F8000000 749000 VYCR 387 FSKA111111 FSCHR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	204	weber W160 16000000 WBR WEBA FEBA $1000000 779000 VBR 317 WPA1111111 WBR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	205	meyer M600 56000000 MYR MEIA NEIA M0000000 619000 MAYR 67 MA11111111 MYR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	206	wagner W256 25600000 WKNR WAKNA FAKNA $2500000 756900 VACNR 3467 WKNA111111 WGNR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	207	schulz S420 24200000 SKLS SHULS ZULZ S4800000 484000 CULC 858 SKS1111111 SCHLZ
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	208	becker B260 12600000 BKR BEKA BEKA B2000000 759000 BCR 147 PKA1111111 BCKR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	209	hoffmann H155 15500000 HFMN HOFMAN UFNAN $7550000 576600 OVMAN 036 AFMN111111 HFMN
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	210	schäfer S160 21600000 SKFR SHEFA ZEFA S7000000 479000 CVR 837 SKFA111111 SCHFR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	211
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	212	|cls|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	213
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	214	cls := MRAStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	215	cls := SoundexStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	216	cls := KoelnerPhoneticCodeStringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	217	cls := Caverphone2StringComparator.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	218	#('müller' 'schmidt' 'schneider' 'fischer' 'weber' 'meyer'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	219	'wagner' 'schulz' 'becker' 'hoffmann' 'schäfer')
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	220	do:[:name |
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	221	Transcript show:''''; show:name; show:''' -> '''; show:(cls encode:name); showCR:''''.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	222	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	223
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	224	KoelnerPhoneticCodeStringComparator encode:'Müller-Lüdenscheidt' -> '65752682'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	225	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	226	! !
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	227
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	228	!PhoneticStringUtilities class methodsFor:'phonetic codes'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	229
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	230	koelnerPhoneticCodeOf:aString
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	231	"return a koelner phonetic code.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	232	The koelnerPhonetic code is for the German language what the soundex code is for English;
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	233	it returns similar strings for similar sounding words.
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	234	There are some differences to soundex, though:
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	235	its length is not limited to 4, but depends on the length of the original string;
2207 6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	236	it does not start with the first character of the input.
6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	237	This algorithm is described by Postel 1969"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	238
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	239	^ (KoelnerPhoneticCodeStringComparator new phoneticStringsFor:aString) first
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	240
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	241	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	242	#(
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	243	'Müller'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	244	'Miller'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	245	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	246	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	247	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	248	'Mülherr'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	249	'Myler'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	250	'Millar'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	251	'Myller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	252	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	253	'Müler'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	254	'Muehler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	255	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	256	'Müllerr'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	257	'Muehlherr'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	258	'Muellar'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	259	'Mueler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	260	'Mülleer'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	261	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	262	'Nüller'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	263	'Nyller'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	264	'Niler'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	265	'Czerny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	266	'Tscherny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	267	'Czernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	268	'Tschernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	269	'Schernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	270	'Scherny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	271	'Scherno'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	272	'Czerne'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	273	'Zerny'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	274	'Tzernie'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	275	'Breschnew'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	276	) do:[:w \|
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	277	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities koelnerPhoneticCodeOf:w)
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	278	].
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	279	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	280
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	281	"
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	282	PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschnew'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	283	PhoneticStringUtilities koelnerPhoneticCodeOf:'Breschneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	284	PhoneticStringUtilities koelnerPhoneticCodeOf:'Braeschneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	285	PhoneticStringUtilities koelnerPhoneticCodeOf:'Braessneff'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	286	PhoneticStringUtilities koelnerPhoneticCodeOf:'Pressneff'. '17863'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	287	PhoneticStringUtilities koelnerPhoneticCodeOf:'Presznäph'. '17863'.
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	288	PhoneticStringUtilities koelnerPhoneticCodeOf:'Preschnjiev'. '17863'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	289	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	290	!
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	291
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	292	miracodeCodeOf:aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	293	"return the miracode soundex phonetic code of aString: the first of the phonetic strings answered by a new MiracodeStringComparator (or nil - NOTE(review): the nil case is decided inside the comparator, which is not visible here; confirm).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	294	Miracode is a slightly modified soundex algorithm.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	295	Notice that there are better algorithms around (doubleMetaphone) "
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	296
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	297	^ (MiracodeStringComparator new phoneticStringsFor:aString) first
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	298
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	299	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	300	PhoneticStringUtilities miracodeCodeOf:'claus'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	301	PhoneticStringUtilities miracodeCodeOf:'clause'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	302	PhoneticStringUtilities miracodeCodeOf:'close'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	303	PhoneticStringUtilities miracodeCodeOf:'smalltalk'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	304	PhoneticStringUtilities miracodeCodeOf:'smaltalk'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	305	PhoneticStringUtilities miracodeCodeOf:'smaltak'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	306	PhoneticStringUtilities miracodeCodeOf:'smaltok'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	307	PhoneticStringUtilities miracodeCodeOf:'smoltok'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	308	PhoneticStringUtilities miracodeCodeOf:'aa'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	309	PhoneticStringUtilities miracodeCodeOf:'by'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	310	PhoneticStringUtilities miracodeCodeOf:'bab'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	311	PhoneticStringUtilities miracodeCodeOf:'bob'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	312	PhoneticStringUtilities miracodeCodeOf:'bop'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	313	PhoneticStringUtilities miracodeCodeOf:'pub'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	314	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	315
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	316	"Created: / 28-07-2017 / 15:32:41 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	317	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	318
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	319	mySQLSoundexCodeOf:aString
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	320	"return the mySQL soundex code of aString: the first of the phonetic strings answered by a new MySQLSoundexStringComparator. The mysql soundex code is different from the miracode 'american' soundex
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	321	(no 4char limitation; different order of duplicate vowel vs. duplicate code elimination).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	322	Notice that there are better algorithms around (doubleMetaphone) "
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	323
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	324	^ (MySQLSoundexStringComparator new phoneticStringsFor:aString) first
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	325
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	326	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	327	#(
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	328	'Müller'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	329	'Miller'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	330	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	331	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	332	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	333	'Mülherr'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	334	'Myler'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	335	'Millar'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	336	'Myller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	337	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	338	'Müler'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	339	'Muehler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	340	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	341	'Müllerr'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	342	'Muehlherr'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	343	'Muellar'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	344	'Mueler'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	345	'Mülleer'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	346	'Mueller'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	347	'Nüller'
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	348	'Nyller'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	349	'Niler'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	350	'Czerny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	351	'Tscherny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	352	'Czernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	353	'Tschernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	354	'Schernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	355	'Scherny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	356	'Scherno'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	357	'Czerne'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	358	'Zerny'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	359	'Tzernie'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	360	'Breschnew'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	361	) do:[:w \|
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	362	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities mySQLSoundexCodeOf:w)
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	363	].
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	364	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	365
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	366	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	367	PhoneticStringUtilities mySQLSoundexCodeOf:'Breschnew'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	368	PhoneticStringUtilities mySQLSoundexCodeOf:'Breschneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	369	PhoneticStringUtilities mySQLSoundexCodeOf:'Braeschneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	370	PhoneticStringUtilities mySQLSoundexCodeOf:'Braessneff'.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	371	PhoneticStringUtilities mySQLSoundexCodeOf:'Pressneff'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	372	PhoneticStringUtilities mySQLSoundexCodeOf:'Presznäph'.
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	373	PhoneticStringUtilities mySQLSoundexCodeOf:'Preschnjiev'.
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	374	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	375
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	376	"Modified (comment): / 28-07-2017 / 15:34:03 / cg"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	377	!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	378
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	379	soundexCodeOf:aString
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	380	"return the soundex phonetic code of aString: the first of the phonetic strings answered by a new SoundexStringComparator (or nil - NOTE(review): the nil case is decided inside the comparator, which is not visible here; confirm).
2207 6a98ae779773 * empty log message * Claus Gittinger <cg@exept.de> parents: 2197 diff changeset	381	Soundex (1918, 1922) returns similar codes for similar sounding words, making it a useful
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	382	tool when searching for words where the correct spelling is unknown.
4194 12b5e3e2219b #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4184 diff changeset	383	(read Knuth or search the web if you don't know what a soundex code is).
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	384	Caveat: 'similar sounding words' means: 'similar sounding in English'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	385	Notice that there are better algorithms around (doubleMetaphone) "
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	386
2210 9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	387	^ (SoundexStringComparator new phoneticStringsFor:aString) first
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	388
2210 9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	389	"/ old code, disabled and kept for reference only - the code in the private class SoundexStringComparator is used now (see the return statement above)
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	390	"/ \|inStream codeStream ch last lch codeLength codes code lastCode\|
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	391	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	392	"/ inStream := aString readStream.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	393	"/ inStream skipSeparators.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	394	"/ inStream atEnd ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	395	"/ ^ nil
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	396	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	397	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	398	"/ ch := inStream next.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	399	"/ ch isLetter ifFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	400	"/ ^ nil
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	401	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	402	"/ codeLength := 0.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	403	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	404	"/ codes := Dictionary new.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	405	"/ codes atAll:'bpfv' put:$1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	406	"/ codes atAll:'cskgjqxz' put:$2.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	407	"/ codes atAll:'dt' put:$3.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	408	"/ codes atAll:'l' put:$4.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	409	"/ codes atAll:'nm' put:$5.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	410	"/ codes atAll:'r' put:$6.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	411	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	412	"/ codeStream := WriteStream on:(String new:4).
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	413	"/ codeStream nextPut:(ch asUppercase).
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	414	"/ last := ch asLowercase.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	415	"/ lastCode := codes at:last ifAbsent:nil.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	416	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	417	"/ [inStream atEnd] whileFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	418	"/ ch := inStream next.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	419	"/ lch := ch asLowercase.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	420	"/ lch = last ifFalse:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	421	"/ last := lch.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	422	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	423	"/ code := codes at:lch ifAbsent:nil.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	424	"/ (code notNil and:[ code ~= lastCode]) ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	425	"/ codeLength < 3 ifTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	426	"/ codeStream nextPut:code.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	427	"/ codeLength := codeLength + 1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	428	"/ codeLength > 3 ifTrue:[^ codeStream contents].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	429	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	430	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	431	"/ lastCode := code.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	432	"/ ]
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	433	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	434	"/ [ codeLength < 3 ] whileTrue:[
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	435	"/ codeStream nextPut:$0.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	436	"/ codeLength := codeLength + 1.
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	437	"/ ].
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	438	"/
9c428fe51c78 * empty log message * Claus Gittinger <cg@exept.de> parents: 2209 diff changeset	439	"/ ^ codeStream contents
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	440
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	441	"
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	442	PhoneticStringUtilities soundexCodeOf:'claus'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	443	PhoneticStringUtilities soundexCodeOf:'clause'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	444	PhoneticStringUtilities soundexCodeOf:'close'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	445	PhoneticStringUtilities soundexCodeOf:'smalltalk'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	446	PhoneticStringUtilities soundexCodeOf:'smaltalk'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	447	PhoneticStringUtilities soundexCodeOf:'smaltak'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	448	PhoneticStringUtilities soundexCodeOf:'smaltok'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	449	PhoneticStringUtilities soundexCodeOf:'smoltok'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	450	PhoneticStringUtilities soundexCodeOf:'aa'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	451	PhoneticStringUtilities soundexCodeOf:'by'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	452	PhoneticStringUtilities soundexCodeOf:'bab'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	453	PhoneticStringUtilities soundexCodeOf:'bob'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	454	PhoneticStringUtilities soundexCodeOf:'bop'
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	455	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	456
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	457	"Modified (comment): / 28-07-2017 / 15:33:53 / cg"
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	458	! !
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	459
3648 fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	460	!PhoneticStringUtilities class methodsFor:'queries'!
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	461
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	462	isUtilityClass "answer true only if the receiver is PhoneticStringUtilities itself (identity test); any other receiver, e.g. a subclass, answers false"
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	463	^ self == PhoneticStringUtilities
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	464	! !
fccb127ba02e #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3646 diff changeset	465
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	466	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'constant'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	467
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	468	defaultClass "answer SoundexStringComparator as the default comparator class - NOTE(review): the senders that consume this default are not visible here"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	469	^SoundexStringComparator
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	470	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	471
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	472	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'documentation'!
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	473
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	474	documentation
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	475	"
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	476	Abstract superclass for various phonetic comparators.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	477	They return similar strings for similar sounding words, which can be used
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	478	to find similar sounding words in a search list.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	479
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	480	Note that some comparators are better suited for particular languages.
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	481	"
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	482	!
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	483
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	484	examples
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	485	" usage examples: each expression sends #does:soundLike: to a new instance of a concrete comparator class
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	486	PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	487	does:'miller' soundLike:'miler'.
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	488
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	489	PhoneticStringUtilities::SoundexStringComparator new
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	490	does:'miller' soundLike:'milner'.
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	491
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	492	PhoneticStringUtilities::SoundexStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	493	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	494
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	495	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	496	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	497	"
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	498	! !
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	499
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	500	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'instance creation'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	501
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	502	new "answer the result of sending #initialize to a fresh basicNew instance of the receiver"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	503	^ self basicNew initialize.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	504	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	505
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	506	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'queries'!
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	507
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	508	isAbstract "answer true only if the receiver is PhoneticStringComparator itself (identity test); any other receiver, e.g. a subclass, answers false"
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	509	^ self == PhoneticStringUtilities::PhoneticStringComparator
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	510	! !
82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	511
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	512	!PhoneticStringUtilities::PhoneticStringComparator class methodsFor:'utilities'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	513
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	514	does:aString soundLike:anotherString
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	515	"class-side convenience: return true, if aString sounds similar to anotherString; the answer is whatever a new instance of the receiver answers to the same message"
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	516
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	517	^ self new does:aString soundLike:anotherString.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	518
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	519	"
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	520	PhoneticStringUtilities::SoundexStringComparator does:'miller' soundLike:'miler'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	521
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	522	PhoneticStringUtilities::SoundexStringComparator does:'miller' soundLike:'milner'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	523
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	524	PhoneticStringUtilities::SoundexStringComparator does:'müller' soundLike:'mueller'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	525
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	526	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator does:'müller' soundLike:'mueller'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	527	PhoneticStringUtilities::DoubleMetaphoneStringComparator does:'müller' soundLike:'mueller'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	528	"
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	529	!
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	530
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	531	encode:word
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	532	"Return a phonetic encoding for word, namely the first of the strings that a new instance answers from phoneticStringsFor:.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	533	This can e.g. be used as a key to map/hash similar-sounding words."
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	534
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	535	^ (self new phoneticStringsFor:word) first
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	536
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	537	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	538	SoundexStringComparator encode:'Fischer' -> 'F260'
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	539	SoundexStringComparator encode:'Fiescher' -> 'F260'
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	540	Caverphone2StringComparator encode:'Fischer' -> 'FSKA111111'
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	541	Caverphone2StringComparator encode:'Fiescher' -> 'FSKA111111'
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	542	MRAStringComparator encode:'Fischer' -> 'FSCHR'
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	543	MRAStringComparator encode:'Fiescher' -> 'FSCHR'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	544	SpanishPhoneticCodeStringComparator encode:'Fischer' -> '24429'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	545	SpanishPhoneticCodeStringComparator encode:'Fiescher' -> '24429'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	546	DoubleMetaphoneStringComparator encode:'Fischer' -> 'FXR'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	547	DoubleMetaphoneStringComparator encode:'Fiescher' -> 'FXR'
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	548	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	549
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	550	"Created: / 02-08-2017 / 01:15:50 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	551	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	552
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	553	!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	554
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	555	does:aString soundLike:anotherString
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	556	"Return true if aString sounds similar to anotherString, i.e. if any phonetic string computed for aString equals any phonetic string computed for anotherString."
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	557
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	558	\|translations1 translations2\|
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	559
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	560	translations1 := self phoneticStringsFor:aString.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	561	translations2 := self phoneticStringsFor:anotherString.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	562
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	563	^ translations1 contains:[:t1 \|
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	564	translations2 contains:[:t2 \| t1 = t2]
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	565	]
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	566
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	567	"
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	568	PhoneticStringUtilities::SoundexStringComparator new does:'miller' soundLike:'miler'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	569
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	570	PhoneticStringUtilities::SoundexStringComparator new
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	571	does:'miller' soundLike:'milner'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	572
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	573	PhoneticStringUtilities::SoundexStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	574	does:'müller' soundLike:'mueller'.
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	575
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	576	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	577	does:'müller' soundLike:'mueller'.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	578	"
4467 c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	579
c946d9eea9ec #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4194 diff changeset	580	"Modified (comment): / 13-07-2017 / 17:51:43 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	581	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	582
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	583	phoneticStringsFor: aString
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	584	"Should answer an array of alternate phonetic strings for aString. This implementation only sends subclassResponsibility, so concrete comparator classes are expected to redefine it."
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	585
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	586	self subclassResponsibility
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	587
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	588	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	589	(PhoneticStringUtilities::SoundexStringComparator new
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	590	phoneticStringsFor:'miller') first
735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	591
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	592	'miller' asSoundexCode
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	593	"
4485 735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	594
735edd20512a #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4467 diff changeset	595	"Modified (comment): / 27-07-2017 / 15:07:59 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	596	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	597
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	598	!PhoneticStringUtilities::PhoneticStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	599
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	600	initialize
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	601	"Invoked when a new instance is created; this implementation contains no statements."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	602
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	603	"/ nothing is initialized here; the body is empty apart from these comments
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	604
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	605	"/ super initialize. -- commented since inherited method does nothing
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	606	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	607
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	608	!PhoneticStringUtilities::DaitchMokotoffStringComparator class methodsFor:'documentation'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	609
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	610	documentation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	611	"
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	612	self encode:'AUERBACH' -> 097400, 097500
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	613
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	614	Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	615	The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	616	yielding greater accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	617	but differences in spelling.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	618
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	619	The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	620	- coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	621	- the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	622	- rules to encode multi-character n-grams
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	623	- multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	624
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	625	This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	626	encode:aString - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	627	phoneticStringsFor:String - branching enabled, all codes will be returned, separated by '\|'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	628
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	629	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	630	'Wikipedia - Daitch-Mokotoff Soundex'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	631	http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	632
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	633	'Avotaynu - Soundexing and Genealogy'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	634	http://www.avotaynu.com/soundex.htm
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	635	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	636	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	637
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	638	javaCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	639	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	640	/*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	641	* Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	642	* contributor license agreements. See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	643	* this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	644	* The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	645	* (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	646	* the License. You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	647	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	648	* http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	649	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	650	* Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	651	* distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	652	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	653	* See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	654	* limitations under the License.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	655	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	656	package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	657
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	658	import org.apache.commons.codec.CharEncoding;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	659	import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	660	import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	661
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	662	import java.io.InputStream;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	663	import java.util.*;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	664
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	665	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	666	* Encodes a string into a Daitch-Mokotoff Soundex value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	667	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	668	* The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	669	* accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	670	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	671	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	672	* The main differences compared to the other soundex variants are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	673	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	674	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	675	* <li>coded names are 6 digits long
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	676	* <li>the initial character of the name is coded
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	677	* <li>rules to encoded multi-character n-grams
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	678	* <li>multiple possible encodings for the same name (branching)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	679	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	680	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	681	* This implementation supports branching, depending on the used method:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	682	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	683	* <li>{@link #encode(String)} - branching disabled, only the first code will be returned
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	684	* <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '\|'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	685	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	686	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	687	* Note: this implementation has additional branching rules compared to the original description of the algorithm. The
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	688	* rules can be customized by overriding the default rules contained in the resource file
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	689	* {@code org/apache/commons/codec/language/dmrules.txt}.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	690	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	691	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	692	* This class is thread-safe.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	693	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	694	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	695	* @see Soundex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	696	* @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	697	* @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	698	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	699	* @version $Id$
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	700	* @since 1.10
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	701	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	702	public class DaitchMokotoffSoundex implements StringEncoder {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	703
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	704	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	705	* Inner class representing a branch during DM soundex encoding.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	706	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	707	private static final class Branch {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	708	private final StringBuilder builder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	709	private String cachedString;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	710	private String lastReplacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	711
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	712	private Branch() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	713	builder = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	714	lastReplacement = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	715	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	716	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	717
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	718	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	719	* Creates a new branch, identical to this branch.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	720	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	721	* @return a new, identical branch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	722	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	723	public Branch createBranch() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	724	final Branch branch = new Branch();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	725	branch.builder.append(toString());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	726	branch.lastReplacement = this.lastReplacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	727	return branch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	728	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	729
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	730	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	731	public boolean equals(final Object other) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	732	if (this == other) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	733	return true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	734	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	735	if (!!(other instanceof Branch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	736	return false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	737	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	738
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	739	return toString().equals(((Branch) other).toString());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	740	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	741
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	742	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	743	* Finish this branch by appending '0's until the maximum code length has been reached.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	744	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	745	public void finish() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	746	while (builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	747	builder.append('0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	748	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	749	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	750	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	751
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	752	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	753	public int hashCode() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	754	return toString().hashCode();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	755	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	756
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	757	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	758	* Process the next replacement to be added to this branch.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	759	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	760	* @param replacement
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	761	* the next replacement to append
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	762	* @param forceAppend
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	763	* indicates if the default processing shall be overridden
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	764	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	765	public void processNextReplacement(final String replacement, final boolean forceAppend) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	766	final boolean append = lastReplacement == null \|\| !!lastReplacement.endsWith(replacement) \|\| forceAppend;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	767
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	768	if (append && builder.length() < MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	769	builder.append(replacement);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	770	// remove all characters after the maximum length
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	771	if (builder.length() > MAX_LENGTH) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	772	builder.delete(MAX_LENGTH, builder.length());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	773	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	774	cachedString = null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	775	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	776
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	777	lastReplacement = replacement;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	778	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	779
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	780	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	781	public String toString() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	782	if (cachedString == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	783	cachedString = builder.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	784	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	785	return cachedString;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	786	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	787	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	788
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	789	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	790	* Inner class for storing rules: one match pattern plus three lists of alternative replacements; getReplacements selects the list that fits the current context.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	791	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	792	private static final class Rule {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	793	private final String pattern; // text that matches() looks for at the beginning of the context
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	794	private final String[] replacementAtStart; // returned by getReplacements when atStart is true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	795	private final String[] replacementBeforeVowel; // returned when the character following the pattern is a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	796	private final String[] replacementDefault; // returned in every other case
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	797
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	798	protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	799	final String replacementDefault) { // each replacement argument is split on the pipe separator into an array of alternatives
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	800	this.pattern = pattern;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	801	this.replacementAtStart = replacementAtStart.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	802	this.replacementBeforeVowel = replacementBeforeVowel.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	803	this.replacementDefault = replacementDefault.split("\\\|");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	804	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	805
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	806	public int getPatternLength() { // number of characters in the pattern
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	807	return pattern.length();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	808	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	809
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	810	public String[] getReplacements(final String context, final boolean atStart) { // assumes context begins with the pattern (the pattern length is used as an index into it) -- TODO confirm against caller
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	811	if (atStart) { // atStart takes precedence over the vowel check below
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	812	return replacementAtStart;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	813	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	814
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	815	final int nextIndex = getPatternLength(); // index in context of the first character after the pattern
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	816	final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false; // false when context has no character at nextIndex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	817	if (nextCharIsVowel) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	818	return replacementBeforeVowel;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	819	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	820
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	821	return replacementDefault;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	822	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	823
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	824	private boolean isVowel(final char ch) { // true only for the lower-case letters a, e, i, o and u
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	825	return ch == 'a' \|\| ch == 'e' \|\| ch == 'i' \|\| ch == 'o' \|\| ch == 'u';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	826	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	827
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	828	public boolean matches(final String context) { // true when context begins with the pattern
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	829	return context.startsWith(pattern);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	830	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	831
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	832	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	833	public String toString() { // renders the rule as pattern=(atStart,beforeVowel,default), each array printed as a list
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	834	return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	835	Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	836	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	837	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	838
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	839	private static final String COMMENT = "//"; // parseRules discards everything from this marker to the end of the line
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	840	private static final String DOUBLE_QUOTE = "\""; // removed from both ends of a token by stripQuotes
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	841
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	842	private static final String MULTILINE_COMMENT_END = "*/"; // a rules line ending with this marker closes a multi-line comment
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	843
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	844	private static final String MULTILINE_COMMENT_START = "/*"; // a rules line starting with this marker opens a multi-line comment
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	845
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	846	/** The resource file containing the replacement and folding rules; loaded through the class loader in the static initializer. */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	847	private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	848
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	849	/** The code length of a DM soundex value. */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	850	private static final int MAX_LENGTH = 6;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	851
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	852	/** Transformation rules indexed by the first character of their pattern; each list is sorted longest pattern first by the static initializer. */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	853	private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	854
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	855	/** Folding rules: maps a character to its single-character replacement; applied by cleanup when folding is enabled. */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	856	private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	857
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	858	static { // class initializer: parses RESOURCE_FILE into RULES and FOLDINGS, then orders every rule list
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	859	final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	860	if (rulesIS == null) { // null means the resource could not be loaded
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	861	throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	862	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	863
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	864	final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	865	parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	866	scanner.close(); // NOTE(review): not reached if parseRules throws; there is no try/finally around the parse
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	867
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	868	// sort RULES by pattern length in descending order
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	869	for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	870	final List<Rule> ruleList = rule.getValue();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	871	Collections.sort(ruleList, new Comparator<Rule>() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	872	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	873	public int compare(final Rule rule1, final Rule rule2) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	874	return rule2.getPatternLength() - rule1.getPatternLength(); // negative when rule1 has the longer pattern, so longer patterns come first
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	875	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	876	});
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	877	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	878	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	879
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	880	private static void parseRules(final Scanner scanner, final String location,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	881	final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) { // reads the rules text line by line and fills ruleMapping and asciiFoldings; location is only used in error messages
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	882	int currentLine = 0; // 1-based number of the line being processed; reported when a rule line fails
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	883	boolean inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	884
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	885	while (scanner.hasNextLine()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	886	currentLine++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	887	final String rawLine = scanner.nextLine(); // kept unmodified for use in error messages
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	888	String line = rawLine;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	889
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	890	if (inMultilineComment) { // every line inside a multi-line comment is skipped, including the one that closes it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	891	if (line.endsWith(MULTILINE_COMMENT_END)) { // NOTE(review): tested on the untrimmed line, so trailing whitespace after the end marker keeps the comment open
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	892	inMultilineComment = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	893	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	894	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	895	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	896
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	897	if (line.startsWith(MULTILINE_COMMENT_START)) { // NOTE(review): the opening line is not checked for an end marker, so a comment opened and closed on one line stays open
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	898	inMultilineComment = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	899	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	900	// discard comments
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	901	final int cmtI = line.indexOf(COMMENT);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	902	if (cmtI >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	903	line = line.substring(0, cmtI);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	904	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	905
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	906	// trim leading-trailing whitespace
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	907	line = line.trim();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	908
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	909	if (line.length() == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	910	continue; // empty lines can be safely skipped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	911	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	912
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	913	if (line.contains("=")) { // any line containing an equals sign is treated as a folding statement
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	914	// folding
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	915	final String[] parts = line.split("=");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	916	if (parts.length !!= 2) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	917	throw new IllegalArgumentException("Malformed folding statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	918	" parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	919	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	920	final String leftCharacter = parts[0];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	921	final String rightCharacter = parts[1];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	922
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	923	if (leftCharacter.length() !!= 1 \|\| rightCharacter.length() !!= 1) { // both sides must be exactly one character
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	924	throw new IllegalArgumentException("Malformed folding statement - " +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	925	"patterns are not single characters: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	926	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	927
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	928	asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0)); // the left character is the key, the right character its replacement
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	929	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	930	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	931	// rule
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	932	final String[] parts = line.split("\\s+"); // expects four whitespace-separated fields: the pattern and three replacements
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	933	if (parts.length !!= 4) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	934	throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	935	" parts: " + rawLine + " in " + location);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	936	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	937	try {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	938	final String pattern = stripQuotes(parts[0]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	939	final String replacement1 = stripQuotes(parts[1]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	940	final String replacement2 = stripQuotes(parts[2]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	941	final String replacement3 = stripQuotes(parts[3]);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	942
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	943	final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	944	final char patternKey = r.pattern.charAt(0); // rules are grouped under the first character of their pattern
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	945	List<Rule> rules = ruleMapping.get(patternKey);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	946	if (rules == null) { // first rule seen for this key: create its list
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	947	rules = new ArrayList<Rule>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	948	ruleMapping.put(patternKey, rules);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	949	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	950	rules.add(r);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	951	} catch (final IllegalArgumentException e) { // rethrown as IllegalStateException carrying the line number and location
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	952	throw new IllegalStateException(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	953	"Problem parsing line '" + currentLine + "' in " + location, e);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	954	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	955	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	956	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	957	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	958	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	959	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	960
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	961	private static String stripQuotes(String str) { // returns str without one leading and one trailing double-quote character, if present
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	962	if (str.startsWith(DOUBLE_QUOTE)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	963	str = str.substring(1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	964	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	965
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	966	if (str.endsWith(DOUBLE_QUOTE)) { // checked after the leading quote is gone, so a lone quote character yields the empty string
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	967	str = str.substring(0, str.length() - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	968	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	969
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	970	return str;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	971	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	972
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	973	/** Whether to use ASCII folding prior to encoding. */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	974	private final boolean folding; // assigned in the constructor and consulted by cleanup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	975
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	976	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	977	* Creates a new instance with ASCII-folding enabled; equivalent to calling the one-argument constructor with true.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	978	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	979	public DaitchMokotoffSoundex() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	980	this(true);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	981	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	982
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	983	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	984	* Creates a new instance and stores the folding choice for later use by cleanup.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	985	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	986	* With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	987	* è -> e.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	988	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	989	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	990	* @param folding
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	991	* if ASCII-folding shall be performed before encoding; the character mapping itself comes from the FOLDINGS table
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	992	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	993	public DaitchMokotoffSoundex(final boolean folding) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	994	this.folding = folding;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	995	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	996
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	997	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	998	* Performs a cleanup of the input string before the actual soundex transformation.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	999	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1000	* Removes all whitespace characters, converts the remaining characters to lower case and performs ASCII folding if enabled.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1001	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1002	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1003	* @param input
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1004	* the input string to clean up
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1005	* @return a cleaned up string
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1006	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1007	private String cleanup(final String input) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1008	final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1009	for (char ch : input.toCharArray()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1010	if (Character.isWhitespace(ch)) { // whitespace is dropped, not replaced
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1011	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1012	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1013
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1014	ch = Character.toLowerCase(ch); // lower-case first, so FOLDINGS is looked up with the lower-case form
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1015	if (folding && FOLDINGS.containsKey(ch)) { // characters without a folding entry pass through unchanged
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1016	ch = FOLDINGS.get(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1017	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1018	sb.append(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1019	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1020	return sb.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1021	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1022
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1023	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1024	* Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1025	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1026	* This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1027	* EncoderException if the supplied object is not of type java.lang.String (a null argument is rejected the same way).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1028	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1029	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1030	* @see #soundex(String)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1031	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1032	* @param obj
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1033	* Object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1034	* @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1035	* supplied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1036	* @throws EncoderException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1037	* if the parameter supplied is not of type java.lang.String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1038	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1039	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1040	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1041	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1042	public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1043	if (!!(obj instanceof String)) { // instanceof is false for null, so null also ends up here
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1044	throw new EncoderException(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1045	"Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1046	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1047	return encode((String) obj); // delegates to the String overload
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1048	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1049
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1050	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1051	* Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1052	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1053	* @see #soundex(String)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1054	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1055	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1056	* A String object to encode; may be null
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1057	* @return A DM Soundex code corresponding to the String supplied, or null if source is null
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1058	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1059	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1060	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1061	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1062	public String encode(final String source) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1063	if (source == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1064	return null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1065	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1066	return soundex(source, false)[0]; // only the first element of the returned array is used
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1067	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1068
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1069	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1070	* Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1071	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1072	* In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1073	* separated by '|'.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1074	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1075	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1076	* Example: the name "AUERBACH" is encoded as both
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1077	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1078	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1079	* <li>097400</li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1080	* <li>097500</li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1081	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1082	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1083	* Thus the result will be "097400|097500".
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1084	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1085	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1086	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1087	* A String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1088	* @return A string containing a set of DM Soundex codes corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1089	* @throws IllegalArgumentException
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1090	* if a character is not mapped
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1091	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1092	public String soundex(final String source) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1093	final String[] branches = soundex(source, true);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1094	final StringBuilder sb = new StringBuilder();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1095	int index = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1096	for (final String branch : branches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1097	sb.append(branch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1098	if (++index < branches.length) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1099	sb.append('|');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1100	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1101	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1102	return sb.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1103	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1104
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1105	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1106	* Perform the actual DM Soundex algorithm on the input string.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1107	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1108	* @param source
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1109	* A String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1110	* @param branching
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1111	* If branching shall be performed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1112	* @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1113	* selected branching mode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1114	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1115	private String[] soundex(final String source, final boolean branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1116	if (source == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1117	return null;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1118	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1119
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1120	final String input = cleanup(source);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1121
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1122	final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1123	currentBranches.add(new Branch());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1124
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1125	char lastChar = '\0';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1126	for (int index = 0; index < input.length(); index++) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1127	final char ch = input.charAt(index);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1128
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1129	// ignore whitespace inside a name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1130	if (Character.isWhitespace(ch)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1131	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1132	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1133
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1134	final String inputContext = input.substring(index);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1135	final List<Rule> rules = RULES.get(ch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1136	if (rules == null) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1137	continue;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1138	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1139
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1140	// use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1141	@SuppressWarnings("unchecked")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1142	final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1143
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1144	for (final Rule rule : rules) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1145	if (rule.matches(inputContext)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1146	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1147	nextBranches.clear();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1148	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1149	final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1150	final boolean branchingRequired = replacements.length > 1 && branching;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1151
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1152	for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1153	for (final String nextReplacement : replacements) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1154	// if we have multiple replacements, always create a new branch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1155	final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1156
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1157	// special rule: occurrences of mn or nm are treated differently
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1158	final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1159
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1160	nextBranch.processNextReplacement(nextReplacement, force);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1161
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1162	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1163	nextBranches.add(nextBranch);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1164	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1165	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1166	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1167	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1168	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1169
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1170	if (branching) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1171	currentBranches.clear();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1172	currentBranches.addAll(nextBranches);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1173	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1174	index += rule.getPatternLength() - 1;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1175	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1176	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1177	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1178
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1179	lastChar = ch;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1180	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1181
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1182	final String[] result = new String[currentBranches.size()];
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1183	int index = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1184	for (final Branch branch : currentBranches) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1185	branch.finish();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1186	result[index++] = branch.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1187	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1188
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1189	return result;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1190	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1191	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	1192	END>>"
2211 42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	1193	! !
42fe8fe39e9c * empty log message * Claus Gittinger <cg@exept.de> parents: 2210 diff changeset	1194
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1195	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'LICENSE'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1196
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1197	copyright
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1198	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1199	Copyright (c) 2002-2004 Robert Jarvis
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1200
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1201	Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1202	files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1203	copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1204	the Software is furnished to do so, subject to the following conditions:
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1205
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1206	The above copyright notice and this permission notice shall be included in all copies or substantial
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1207	portions of the Software.
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1208
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1209	THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1210	INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1211	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1212	WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1213	USE OR OTHER DEALINGS IN THE SOFTWARE.'
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1214	"
d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1215	! !
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1216
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1217	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'classification'!
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1218
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1219	isSlavoGermanic:aString "Answer true if aString includes at least one of the substrings listed in the literal array below. The literals are all lowercase - NOTE(review): the match is presumably case-sensitive, so callers should pass a lowercase string; confirm."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1220	^ #('w' 'k' 'cz' 'witz' 'ä' 'ö' 'ü' 'ß') contains:[:sub | aString includesString:sub]
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1221
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1222	"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1223	self isSlavoGermanic:'walter'
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1224	self isSlavoGermanic:'horowitz'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1225	self isSlavoGermanic:'müller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1226	self isSlavoGermanic:'miller'
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1227	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1228
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1229	"Modified: / 28-07-2017 / 10:14:38 / cg"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1230	! !
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1231
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1232	!PhoneticStringUtilities::DoubleMetaphoneStringComparator class methodsFor:'documentation'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1233
3685 01ebbac96899 #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3648 diff changeset	1234	documentation
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1235	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1236	The Double Metaphone algorithm
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1237
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1238	see internet: https://en.wikipedia.org/wiki/Metaphone
2209 d544b2f9f239 comments Claus Gittinger <cg@exept.de> parents: 2208 diff changeset	1239	"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1240	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1241
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1242	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'accessing'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1243
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1244	currentIndex "Answer the value of the currentIndex instance variable (set to 1 by #initialize)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1245	^currentIndex
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1246	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1247
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1248	currentIndex: anInteger "Set the currentIndex instance variable to anInteger."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1249	currentIndex := anInteger
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1250	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1251
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1252	inputKey "Answer the stored input key, i.e. the string in the normalized form produced by #inputKey:."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1253	^inputKey
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1254	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1255
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1256	inputKey: aString "Store aString, converted to uppercase, as the input key; the uppercase umlauts are then replaced by the two-letter sequences AE, OE and UE."
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1257	inputKey := aString asUppercase.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1258	"/ care for diaereses (umlauts); the includesAny: test skips the three copies when no umlaut is present
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1259	(inputKey includesAny:'ÄÖÜ') ifTrue:[
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1260	inputKey := inputKey copyReplaceString:'Ä' withString:'AE'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1261	inputKey := inputKey copyReplaceString:'Ö' withString:'OE'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1262	inputKey := inputKey copyReplaceString:'Ü' withString:'UE'.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1263	].
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1264	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1265
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1266	primaryTranslation "Answer the primary translation accumulated so far (empty string after #initialize)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1267	^primaryTranslation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1268	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1269
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1270	primaryTranslation: anObject "Replace the primary translation by anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1271	primaryTranslation := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1272	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1273
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1274	secondaryTranslation "Answer the secondary (alternate) translation accumulated so far (empty string after #initialize)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1275	^secondaryTranslation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1276	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1277
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1278	secondaryTranslation: anObject "Replace the secondary translation by anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1279	secondaryTranslation := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1280	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1281
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1282	skipCount "Answer the value of the skipCount instance variable (set to 0 by #initialize)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1283	^skipCount
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1284	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1285
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1286	skipCount: anInteger "Set the skipCount instance variable to anInteger."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1287	skipCount := anInteger
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1288	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1289
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1290	startIndex "Answer the value of the startIndex instance variable (set to 1 by #initialize)."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1291	^startIndex
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1292	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1293
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1294	startIndex: anObject "Set the startIndex instance variable to anObject."
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1295	startIndex := anObject
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1296	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1297
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1298	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'api'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1299
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1300	phoneticStringsFor:aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1301	"Answer a two-element Array holding the primary and the secondary phonetic translation of aString, in that order. The receiver's state is reset via #initialize first, and aString is normalized by #inputKey: before it is processed."
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1302
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1303	self initialize.
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1304	self inputKey:aString.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1305	self performInitialProcessing.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1306	self processRemainingCharacters.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1307	^ Array with:primaryTranslation with:secondaryTranslation
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1308
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1309	"Modified (format): / 28-07-2017 / 11:25:02 / cg"
5236 28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1310
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1311	"
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1312	PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'muller'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1313	PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'mueller'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1314	PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'müller'
28c398151366 #BUGFIX by exept Claus Gittinger <cg@exept.de> parents: 5235 diff changeset	1315	"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1316	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1317
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1318	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'initialization'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1319
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1320	initialize "Reset the translation state: both translations become empty strings, skipCount becomes 0, startIndex and currentIndex become 1. Also sent by #phoneticStringsFor: before each translation."
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1321	super initialize.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1322
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1323	startIndex := 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1324	primaryTranslation := ''.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1325	secondaryTranslation := ''.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1326	skipCount := 0.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1327	currentIndex := 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1328
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1329	"Modified: / 28-07-2017 / 11:18:44 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1330	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1331
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1332	!PhoneticStringUtilities::DoubleMetaphoneStringComparator methodsFor:'private'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1333
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1334	addPrimaryTranslation:aString "Append aString to the primary translation built so far."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1335	primaryTranslation := (primaryTranslation , aString)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1336
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1337	"Modified: / 28-07-2017 / 11:19:09 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1338	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1339
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1340	addSecondaryTranslation:aString "Append aString to the secondary translation built so far."
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1341	secondaryTranslation := secondaryTranslation , aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1342
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1343	"Modified: / 28-07-2017 / 11:17:11 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1344	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1345
isSlavoGermanic: aString
    "Answer true if aString contains the letter W or K,
     or one of the letter sequences 'CZ' or 'WITZ'; false otherwise.
     The checks are made in that order and stop at the first hit."

    (aString includesAny: 'WK') ifTrue:[^ true].
    (aString indexOfSubCollection: 'CZ' startingAt: 1) > 0 ifTrue:[^ true].
    ^ (aString indexOfSubCollection: 'WITZ' startingAt: 1) > 0

    "Modified: / 09-10-2017 / 17:10:46 / stefan"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1352	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1353
keyAt: anInteger
    "Answer the character of inputKey at index anInteger.
     For any index outside of 1 .. inputKey size, answer a space
     instead of raising an index error."

    (anInteger < 1 or:[ anInteger > inputKey size ]) ifTrue:[
        ^ Character space
    ].
    ^ inputKey at: anInteger

    "Modified: / 28-07-2017 / 11:38:30 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1361	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1362
keyLeftString: lengthInteger
    "Answer the leading lengthInteger characters of inputKey,
     as delivered by keyMidString:from: for a window starting at index 1."

    |leftPart|

    leftPart := self keyMidString: lengthInteger from: 1.
    ^ leftPart
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1365	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1366
keyMidString: lengthInteger from: fromInteger
    "Answer a string of exactly lengthInteger characters: the window of
     inputKey which starts at index fromInteger. Window positions lying
     before the first or behind the last character of inputKey are
     represented by spaces, so callers may ask for text around the
     current position without checking bounds themselves.
     Answers an empty string if lengthInteger is not positive.

     lengthInteger - number of characters wanted
     fromInteger   - index of the first wanted character; may be less
                     than 1 or greater than inputKey size"

    |blank keySize lastWanted firstValid lastValid|

    lengthInteger <= 0 ifTrue:[^ ''].

    blank := Character space.
    keySize := inputKey size.
    lastWanted := fromInteger + lengthInteger - 1.

    "clip the window to the part which really exists in inputKey"
    firstValid := fromInteger max: 1.
    lastValid := lastWanted min: keySize.

    "window lies completely outside of inputKey: nothing to copy, only padding"
    firstValid > lastValid ifTrue:[
        ^ String new: lengthInteger withAll: blank
    ].

    ^ (String new: (firstValid - fromInteger) withAll: blank)
        , (inputKey copyFrom: firstValid to: lastValid)
        , (String new: (lastWanted - lastValid) withAll: blank)

    "Modified: / 28-07-2017 / 11:20:43 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1396	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1397
keyRightString: lengthInteger
    "Answer the trailing lengthInteger characters of inputKey,
     as delivered by keyMidString:from: for a window which ends
     at the last character of inputKey."

    |firstIndex|

    firstIndex := inputKey size - lengthInteger + 1.
    ^ self keyMidString: lengthInteger from: firstIndex

    "Modified: / 28-07-2017 / 11:20:51 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1402	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1403
performInitialProcessing
    "Handle the special cases at the very beginning of inputKey:
       - a key longer than one character which starts with
         GN, KN, PN, WR or PS: startIndex is advanced by one
       - a leading X: 'S' is added to both translations
         and startIndex is advanced by one
       - a leading vowel: 'A' is added to both translations
         and startIndex is advanced by one"

    |firstChar|

    (inputKey size > 1
      and:[ inputKey startsWithAnyOf:#( 'GN' 'KN' 'PN' 'WR' 'PS' ) ]) ifTrue:[
        startIndex := startIndex + 1
    ].

    "keyAt: answers a space for an empty key, which matches neither case below"
    firstChar := self keyAt:1.

    firstChar = $X ifTrue:[
        self addPrimaryTranslation:'S'.
        self addSecondaryTranslation:'S'.
        startIndex := startIndex + 1
    ].
    firstChar isVowel ifTrue:[
        self addPrimaryTranslation:'A'.
        self addSecondaryTranslation:'A'.
        startIndex := startIndex + 1
    ]

    "Modified: / 01-08-2017 / 19:29:19 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1428	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1429
processB
    "Encode the letter B at currentIndex: 'P' is added to both translations.
     If the following letter is a B as well, skipCount is incremented,
     so that the doubled letter is encoded only once."

    |following|

    self addPrimaryTranslation: 'P'.
    self addSecondaryTranslation: 'P'.

    following := self keyAt: currentIndex + 1.
    following == $B ifTrue:[
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:26:03 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1440	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1441
processC
    "Encode the letter C at currentIndex of inputKey, following the rules
     for C of Lawrence Philips' Double Metaphone algorithm.
     The rules (numbered i .. xi below) are tried in order; the first one
     which applies adds its code to the primary and secondary translation,
     adds to skipCount and returns.

     skipCount is incremented by the number of letters consumed by the
     rule IN ADDITION to the C itself (e.g. by 1 for 'CH', by 2 for 'CCI').
     NOTE(review): this assumes that the caller advances currentIndex by
     1 + skipCount, as the other process methods of this class suggest
     - confirm against the main loop.

     All context lookups go through keyAt: / keyMidString:from: or
     startsWith:, which tolerate positions outside of inputKey, so short
     keys never lead to an index error here."

    "i - germanic 'ACH' after a non-vowel.
     Not if an I follows; if an E follows, only for BACHER / MACHER"
    (((((currentIndex >= 3
        and:[ (self keyAt: currentIndex - 2) isVowel not ])
        and:[ (self keyMidString: 3 from: currentIndex - 1) = 'ACH' ])
        and:[ (self keyAt: currentIndex + 2) ~= $I ])
        and:[ (self keyAt: currentIndex + 2) ~= $E
                or:[ #('BACHER' 'MACHER')
                        includes: (self keyMidString: 6 from: currentIndex - 2) ] ]))
    ifTrue:[
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.
        "consumes the H only"
        skipCount := skipCount + 1.
        ^ self
    ].

    "ii - the initial C of a key starting with CAESAR"
    (currentIndex = 1 and:[ inputKey beginsWith: 'CAESAR' ]) ifTrue:[
        self addPrimaryTranslation: 'S'.
        self addSecondaryTranslation: 'S'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "iii - 'CHIA'"
    (self keyMidString: 4 from: currentIndex) = 'CHIA' ifTrue:[
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "iv - all other 'CH'; every branch consumes the H and returns"
    (self keyMidString: 2 from: currentIndex) = 'CH' ifTrue:[
        "a - non-initial 'CHAE'"
        (currentIndex > 1
          and:[ (self keyMidString: 4 from: currentIndex) = 'CHAE' ])
        ifTrue:[
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'X'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "b - initial CHARAC / CHARIS / CHOR / CHYM / CHIA / CHEM, but not CHORE.
         startsWith: answers false for keys shorter than the prefix,
         so no size check is required"
        (currentIndex = 1
          and:[ ((inputKey startsWith: 'CHARAC')
                    or:[ inputKey startsWith: 'CHARIS' ])
                or:[ ((((inputKey startsWith: 'CHOR')
                            or:[ inputKey startsWith: 'CHYM' ])
                            or:[ inputKey startsWith: 'CHIA' ])
                            or:[ inputKey startsWith: 'CHEM' ])
                        and:[ (inputKey startsWith: 'CHORE') not ] ] ])
        ifTrue:[
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "c - contexts in which 'CH' is encoded as K:
             key starts with 'VAN ', 'VON ' or 'SCH',
             or the C is part of ORCHES / ARCHIT / ORCHID,
             or 'CH' is followed by T or S,
             or ('CH' is initial or follows A, O, U, E) and is followed by
                one of L R N M B H F V W or by a space / the end of the key"
        ((((((inputKey startsWith: 'VAN ')
            or:[ inputKey startsWith: 'VON ' ])
            or:[ inputKey startsWith: 'SCH' ])
            or:[ #('ORCHES' 'ARCHIT' 'ORCHID')
                    includes: (self keyMidString: 6 from: currentIndex - 2) ])
            or:[ 'TS' includes: (self keyAt: currentIndex + 2) ])
            or:[ (currentIndex = 1
                    or:[ 'AOUE' includes: (self keyAt: currentIndex - 1) ])
                 and:[ 'LRNMBHFVW ' includes: (self keyAt: currentIndex + 2) ] ])
        ifTrue:[
            self addPrimaryTranslation: 'K'.
            self addSecondaryTranslation: 'K'
        ] ifFalse:[
            currentIndex > 1 ifTrue:[
                (inputKey startsWith: 'MC') ifTrue:[
                    self addPrimaryTranslation: 'K'.
                    self addSecondaryTranslation: 'K'
                ] ifFalse:[
                    self addPrimaryTranslation: 'X'.
                    self addSecondaryTranslation: 'K'
                ]
            ] ifFalse:[
                self addPrimaryTranslation: 'X'.
                self addSecondaryTranslation: 'X'
            ]
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "v - 'CZ'"
    (self keyAt: currentIndex + 1) = $Z ifTrue:[
        self addPrimaryTranslation: 'S'.
        self addSecondaryTranslation: 'X'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "vi - C followed by 'CIA'; consumes the second C and the I"
    (self keyMidString: 3 from: currentIndex + 1) = 'CIA' ifTrue:[
        self addPrimaryTranslation: 'X'.
        self addSecondaryTranslation: 'X'.
        skipCount := skipCount + 2.
        ^ self
    ].

    "vii - double C, except for the one directly behind an initial M"
    ((self keyAt: currentIndex + 1) = $C
      and:[ (currentIndex = 2 and:[ (self keyAt: 1) = $M ]) not ])
    ifTrue:[
        (('IEH' includes: (self keyAt: currentIndex + 2))
          and:[ (self keyMidString: 2 from: currentIndex + 2) ~= 'HU' ])
        ifTrue:[
            "'CC' followed by I, E or H (but not by HU)"
            ((currentIndex = 2 and:[ (self keyAt: 1) = $A ])
              or:[ #('UCCEE' 'UCCES')
                      includes: (self keyMidString: 5 from: currentIndex - 1) ])
            ifTrue:[
                self addPrimaryTranslation: 'KS'.
                self addSecondaryTranslation: 'KS'
            ] ifFalse:[
                self addPrimaryTranslation: 'X'.
                self addSecondaryTranslation: 'X'
            ].
            "consumes the second C and the letter behind it"
            skipCount := skipCount + 2.
            ^ self
        ].

        "any other 'CC': a single K; consumes the second C only"
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "viii - 'CK', 'CG', 'CQ'"
    ('KGQ' includes: (self keyAt: currentIndex + 1)) ifTrue:[
        self addPrimaryTranslation: 'K'.
        self addSecondaryTranslation: 'K'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "ix - 'CI', 'CE', 'CY'"
    ('IEY' includes: (self keyAt: currentIndex + 1)) ifTrue:[
        self addPrimaryTranslation: 'S'.
        (#('CIO' 'CIE' 'CIA') includes: (self keyMidString: 3 from: currentIndex))
            ifTrue:[ self addSecondaryTranslation: 'X' ]
            ifFalse:[ self addSecondaryTranslation: 'S' ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "x - everything else is a K"
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "xi - also swallow a following ' C', ' Q', ' G',
     or a following C, K, Q which does not start 'CE' or 'CI'"
    (#(' C' ' Q' ' G') includes: (self keyMidString: 2 from: currentIndex + 1))
    ifTrue:[
        skipCount := skipCount + 2
    ] ifFalse:[
        (('CKQ' includes: (self keyAt: currentIndex + 1))
          and:[ (#('CE' 'CI') includes: (self keyMidString: 2 from: currentIndex + 1)) not ])
        ifTrue:[
            skipCount := skipCount + 1
        ]
    ]

    "Modified: / 28-07-2017 / 11:29:11 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1617	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1618
processCedille
    "Emit 'S' as both the primary and the secondary translation.
     No look-ahead is done and skipCount is left untouched.
     (Judging by the selector this handles a c-cedilla - confirm against the dispatching caller.)"

    self addPrimaryTranslation: 'S'.
    self addSecondaryTranslation: 'S'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1623	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1624
processD
    "Encode a 'D' found at currentIndex. The same code is added to the primary
     and to the secondary translation:
        i)   DG followed by I, E or Y   -> 'J',  skipCount advanced by 2
             DG followed by anything else -> 'TK', skipCount advanced by 1
        ii)  DT or DD                   -> 'T',  skipCount advanced by 1
        iii) any other D                -> 'T',  skipCount unchanged"

    | following code extraSkip |

    following := self keyAt: currentIndex + 1.
    code := 'T'.
    extraSkip := 0.

    following = $G
        ifTrue: [
            "i"
            (#($I $E $Y) includes: (self keyAt: currentIndex + 2))
                ifTrue: [
                    code := 'J'.
                    extraSkip := 2 ]
                ifFalse: [
                    code := 'TK'.
                    extraSkip := 1 ] ]
        ifFalse: [
            "ii - the plain 'T' of case iii is already the default"
            (#($T $D) includes: following)
                ifTrue: [ extraSkip := 1 ] ].

    self
        addPrimaryTranslation: code;
        addSecondaryTranslation: code.
    extraSkip > 0
        ifTrue: [ skipCount := skipCount + extraSkip ]

    "Modified: / 28-07-2017 / 11:27:39 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1658	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1659
processF
    "Encode an 'F' found at currentIndex as 'F' in both translations.
     If the next key character is an 'F' as well (a doubled FF),
     skipCount is advanced by one."

    | isDoubled |

    isDoubled := (self keyAt: currentIndex + 1) = $F.

    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    isDoubled
        ifTrue: [ skipCount := skipCount + 1 ]

    "Modified (format): / 28-07-2017 / 11:29:21 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1669	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1670
processG
    "Encode a 'G' found at currentIndex into the primary and secondary translation.

     This follows the 'G' case of the double metaphone reference implementation
     (http://aspell.sourceforge.net/metaphone/dmetaph.cpp). The C fragments quoted
     in the comments below use 0-based indices ('current'), whereas currentIndex
     is 1-based; 'StringAt(pos, len, ...)' there yields false if the substring
     is not available.

     Wherever the reference does 'current += 2', skipCount is advanced by one here.

     Differences to the previous version of this method (all to match the reference):
        - two-character look-aheads ('EY', 'ER') really compare two characters;
          they used to compare three and therefore only matched at the end of the key
        - 'LI' directly at the end of the key is recognized
        - a missing look-ahead counts as 'no match' instead of selecting another branch
        - the -ger-/-gy- rule is suppressed after 'I' as well as after 'E'
        - the 'ET' test looks at the characters after the G, not at the start of the word
        - no substring access can run outside of inputKey"

    | word midString followingTwo |

    "bounds-safe substring: up to count characters of inputKey starting at start;
     empty if start lies outside of the key"
    midString := [:start :count |
        (start < 1 or: [start > inputKey size])
            ifTrue: [ '' ]
            ifFalse: [ inputKey copyFrom: start to: (start + count - 1 min: inputKey size) ] ].

    "the (up to) two characters following the G"
    followingTwo := midString value: currentIndex + 1 value: 2.

    "if(GetAt(current + 1) == 'H')"
    (self keyAt: currentIndex + 1) = $H ifTrue: [
        "if((current > 0) AND NOT IsVowel(current - 1))
             MetaphAdd(K); current += 2"
        (currentIndex > 1 and: [(self keyAt: currentIndex - 1) isVowel not]) ifTrue: [
            self
                addPrimaryTranslation: 'K';
                addSecondaryTranslation: 'K'.
            skipCount := skipCount + 1.
            ^ self
        ].

        "word-initial GH, e.g. 'ghislane', 'ghiradelli'
         if(current == 0)"
        currentIndex = 1 ifTrue: [
            "if(GetAt(current + 2) == 'I') MetaphAdd(J) else MetaphAdd(K)"
            (self keyAt: currentIndex + 2) = $I
                ifTrue: [
                    self
                        addPrimaryTranslation: 'J';
                        addSecondaryTranslation: 'J' ]
                ifFalse: [
                    self
                        addPrimaryTranslation: 'K';
                        addSecondaryTranslation: 'K' ].
            skipCount := skipCount + 1.
            ^ self
        ].

        "Parker's rule (with some further refinements) - GH is silent
            e.g. 'hugh':      (current > 1) AND StringAt((current - 2), 1, B, H, D)
            e.g. 'bough':     (current > 2) AND StringAt((current - 3), 1, B, H, D)
            e.g. 'broughton': (current > 3) AND StringAt((current - 4), 1, B, H)"
        (((currentIndex > 2 and: [#($B $H $D) includes: (self keyAt: currentIndex - 2)])
            or: [currentIndex > 3 and: [#($B $H $D) includes: (self keyAt: currentIndex - 3)]])
            or: [currentIndex > 4 and: [#($B $H) includes: (self keyAt: currentIndex - 4)]])
        ifTrue: [
            "nothing is added"
            skipCount := skipCount + 1.
            ^ self
        ].

        "e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
            if((current > 2)
               AND (GetAt(current - 1) == 'U')
               AND StringAt((current - 3), 1, C, G, L, R, T))"
        (currentIndex > 3
            and: [(self keyAt: currentIndex - 1) = $U
            and: [#($C $G $L $R $T) includes: (self keyAt: currentIndex - 3)]])
        ifTrue: [
            self
                addPrimaryTranslation: 'F';
                addSecondaryTranslation: 'F'
        ] ifFalse: [
            "if((current > 0) AND GetAt(current - 1) NOT= 'I') MetaphAdd(K)"
            (currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $I]) ifTrue: [
                self
                    addPrimaryTranslation: 'K';
                    addSecondaryTranslation: 'K'
            ]
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "if(GetAt(current + 1) == 'N')"
    (self keyAt: currentIndex + 1) = $N ifTrue: [
        "if((current == 1) AND IsVowel(0) AND NOT SlavoGermanic())
             MetaphAdd(KN, N)"
        (currentIndex = 2
            and: [(inputKey at: 1) isVowel
            and: [(self isSlavoGermanic: inputKey) not]])
        ifTrue: [
            self
                addPrimaryTranslation: 'KN';
                addSecondaryTranslation: 'N'
        ] ifFalse: [
            "not e.g. 'cagney'
                if(NOT StringAt((current + 2), 2, EY)
                   AND (GetAt(current + 1) NOT= 'Y')
                   AND NOT SlavoGermanic())
             The 'Y' test is kept for parity with the reference; the character
             is known to be 'N' at this point."
            (((midString value: currentIndex + 2 value: 2) ~= 'EY')
                and: [(self keyAt: currentIndex + 1) ~= $Y
                and: [(self isSlavoGermanic: inputKey) not]])
            ifTrue: [
                self
                    addPrimaryTranslation: 'N';
                    addSecondaryTranslation: 'KN'
            ] ifFalse: [
                self
                    addPrimaryTranslation: 'KN';
                    addSecondaryTranslation: 'KN'
            ]
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "'tagliaro'
        if(StringAt((current + 1), 2, LI) AND NOT SlavoGermanic())"
    (followingTwo = 'LI' and: [(self isSlavoGermanic: inputKey) not]) ifTrue: [
        self
            addPrimaryTranslation: 'KL';
            addSecondaryTranslation: 'L'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "-ges-, -gep-, -gel-, -gie- at beginning
        if((current == 0)
           AND ((GetAt(current + 1) == 'Y')
                OR StringAt((current + 1), 2, ES, EP, EB, EL, EY, IB, IL, IN, IE, EI, ER)))"
    (currentIndex = 1
        and: [(self keyAt: currentIndex + 1) = $Y
            or: [#('ES' 'EP' 'EB' 'EL' 'EY' 'IB' 'IL' 'IN' 'IE' 'EI' 'ER') includes: followingTwo]])
    ifTrue: [
        self
            addPrimaryTranslation: 'K';
            addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "-ger-, -gy-
        if((StringAt((current + 1), 2, ER) OR (GetAt(current + 1) == 'Y'))
           AND NOT StringAt(0, 6, DANGER, RANGER, MANGER)
           AND NOT StringAt((current - 1), 1, E, I)
           AND NOT StringAt((current - 1), 3, RGY, OGY))
     The word-initial case has been handled above, so currentIndex > 1
     whenever the first condition holds."
    ((followingTwo = 'ER' or: [(self keyAt: currentIndex + 1) = $Y])
        and: [(#('DANGER' 'RANGER' 'MANGER') includes: (midString value: 1 value: 6)) not
        and: [(#($E $I) includes: (self keyAt: currentIndex - 1)) not
        and: [(#('RGY' 'OGY') includes: (midString value: currentIndex - 1 value: 3)) not]]])
    ifTrue: [
        self
            addPrimaryTranslation: 'K';
            addSecondaryTranslation: 'J'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "italian e.g. 'biaggi'
        if(StringAt((current + 1), 1, E, I, Y) OR StringAt((current - 1), 4, AGGI, OGGI))"
    ((#($E $I $Y) includes: (self keyAt: currentIndex + 1))
        or: [#('AGGI' 'OGGI') includes: (midString value: currentIndex - 1 value: 4)])
    ifTrue: [
        "obvious germanic
            if((StringAt(0, 4, VAN , VON ) OR StringAt(0, 3, SCH))
               OR StringAt((current + 1), 2, ET)) MetaphAdd(K)"
        word := midString value: 1 value: 4.
        ((#('VAN ' 'VON ') includes: word)
            or: [(word startsWith: 'SCH')
            or: [followingTwo = 'ET']])
        ifTrue: [
            self
                addPrimaryTranslation: 'K';
                addSecondaryTranslation: 'K'
        ] ifFalse: [
            "always soft if french ending
                if(StringAt((current + 1), 4, IER )) MetaphAdd(J) else MetaphAdd(J, K)
             The appended space lets 'IER' at the very end of the key match, too."
            (((midString value: currentIndex + 1 value: 5), ' ') startsWith: 'IER ')
            ifTrue: [
                self
                    addPrimaryTranslation: 'J';
                    addSecondaryTranslation: 'J'
            ] ifFalse: [
                self
                    addPrimaryTranslation: 'J';
                    addSecondaryTranslation: 'K'
            ]
        ].
        skipCount := skipCount + 1.
        ^ self
    ].

    "default: hard G; a doubled GG is consumed as one
        if(GetAt(current + 1) == 'G') current += 2 else current += 1;
        MetaphAdd(K)"
    (self keyAt: currentIndex + 1) = $G ifTrue: [
        skipCount := skipCount + 1
    ].
    self
        addPrimaryTranslation: 'K';
        addSecondaryTranslation: 'K'

    "Modified: / 28-07-2017 / 11:31:33 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1891	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1892
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1893	processH
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1894	"Double Metaphone rule for the letter H at currentIndex: H is emitted (as primary and secondary code) only when it is the first letter of the key or follows a vowel, and is itself followed by a vowel; in every other case nothing is emitted. The C++ reference code is quoted below, from http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1895	case 'H':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1896	//only keep if first & before vowel or btw. 2 vowels
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1897	if(((current == 0) OR IsVowel(current - 1))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1898	AND IsVowel(current + 1))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1899	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1900	MetaphAdd(H);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1901	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1902	}else//also takes care of 'HH'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1903	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1904	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1905	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1906
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1907	(((currentIndex = 1) "currentIndex is 1-based, so this is the (current == 0) test of the reference code"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1908	or: [ (self keyAt: currentIndex - 1) isVowel])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1909	and: [(self keyAt: currentIndex + 1) isVowel])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1910	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1911	self addPrimaryTranslation: 'H';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1912	addSecondaryTranslation: 'H'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1913	skipCount := skipCount + 1. "counterpart of current += 2 in the reference code; presumably the caller advances by one more plus skipCount - confirm against the main loop"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1914	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1915	] "otherwise the H contributes nothing to either code"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1916
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1917	"Modified: / 28-07-2017 / 11:29:52 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1918	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1919
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1920	processJ
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1921	"Double Metaphone rule for the letter J at currentIndex. Spanish cases first: if the key starts with 'SAN ', or the J starts a leading JOSE that is the whole key or is followed by a blank, H/H is emitted; any other JOSE emits J/H. Otherwise: J/A at the start of the key, J/H for vowel-J-A or vowel-J-O in keys that are not slavo-germanic, J with ' ' as secondary for a final J, and J/J unless the next letter is one of L T K S N M B Z or the previous one is S, K or L. A directly following second J is skipped. The C++ reference code is quoted below, from http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1922	case 'J':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1923	//obvious spanish, 'jose', 'san jacinto'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1924	if(StringAt(current, 4, JOSE, ) OR StringAt(0, 4, SAN , ) )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1925	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1926	if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, SAN , ) )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1927	MetaphAdd(H);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1928	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1929	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1930	MetaphAdd(J, H);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1931	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1932	current +=1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1933	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1934	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1935
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1936	if((current == 0) AND !!StringAt(current, 4, JOSE, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1937	MetaphAdd(J, A);//Yankelovich/Jankelowicz
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1938	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1939	//spanish pron. of e.g. 'bajador'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1940	if(IsVowel(current - 1)
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1941	AND !!SlavoGermanic()
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1942	AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1943	MetaphAdd(J, H);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1944	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1945	if(current == last)
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1946	MetaphAdd(J, );
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1947	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1948	if(!!StringAt((current + 1), 1, L, T, K, S, N, M, B, Z, )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1949	AND !!StringAt((current - 1), 1, S, K, L, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1950	MetaphAdd(J);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1951
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1952	if(GetAt(current + 1) == 'J')//it could happen!!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1953	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1954	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1955	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1956	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	1957	"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1958	\| currentWord firstWord nextLetter \|
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1959	currentWord := inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size). "up to four letters starting at the J"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1960	firstWord := inputKey copyFrom: 1 to: (4 min: inputKey size). "up to four letters from the start of the key"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1961	nextLetter := self keyAt: currentIndex + 1.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1962	(currentWord = 'JOSE' or: [firstWord = 'SAN '])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1963	ifTrue: [
5212 76ae0b6f061e #TUNING by exept Claus Gittinger <cg@exept.de> parents: 4521 diff changeset	1964	((currentIndex = 1 and: [inputKey size == 4 or: [inputKey size >= 5 and: [(self keyAt: currentIndex + 4) = $ ]]]) "the keyAt: send must be parenthesized: binary = binds tighter than a keyword message, so the unparenthesized form compared the index with the blank and passed the resulting Boolean to keyAt:"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1965	or: [firstWord = 'SAN '])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1966	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1967	self addPrimaryTranslation: 'H';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1968	addSecondaryTranslation: 'H'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1969	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1970	self addPrimaryTranslation: 'J';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1971	addSecondaryTranslation: 'H'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1972	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1973	^self. "no extra skip here, like current += 1 in the reference code"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1974	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1975	(currentIndex = 1 and: [firstWord ~= 'JOSE'])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1976	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1977	self addPrimaryTranslation: 'J';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1978	addSecondaryTranslation: 'A'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1979	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1980	((currentIndex > 1 and: [(self keyAt: currentIndex -1) isVowel])
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1981	and: [(self isSlavoGermanic: inputKey) not and: [nextLetter == $A or: [nextLetter == $O]]])
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1982	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1983	self addPrimaryTranslation: 'J';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1984	addSecondaryTranslation: 'H'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1985	] ifFalse: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	1986	currentIndex = inputKey size
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1987	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1988	self addPrimaryTranslation: 'J';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1989	addSecondaryTranslation: ' '.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1990	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1991	((#($L $T $K $S $N $M $B $Z) includes: nextLetter) not and: [(#($S $K $L) includes: (self keyAt: currentIndex - 1)) not])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1992	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1993	self addPrimaryTranslation: 'J';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1994	addSecondaryTranslation: 'J'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1995	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1996	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1997	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	1998	].
3489 6ef5f530df03 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3488 diff changeset	1999	nextLetter == $J
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2000	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2001	skipCount := skipCount + 1. "a doubled J is consumed as one letter (current += 2 in the reference code)"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2002	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2003
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2004	"Modified: / 28-07-2017 / 11:31:41 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2005	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2006
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2007	processK
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2008	"Double Metaphone rule for the letter K at currentIndex: K is always emitted as primary and secondary code; when the next letter is also K, skipCount is incremented so that the pair counts as one letter. The C++ reference code is quoted below, from http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2009	case 'K':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2010	if(GetAt(current + 1) == 'K')
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2011	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2012	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2013	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2014	MetaphAdd(K);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2015	break;
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2016	"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2017
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2018	(self keyAt: currentIndex + 1) = $K
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2019	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2020	skipCount := skipCount + 1 "counterpart of current += 2 in the reference code; presumably the caller advances by one more plus skipCount - confirm against the main loop"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2021	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2022	self addPrimaryTranslation: 'K';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2023	addSecondaryTranslation: 'K'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2024
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2025	"Modified: / 28-07-2017 / 11:31:46 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2026	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2027
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2028	processL
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2029
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2030	"Double Metaphone rule for the letter L at currentIndex. A single L emits L/L. A doubled L is consumed as one letter; it emits L with ' ' as secondary in the Spanish cases (ILLO, ILLA or ALLE ending the key, or ALLE anywhere in a key that ends in AS, OS, A or O) and L/L otherwise. The C++ reference code follows. case 'L':
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2031	if(GetAt(current + 1) == 'L')
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2032	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2033	//spanish e.g. 'cabrillo', 'gallegos'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2034	if(((current == (length - 3))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2035	AND StringAt((current - 1), 4, ILLO, ILLA, ALLE, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2036	OR ((StringAt((last - 1), 2, AS, OS, ) OR StringAt(last, 1, A, O, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2037	AND StringAt((current - 1), 4, ALLE, )) )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2038	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2039	MetaphAdd(L, );
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2040	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2041	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2042	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2043	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2044	}else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2045	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2046	MetaphAdd(L);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2047	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2048	"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2049	\| currentWord \|
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2050	(self keyAt: currentIndex + 1) = $L
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2051	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2052	(((currentIndex = (inputKey size - 2)) "1-based form of current == length - 3"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2053	and: [(currentIndex > 1 and: [#('ILLO' 'ILLA' 'ALLE') includes: (currentWord := inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: inputKey size))])])
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2054	or: [((#('AS' 'OS') includes: (inputKey copyFrom: inputKey size - 1 to: inputKey size)) or: [#($A $O) includes: (self keyAt: inputKey size)]) and: [currentIndex > 1 and: [(inputKey copyFrom: currentIndex - 1 to: (currentIndex + 2 min: inputKey size)) = 'ALLE']]] "the four-letter window is extracted here again: currentWord is only assigned when the first alternative gets that far, so relying on it made this alternative fail for keys like GALLEGOS"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2055	])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2056	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2057	self addPrimaryTranslation: 'L';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2058	addSecondaryTranslation: ' '.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2059	skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2060	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2061	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2062	skipCount := skipCount + 1. "plain double L: consume both letters, then emit L/L below"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2063	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2064	self addPrimaryTranslation: 'L';
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2065	addSecondaryTranslation: 'L'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2066
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2067	"Modified: / 28-07-2017 / 11:32:03 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2068	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2069
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2070	processM
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2071
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2072	"Double Metaphone rule for the letter M at currentIndex: M is always emitted as primary and secondary code. The following letter is skipped as well when it is a second M, or when the M is part of UMB and that B either ends the key or is followed by ER. The C++ reference code follows. case 'M':
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2073	if((StringAt((current - 1), 3, UMB, )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2074	AND (((current + 1) == last) OR StringAt((current + 2), 2, ER, )))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2075	//'dumb','thumb'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2076	OR (GetAt(current + 1) == 'M') )
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2077	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2078	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2079	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2080	MetaphAdd(M);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2081	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2082	"
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2083	(((currentIndex > 1 and: [(inputKey copyFrom: currentIndex - 1 to: (currentIndex +1 min: inputKey size)) = 'UMB'])
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2084	and: [currentIndex + 1 = inputKey size or: [(inputKey copyFrom: (currentIndex + 2 min: inputKey size) to: (currentIndex + 3 min: inputKey size)) = 'ER']]) "two-letter window, as StringAt(current + 2, 2, ER) in the reference code; a three-letter window only matched when ER ended the key"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2085	or: [(self keyAt: currentIndex + 1) = $M])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2086	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2087	skipCount := skipCount + 1. "counterpart of current += 2 in the reference code: the silent B or the second M is consumed together with this M"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2088	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2089	self addPrimaryTranslation: 'M';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2090	addSecondaryTranslation: 'M'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2091
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2092	"Modified: / 28-07-2017 / 11:32:08 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2093	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2094
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2095	processN
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2096	"Double Metaphone rule for the letter N at currentIndex: N is always emitted as primary and secondary code; when the next letter is also N, skipCount is incremented so that the pair counts as one letter. The C++ reference code is quoted below, from http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2097	case 'N':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2098	if(GetAt(current + 1) == 'N')
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2099	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2100	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2101	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2102	MetaphAdd(N);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2103	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2104
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2105	"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2106
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2107	(self keyAt: currentIndex + 1) = $N
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2108	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2109	skipCount := skipCount + 1 "counterpart of current += 2 in the reference code; presumably the caller advances by one more plus skipCount - confirm against the main loop"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2110	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2111	self addPrimaryTranslation: 'N';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2112	addSecondaryTranslation: 'N'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2113
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2114	"Modified: / 28-07-2017 / 11:32:14 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2115	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2116
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2117	processNtilde
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2118	"Double Metaphone rule for the letter N with tilde: it is coded like a plain N (N as primary and secondary code) and never skips a following letter. The C++ reference code follows. case 'Ñ':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2119	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2120	MetaphAdd(N);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2121	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2122	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2123	self addPrimaryTranslation: 'N';
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2124	addSecondaryTranslation: 'N'.
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2125	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2126
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2127	processP
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2128	"Double Metaphone rule for the letter P at currentIndex: PH emits F/F and consumes both letters; any other P emits P/P, and a directly following P or B (campbell, raspberry) is consumed together with it. The C++ reference code follows. case 'P':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2129	if(GetAt(current + 1) == 'H')
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2130	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2131	MetaphAdd(F);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2132	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2133	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2134	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2135
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2136	//also account for campbell, raspberry
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2137	if(StringAt((current + 1), 1, P, B, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2138	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2139	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2140	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2141	MetaphAdd(P);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2142	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2143	"
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2144	\| nextLetter \|
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2145	(nextLetter := self keyAt: currentIndex + 1) = $H
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2146	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2147	self addPrimaryTranslation: 'F';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2148	addSecondaryTranslation: 'F'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2149	skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2150	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2151	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2152	(#($P $B) includes: nextLetter)
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2153	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2154	skipCount := skipCount + 1.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2155	]. "only the extra skip is conditional: in the reference code MetaphAdd(P) follows the if/else, so P must be emitted for PP and PB as well"
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2156	self addPrimaryTranslation: 'P';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2157	addSecondaryTranslation: 'P'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2158
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2159
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2160	"Modified: / 28-07-2017 / 11:32:28 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2161	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2162
processQ
    "Encode the letter Q found at currentIndex.
     Q always contributes 'K' to both the primary and the secondary
     translation. When the following letter is another Q, skipCount is
     incremented so that the doubled letter is consumed as well.

     Port of the 'Q' case of
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp

            if (GetAt(current + 1) == 'Q')
                current += 2;
            else
                current += 1;
            MetaphAdd('K');
            break;
    "

    | followedByQ |

    followedByQ := (self keyAt: currentIndex + 1) = $Q.
    followedByQ ifTrue:[
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'K'.
    self addSecondaryTranslation: 'K'.

    "Modified: / 28-07-2017 / 11:32:32 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2183	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2184
processR
    "Encode the letter R found at currentIndex.
     The secondary translation always receives 'R'. The primary
     translation receives 'R' too, except for the french-style ending
     (e.g. 'rogier', but not 'hochmeier'), where it receives the empty
     string. That exception applies when all of the following hold:
        - R is the last letter of inputKey,
        - inputKey is not slavo-germanic,
        - the two letters before R are 'IE',
        - the two letters before that are neither 'ME' nor 'MA'.
     A directly following second R is consumed by incrementing skipCount.

     Port of the 'R' case of
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp

            if ((current == last)
                AND NOT SlavoGermanic()
                AND StringAt((current - 2), 2, 'IE')
                AND NOT StringAt((current - 4), 2, 'ME', 'MA'))
                MetaphAdd('', 'R');
            else
                MetaphAdd('R');

            if (GetAt(current + 1) == 'R')
                current += 2;
            else
                current += 1;
            break;
    "

    | primaryCode precedingPair earlierPair |

    primaryCode := 'R'.
    (currentIndex = inputKey size and:[ (self isSlavoGermanic: inputKey) not ]) ifTrue:[
        "the start indices are clamped to 1 so that short keys do not index before the string"
        precedingPair := inputKey
                            copyFrom: (currentIndex - 2 max: 1)
                            to: (currentIndex - 1 max: 1).
        precedingPair = 'IE' ifTrue:[
            earlierPair := inputKey
                                copyFrom: (currentIndex - 4 max: 1)
                                to: (currentIndex - 3 max: 1).
            (#('ME' 'MA') includes: earlierPair) ifFalse:[
                primaryCode := ''
            ]
        ]
    ].

    self addPrimaryTranslation: primaryCode.
    self addSecondaryTranslation: 'R'.

    (self keyAt: currentIndex + 1) = $R ifTrue:[
        skipCount := skipCount + 1
    ].

    "Modified: / 28-07-2017 / 11:32:37 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2223	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2224
processRemainingCharacters
    "Walk over inputKey from startIndex to its end and dispatch each
     character which is neither a vowel nor Y to its handler method:
        Ç               -> #processCedille
        Ñ               -> #processNtilde
        any other char  -> #process<char>   (e.g. #processQ, #processR)

     A handler may increment skipCount to consume following letters;
     while skipCount is non-zero, positions are skipped (and skipCount is
     decremented) without being looked at.
     The walk ends early once both translations are longer than 4 characters.

     Characters for which no handler method exists (digits, blanks,
     punctuation, letters without a process method) are ignored instead of
     raising doesNotUnderstand. This corresponds to the 'default: current += 1'
     branch of the reference implementation's main loop
        http://aspell.sourceforge.net/metaphone/dmetaph.cpp
    "

    startIndex to: inputKey size do:[:i |
        | c methodSelector |

        skipCount = 0 ifTrue:[
            ((primaryTranslation size > 4) and:[ secondaryTranslation size > 4 ]) ifTrue:[
                ^ self
            ].

            currentIndex := i.
            c := self keyAt: i.

            (c isVowel not and:[ c ~= $Y ]) ifTrue:[
                "the two non-ASCII letters cannot be part of a unary selector"
                c == $Ç ifTrue:[
                    methodSelector := #processCedille
                ] ifFalse:[
                    c == $Ñ ifTrue:[
                        methodSelector := #processNtilde
                    ] ifFalse:[
                        methodSelector := ('process', c asString) asSymbol
                    ]
                ].
                "only dispatch if there is a handler; otherwise skip the character"
                (self respondsTo: methodSelector) ifTrue:[
                    self perform: methodSelector
                ]
            ]
        ] ifFalse:[
            skipCount := skipCount - 1
        ]
    ]

    "Modified: / 28-07-2017 / 11:24:15 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2252	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2253
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2254	processS
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2255	"http://aspell.sourceforge.net/metaphone/dmetaph.cpp
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2256	case 'S':
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2257	//special cases 'island', 'isle', 'carlisle', 'carlysle'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2258	if(StringAt((current - 1), 3, ISL, YSL, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2259	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2260	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2261	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2262	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2263
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2264	//special case 'sugar-'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2265	if((current == 0) AND StringAt(current, 5, SUGAR, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2266	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2267	MetaphAdd(X, S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2268	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2269	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2270	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2271
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2272	if(StringAt(current, 2, SH, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2273	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2274	//germanic
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2275	if(StringAt((current + 1), 4, HEIM, HOEK, HOLM, HOLZ, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2276	MetaphAdd(S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2277	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2278	MetaphAdd(X);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2279	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2280	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2281	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2282
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2283	//italian & armenian
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2284	if(StringAt(current, 3, SIO, SIA, ) OR StringAt(current, 4, SIAN, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2285	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2286	if(!!SlavoGermanic())
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2287	MetaphAdd(S, X);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2288	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2289	MetaphAdd(S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2290	current += 3;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2291	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2292	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2293
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2294	//german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2295	//also, -sz- in slavic language altho in hungarian it is pronounced 's'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2296	if(((current == 0)
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2297	AND StringAt((current + 1), 1, M, N, L, W, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2298	OR StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2299	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2300	MetaphAdd(S, X);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2301	if(StringAt((current + 1), 1, Z, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2302	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2303	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2304	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2305	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2306	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2307
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2308	if(StringAt(current, 2, SC, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2309	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2310	//Schlesinger's rule
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2311	if(GetAt(current + 2) == 'H')
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2312	//dutch origin, e.g. 'school', 'schooner'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2313	if(StringAt((current + 3), 2, OO, ER, EN, UY, ED, EM, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2314	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2315	//'schermerhorn', 'schenker'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2316	if(StringAt((current + 3), 2, ER, EN, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2317	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2318	MetaphAdd(X, SK);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2319	}else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2320	MetaphAdd(SK);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2321	current += 3;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2322	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2323	}else{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2324	if((current == 0) AND !!IsVowel(3) AND (GetAt(3) !!= 'W'))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2325	MetaphAdd(X, S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2326	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2327	MetaphAdd(X);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2328	current += 3;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2329	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2330	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2331
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2332	if(StringAt((current + 2), 1, I, E, Y, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2333	{
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2334	MetaphAdd(S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2335	current += 3;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2336	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2337	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2338	//else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2339	MetaphAdd(SK);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2340	current += 3;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2341	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2342	}
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2343
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2344	//french e.g. 'resnais', 'artois'
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2345	if((current == last) AND StringAt((current - 2), 2, AI, OI, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2346	MetaphAdd(, S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2347	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2348	MetaphAdd(S);
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2349
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2350	if(StringAt((current + 1), 1, S, Z, ))
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2351	current += 2;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2352	else
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2353	current += 1;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2354	break;
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2355	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2356
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2357	\| nextChar char2 chars char \|
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2358	(#('ISL' 'YSL') includes: (inputKey copyFrom: (currentIndex - 1 max: 1) to: (currentIndex + 1 min: inputKey size)))
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2359	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2360	^self
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2361	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2362	(currentIndex = 1 and: [(inputKey copyFrom: 1 to: (5 min: inputKey size)) = 'SUGAR'])
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2363	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2364	self addPrimaryTranslation: 'X';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2365	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2366	^self.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2367	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2368	(inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SH'
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2369	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2370	(#('HEIM' 'HOEK' 'HOLM' 'HOLZ') includes: (inputKey copyFrom: (currentIndex + 1 min: inputKey size) to: ((currentIndex + 5) min: inputKey size)))
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2371	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2372	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2373	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2374	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2375	self addPrimaryTranslation: 'X';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2376	addSecondaryTranslation: 'X'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2377	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2378	skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2379	^self
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2380	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2381	((#('SIO' 'SIA') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 2 min: inputKey size)))
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2382	or: [(inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)) = 'SIAN'])
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2383	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2384	(self isSlavoGermanic: inputKey) not
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2385	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2386	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2387	addSecondaryTranslation: 'X'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2388	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2389	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2390	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2391	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2392	skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2393	^self
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2394	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2395	((currentIndex = 1 and: [#($M $N $L $W) includes: (self keyAt: currentIndex + 1)])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2396	or: [(nextChar := self keyAt: currentIndex + 1) = $Z])
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2397	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2398	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2399	addSecondaryTranslation: 'X'.
3488 5a69e672d7f8 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 3185 diff changeset	2400	nextChar == $Z
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2401	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2402	skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2403	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2404	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2405	^self.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2406	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2407	((inputKey copyFrom: currentIndex to: ((currentIndex + 1) min: inputKey size)) = 'SC')
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2408	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2409	(char2 := self keyAt: currentIndex + 2) = $H
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2410	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2411	(#('OO' 'ER' 'EN' 'UY' 'ED' 'EM') includes: (chars := inputKey copyFrom: ((currentIndex + 3) min: inputKey size) to: ((currentIndex + 4) min: inputKey size)))
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2412	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2413	(#('ER' 'EN') includes: chars)
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2414	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2415	self addPrimaryTranslation: 'X';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2416	addSecondaryTranslation: 'SK'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2417	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2418	self addPrimaryTranslation: 'SK';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2419	addSecondaryTranslation: 'SK'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2420	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2421	skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2422	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2423	] ifFalse: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2424	((currentIndex = 1 and: [(char := inputKey at: 4 ifAbsent: [$b]) isVowel not]) and: [char ~= $W])
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2425	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2426	self addPrimaryTranslation: 'X';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2427	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2428	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2429	self addPrimaryTranslation: 'X';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2430	addSecondaryTranslation: 'X'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2431	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2432	skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2433	^self .
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2434	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2435	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2436	(#($I $E $Y) includes: char2)
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2437	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2438	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2439	addSecondaryTranslation: 'S'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2440	skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2441	^self .
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2442	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2443	self addPrimaryTranslation: 'SK';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2444	addSecondaryTranslation: 'SK'.
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2445	skipCount := skipCount + 2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2446	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2447	]
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2448	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2449	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2450	(currentIndex = inputKey size and: [(#('AI' 'OI') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: ((currentIndex - 1) max: 1)))])
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2451	ifTrue: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2452	self addPrimaryTranslation: '';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2453	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2454	] ifFalse: [
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2455	self addPrimaryTranslation: 'S';
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2456	addSecondaryTranslation: 'S'.
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2457	].
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2458	(#($S $Z) includes: (self keyAt: currentIndex + 1))
d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2459	ifTrue: [
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2460	skipCount := skipCount + 1.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2461	^self.
2213 d465fa29df0e * empty log message * Claus Gittinger <cg@exept.de> parents: 2211 diff changeset	2462	].
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2463
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2464	"Modified: / 28-07-2017 / 11:34:18 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2465	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2466
processT
    "Encode the letter T found at currentIndex.
     Rules, in order of precedence:
        TION            -> X, consumes 3 characters
        TIA, TCH        -> X, consumes 3 characters
        TH, TTH         -> 0 (theta) with alternate T; plain T for
                           thomas/thames and germanic names; consumes 2
        TT, TD          -> T, consumes 2 characters
        anything else   -> T, consumes 1 character
     skipCount is incremented by the number of characters consumed in
     addition to the current one (compare with the 'current += n'
     statements of the reference below).

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'T':
            if(StringAt(current, 4, TION, ))
            {
                MetaphAdd(X);
                current += 3;
                break;
            }

            if(StringAt(current, 3, TIA, TCH, ))
            {
                MetaphAdd(X);
                current += 3;
                break;
            }

            if(StringAt(current, 2, TH, )
               OR StringAt(current, 3, TTH, ))
            {
                //special case 'thomas', 'thames' or germanic
                if(StringAt((current + 2), 2, OM, AM, )
                   OR StringAt(0, 4, VAN , VON , )
                   OR StringAt(0, 3, SCH, ))
                {
                    MetaphAdd(T);
                }else{
                    MetaphAdd(0, T);
                }
                current += 2;
                break;
            }

            if(StringAt((current + 1), 1, T, D, ))
                current += 2;
            else
                current += 1;
            MetaphAdd(T);
            break;
    "

    | keySize headOfLength afterTH isPlainT |

    keySize := inputKey size.
    "answers the (possibly truncated) n characters starting at the T"
    headOfLength := [:n | inputKey copyFrom: currentIndex to: ((currentIndex + n - 1) min: keySize)].

    (headOfLength value: 4) = 'TION' ifTrue: [
        self addPrimaryTranslation: 'X';
             addSecondaryTranslation: 'X'.
        skipCount := skipCount + 2.
        ^ self
    ].

    (#('TIA' 'TCH') includes: (headOfLength value: 3)) ifTrue: [
        self addPrimaryTranslation: 'X';
             addSecondaryTranslation: 'X'.
        skipCount := skipCount + 2.
        ^ self
    ].

    ((headOfLength value: 2) = 'TH' or: [(headOfLength value: 3) = 'TTH']) ifTrue: [
        afterTH := inputKey copyFrom: currentIndex + 2 to: ((currentIndex + 3) min: keySize).
        "thomas, thames, or a germanic prefix: the H is silent"
        isPlainT := (#('OM' 'AM') includes: afterTH)
                        or: [(#('VAN ' 'VON ') includes: (inputKey copyFrom: 1 to: (4 min: keySize)))
                        or: [(inputKey copyFrom: 1 to: (3 min: keySize)) = 'SCH']].
        self addPrimaryTranslation: (isPlainT ifTrue: ['T'] ifFalse: ['0']);
             addSecondaryTranslation: 'T'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "a following T or D is swallowed by this T"
    (#($T $D) includes: (self keyAt: currentIndex + 1)) ifTrue: [
        skipCount := skipCount + 1.
    ].
    self addPrimaryTranslation: 'T'.
    self addSecondaryTranslation: 'T'.

    "Modified: / 28-07-2017 / 11:33:33 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2547	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2548
processV
    "Encode the letter V found at currentIndex: always F in both the
     primary and the secondary key. A doubled V (VV) is consumed as one
     unit by bumping skipCount by one extra character.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'V':
            if(GetAt(current + 1) == 'V')
                current += 2;
            else
                current += 1;
            MetaphAdd(F);
            break;
    "

    | isDoubledV |

    isDoubledV := (self keyAt: currentIndex + 1) = $V.
    isDoubledV ifTrue: [
        skipCount := skipCount + 1
    ].
    self addPrimaryTranslation: 'F'.
    self addSecondaryTranslation: 'F'.

    "Modified: / 28-07-2017 / 11:34:27 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2570	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2571
processW
    "Encode the letter W found at currentIndex.
     Rules, in order:
        WR                          -> R, consumes 2 characters
        word-initial W + vowel      -> A with alternate F (Wasserman/Vasserman)
        word-initial WH             -> A
        final W after a vowel,
        EWSKI/EWSKY/OWSKI/OWSKY,
        or a word starting with SCH -> nothing, with alternate F
        WICZ, WITZ                  -> TS with alternate FX, consumes 4
        anything else               -> silent
     As in the reference, the word-initial rule does not return; the
     remaining rules are still checked afterwards.

     Two deviations from the reference were corrected here:
       - the word-initial guard (currentIndex = 1) now covers the WH test
         as well; before, a WH in the middle of a word entered the branch
         with nextLetter still nil and sent isVowel to nil.
       - the WICZ/WITZ test now compares 4 characters instead of 5, so it
         also matches when the suffix is not at the very end of the word.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'W':
            //can also be in middle of word
            if(StringAt(current, 2, WR, ))
            {
                MetaphAdd(R);
                current += 2;
                break;
            }

            if((current == 0)
               AND (IsVowel(current + 1) OR StringAt(current, 2, WH, )))
            {
                //Wasserman should match Vasserman
                if(IsVowel(current + 1))
                    MetaphAdd(A, F);
                else
                    //need Uomo to match Womo
                    MetaphAdd(A);
            }

            //Arnow should match Arnoff
            if(((current == last) AND IsVowel(current - 1))
               OR StringAt((current - 1), 5, EWSKI, EWSKY, OWSKI, OWSKY, )
               OR StringAt(0, 3, SCH, ))
            {
                MetaphAdd(, F);
                current +=1;
                break;
            }

            //polish e.g. 'filipowicz'
            if(StringAt(current, 4, WICZ, WITZ, ))
            {
                MetaphAdd(TS, FX);
                current +=4;
                break;
            }

            //else skip it
            current +=1;
            break;
    "

    | word nextLetter |

    word := inputKey copyFrom: currentIndex to: (currentIndex + 1 min: inputKey size).
    word = 'WR' ifTrue: [
        self addPrimaryTranslation: 'R';
             addSecondaryTranslation: 'R'.
        skipCount := skipCount + 1.
        ^ self
    ].

    "word-initial W only: both the vowel and the WH test are guarded by
     currentIndex = 1, exactly as in the reference"
    nextLetter := self keyAt: currentIndex + 1.
    (currentIndex = 1 and: [nextLetter isVowel or: [word = 'WH']]) ifTrue: [
        nextLetter isVowel ifTrue: [
            "Wasserman should match Vasserman"
            self addPrimaryTranslation: 'A';
                 addSecondaryTranslation: 'F'.
        ] ifFalse: [
            "need Uomo to match Womo"
            self addPrimaryTranslation: 'A';
                 addSecondaryTranslation: 'A'.
        ]
    ].

    "Arnow should match Arnoff"
    ((((currentIndex = inputKey size) and: [(self keyAt: currentIndex - 1) isVowel])
        or: [#('EWSKI' 'EWSKY' 'OWSKI' 'OWSKY') includes: (inputKey copyFrom: ((currentIndex - 1) max: 1) to: (currentIndex + 3 min: inputKey size))])
        or: [inputKey startsWith: 'SCH'])
    ifTrue: [
        self addPrimaryTranslation: '';
             addSecondaryTranslation: 'F'.
        ^ self
    ].

    "polish e.g. 'filipowicz': compare exactly the 4 characters starting at the W"
    (#('WICZ' 'WITZ') includes: (inputKey copyFrom: currentIndex to: (currentIndex + 3 min: inputKey size)))
    ifTrue: [
        self addPrimaryTranslation: 'TS';
             addSecondaryTranslation: 'FX'.
        skipCount := skipCount + 3.
        ^ self
    ].

    "else: silent W, nothing is added"

    "Modified: / 28-07-2017 / 11:34:51 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2654	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2655
processX
    "Encode the letter X found at currentIndex: KS in both keys, except
     for a word-final X preceded by IAU, EAU, AU or OU (french, e.g.
     breaux), which is silent. A following C or X is consumed together
     with this X by bumping skipCount by one extra character.

     The look-behind ranges were corrected: the lower bound is clamped
     with max: 1 (it was min: 1, which yields 1 or a non-positive index),
     and the range ends before the X itself (it used to include it), so
     that the 3 resp. 2 characters preceding the X are compared, as in
     the reference. The upper bound is clamped to 1 as well, so that a
     one-letter key never produces an inverted range.

     Reference: http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'X':
            //french e.g. breaux
            if(!!((current == last)
                 AND (StringAt((current - 3), 3, IAU, EAU, )
                      OR StringAt((current - 2), 2, AU, OU, ))) )
                MetaphAdd(KS);

            if(StringAt((current + 1), 1, C, X, ))
                current += 2;
            else
                current += 1;
            break;
    "

    | lastBeforeX isSilentFrenchEnding |

    lastBeforeX := (currentIndex - 1) max: 1.
    isSilentFrenchEnding := (currentIndex = inputKey size)
        and: [(#('IAU' 'EAU') includes: (inputKey copyFrom: ((currentIndex - 3) max: 1) to: lastBeforeX))
            or: [#('AU' 'OU') includes: (inputKey copyFrom: ((currentIndex - 2) max: 1) to: lastBeforeX)]].

    isSilentFrenchEnding ifFalse: [
        self addPrimaryTranslation: 'KS';
             addSecondaryTranslation: 'KS'.
    ].

    "XC and XX count as a single sound"
    (#($C $X) includes: (self keyAt: currentIndex + 1)) ifTrue: [
        skipCount := skipCount + 1.
        ^ self
    ]

    "Modified: / 28-07-2017 / 11:34:44 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2687	!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	2688
processZ
    "Encode the letter Z found at currentIndex (Double Metaphone).

     - ZH (chinese pinyin, e.g. 'zhao'): add 'J' to both the primary and
       the secondary code and bump skipCount by one.
     - otherwise the primary code always gets 'S'; the secondary code gets
       'TS' if the Z is followed by ZO, ZI or ZA, or if the key is
       slavo-germanic and the Z is neither the first letter nor preceded
       by a T; else it gets 'S' as well.
     - a doubled ZZ bumps skipCount by one, so that (presumably) the
       caller does not encode the second Z again.

     Reference implementation:
     http://aspell.sourceforge.net/metaphone/dmetaph.cpp
        case 'Z':
            //chinese pinyin e.g. 'zhao'
            if(GetAt(current + 1) == 'H')
            {
                MetaphAdd(J);
                current += 2;
                break;
            }else
                if(StringAt((current + 1), 2, ZO, ZI, ZA, )
                   OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) !!= 'T')))
                {
                    MetaphAdd(S, TS);
                }
                else
                    MetaphAdd(S);

            if(GetAt(current + 1) == 'Z')
                current += 2;
            else
                current += 1;
            break;
    "

    (self keyAt: currentIndex + 1) = $H
        ifTrue: [
            self addPrimaryTranslation: 'J';
                 addSecondaryTranslation: 'J'.
            skipCount := skipCount + 1.
            ^self
        ] ifFalse: [
            "the previous letter must be compared against the character $T;
             comparing a character against the string 'T' is never equal,
             which made the 'not preceded by T' guard a no-op"
            ((#('ZO' 'ZI' 'ZA') includes: (inputKey copyFrom: ((currentIndex + 1) min: inputKey size) to: ((currentIndex + 2) min: inputKey size))) or: [
                (self isSlavoGermanic: inputKey) and: [(currentIndex > 1 and: [(self keyAt: currentIndex - 1) ~= $T])]
            ])
                ifTrue: [
                    self addPrimaryTranslation: 'S';
                         addSecondaryTranslation: 'TS'.
                ] ifFalse: [
                    self addPrimaryTranslation: 'S';
                         addSecondaryTranslation: 'S'.
                ].
            (self keyAt: currentIndex + 1) = $Z
                ifTrue: [
                    skipCount := skipCount + 1.
                    ^self
                ].
        ]
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2737
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2738	"Modified: / 28-07-2017 / 11:35:12 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2739	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	2740
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2741	!PhoneticStringUtilities::ExtendedSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2742
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2743	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2744	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2745	There are many extended and enhanced soundex variants around;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2746	here is one, called 'extended soundex'. It is described for example in
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2747	http://www.epidata.dk/documentation.php.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2748	An author or origin is unknown.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2749
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2750	The number of digits is increased to 5 or 8;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2751	The first character is not used literally; instead it is encoded like the rest.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2752	This might have a negative effect on names starting with a vowel, though.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2753
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2754	Overall, it can be doubted if this is really an enhancement after all.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2755	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2756	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2757
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2758	!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2759
phoneticStringsFor:aString
    "generates both an extended soundex of length 5 and one of length 8.

     Answers a two-element Array: the 5-digit code followed by the
     8-digit code. Every character of the uppercased input is mapped by
     #translate:; characters without a code (nil) and vowels ('0') add no
     digit, and a digit equal to the code of the immediately preceding
     character is dropped. Both codes are right-padded with '0'.
     The 5-digit code is always the first 5 digits of the 8-digit code."

    |codes u t prevCode|

    u := aString asUppercase.
    codes := ''.
    u do:[:c |
        t := self translate:c.
        (t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
            codes := codes , t.
            "stop as soon as the long code is complete"
            codes size == 8 ifTrue:[
                ^ Array with:(codes copyTo:5) with:codes
            ].
        ].
        "remember vowels and untranslatable characters too: they act as separators,
         so that the same digit may appear again after them"
        prevCode := t
    ].
    [ codes size < 8 ] whileTrue:[
        codes := codes , '0'
    ].
    "cut the short code here as well; otherwise an input yielding 6 or 7 digits
     would answer a short code which is longer than 5 digits"
    ^ Array with:(codes copyTo:5) with:codes

    "
     self basicNew phoneticStringsFor:'müller'     #('87900' '87900000')
     self basicNew phoneticStringsFor:'miller'     #('87900' '87900000')
     self basicNew phoneticStringsFor:'muller'     #('87900' '87900000')
     self basicNew phoneticStringsFor:'muler'      #('87900' '87900000')
     self basicNew phoneticStringsFor:'schmidt'    #('38600' '38600000')
     self basicNew phoneticStringsFor:'schneider'  #('38690' '38690000')
     self basicNew phoneticStringsFor:'fischer'    #('23900' '23900000')
     self basicNew phoneticStringsFor:'weber'      #('19000' '19000000')
     self basicNew phoneticStringsFor:'meyer'      #('89000' '89000000')
     self basicNew phoneticStringsFor:'wagner'     #('48900' '48900000')
     self basicNew phoneticStringsFor:'schulz'     #('37500' '37500000')
     self basicNew phoneticStringsFor:'becker'     #('13900' '13900000')
     self basicNew phoneticStringsFor:'hoffmann'   #('28800' '28800000')
     self basicNew phoneticStringsFor:'schäfer'    #('32900' '32900000')
     self basicNew phoneticStringsFor:'strassburg' #('36931' '36931940')
    "
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2803	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2804
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2805	!PhoneticStringUtilities::ExtendedSoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2806
translate:aCharacter
    "Answer the extended soundex digit for aCharacter as a one-character
     String, or nil if the character has no code.
     Only uppercase letters are recognized (the caller uppercases its input).
     Vowels (including Y) answer '0'.

        B P     -> '1'
        F V     -> '2'
        C S K   -> '3'
        G J     -> '4'
        Q X Z   -> '5'
        D T     -> '6'
        L       -> '7'
        M N     -> '8'
        R       -> '9'

     use simple if's for more speed when compiled"

    "vowels serve as separators"
    aCharacter == $A ifTrue:[^ '0' ].
    aCharacter == $E ifTrue:[^ '0' ].
    aCharacter == $I ifTrue:[^ '0' ].
    aCharacter == $O ifTrue:[^ '0' ].
    aCharacter == $U ifTrue:[^ '0' ].
    aCharacter == $Y ifTrue:[^ '0' ].

    aCharacter == $B ifTrue:[^ '1' ].
    aCharacter == $P ifTrue:[^ '1' ].

    aCharacter == $F ifTrue:[^ '2' ].
    aCharacter == $V ifTrue:[^ '2' ].

    aCharacter == $C ifTrue:[^ '3' ].
    aCharacter == $S ifTrue:[^ '3' ].
    aCharacter == $K ifTrue:[^ '3' ].

    aCharacter == $G ifTrue:[^ '4' ].
    aCharacter == $J ifTrue:[^ '4' ].

    aCharacter == $Q ifTrue:[^ '5' ].
    aCharacter == $X ifTrue:[^ '5' ].
    aCharacter == $Z ifTrue:[^ '5' ].

    "G is not listed here: it already answered '4' above,
     so a second test for it could never be reached"
    aCharacter == $D ifTrue:[^ '6' ].
    aCharacter == $T ifTrue:[^ '6' ].

    aCharacter == $L ifTrue:[^ '7' ].

    aCharacter == $M ifTrue:[^ '8' ].
    aCharacter == $N ifTrue:[^ '8' ].

    aCharacter == $R ifTrue:[^ '9' ].
    ^ nil
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2846	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2847
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2848	!PhoneticStringUtilities::SingleResultPhoneticStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2849
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2850	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2851	"
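d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2852	Abstract superclass for comparators which generate a single phonetic code per word: concrete subclasses must implement #encode:, and #phoneticStringsFor: answers that code wrapped in a one-element Array.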
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2853
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2854	[author:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2855	cg
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2856
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2857	[instance variables:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2858
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2859	[class variables:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2860
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2861	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2862
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2863	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2864	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2865
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2866	!PhoneticStringUtilities::SingleResultPhoneticStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2867
encode:word
    "Answer the phonetic code for word.
     Abstract here; concrete subclasses must implement it.
     #phoneticStringsFor: wraps the answer in a one-element Array."

    ^ self subclassResponsibility
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2870
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2871	"Created: / 28-07-2017 / 15:20:49 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2872	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2873
phoneticStringsFor:word
    "Answer a one-element Array holding the code
     which #encode: generates for word."

    |code|

    code := self encode:word.
    ^ Array with:code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2876
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2877	"Created: / 28-07-2017 / 15:20:38 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2878	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2879
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2880	!PhoneticStringUtilities::MRAStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2881
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2882	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2883	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2884	Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2885
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2886	The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2887
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2888	[see also:]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2889	https://en.wikipedia.org/wiki/Match_Rating_Approach
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2890
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2891	G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2892	''Accessing Individual Records from Personal Data Files Using Nonunique Identifiers''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2893	US National Institute of Standards and Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2894	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2895	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2896
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2897	rCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2898	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2899	## Copyright (c) 2015, James P. Howard, II <jh@jameshoward.us>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2900	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2901	## Redistribution and use in source and binary forms, with or without
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2902	## modification, are permitted provided that the following conditions are
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2903	## met:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2904	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2905	## Redistributions of source code must retain the above copyright
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2906	## notice, this list of conditions and the following disclaimer.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2907	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2908	## Redistributions in binary form must reproduce the above copyright
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2909	## notice, this list of conditions and the following disclaimer in
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2910	## the documentation and/or other materials provided with the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2911	## distribution.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2912	##
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2913	## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2914	## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2915	## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2916	## A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2917	## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2918	## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2919	## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2920	## DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2921	## THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2922	## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2923	## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2924
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2925	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2926	#' @title Match Rating Approach Encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2927	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2928	#' @description
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2929	#' The Western Airlines matching rating approach name encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2930	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2931	#' @param word string or vector of strings to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2932	#' @param x MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2933	#' @param y MRA-encoded character vector
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2934	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2935	#' @details
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2936	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2937	#' The variable \code{word} is the name to be encoded. The variable
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2938	#' \code{maxCodeLen} is \emph{not} supported in this algorithm encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2939	#' because the algorithm itself is dependent upon its six-character
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2940	#' length. The variables \code{x} and \code{y} are MRA-encoded and are
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2941	#' compared to each other using the MRA comparison specification.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2942	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2943	#' @return The \code{mra_encode} function returns match rating approach
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2944	#' encoded character vector. The \code{mra_compare} returns a boolean
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2945	#' vector which is \code{TRUE} if \code{x} and \code{y} pass the MRA
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2946	#' comparison test.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2947	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2948	#' @references
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2949	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2950	#' G.B. Moore, J.L. Kuhns, J.L. Treffzs, and C.A. Montgomery,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2951	#' \emph{Accessing Individual Records from Personal Data Files Using
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2952	#' Nonunique Identifiers,} US National Institute of Standards and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2953	#' Technology, SP-500-2 (1977), p. 17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2954	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2955	#' @family phonics
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2956	#'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2957	#' @examples
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2958	#' mra_encode("William")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2959	#' mra_encode(c("Peter", "Peady"))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2960	#' mra_encode("Stevenson")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2961
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2962	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2963	#' @name mra_encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2964	#' @export
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2965	mra_encode <- function(word) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2966
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2967	## First, remove any nonalphabetical characters and uppercase it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2968	word <- gsub("[^[:alpha:]]*", "", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2969	word <- toupper(word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2970
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2971	## First character of key = first character of name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2972	first <- substr(word, 1, 1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2973	word <- substr(word, 2, nchar(word))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2974
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2975	## Delete vowels not at the start of the word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2976	word <- gsub("[AEIOU]", "", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2977	word <- paste(first, word, sep = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2978
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2979	## Remove duplicate consecutive characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2980	word <- gsub("([A-Z])\\1+", "\\1", word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2981
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2982	## If longer than 6 characters, take first and last 3...and we have
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2983	## to vectorize it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2984	for(i in 1:length(word)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2985	if((l = nchar(word[i])) > 6) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2986	first <- substr(word[i], 1, 3)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2987	last <- substr(word[i], l - 2, l)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2988	word[i] <- paste(first, last, sep = "");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2989	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2990	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2991
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2992	return(word)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2993	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2994
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2995	#' @rdname mra
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2996	#' @name mra_compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2997	#' @export
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2998	mra_compare <- function(x, y) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	2999	mra <- data.frame(x = x, y = y, sim = 0, min = 100, stringsAsFactors = FALSE)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3000
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3001	## Obtain the minimum rating value by calculating the length sum of
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3002	## the encoded strings and using table A (from Wikipedia). We start
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3003	## by setting the minimum to be the sum and move from there.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3004	mra$lensum <- nchar(mra$x) + nchar(mra$y)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3005	mra$min[mra$lensum == 12] <- 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3006	mra$min[mra$lensum > 7 && mra$lensum <= 11] <- 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3007	mra$min[mra$lensum > 4 && mra$lensum <= 7] <- 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3008	mra$min[mra$lensum <= 4] <- 5
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3009
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3010	## If the length difference between the encoded strings is 3 or
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3011	## greater, then no similarity comparison is done. For us, we
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3012	## continue the similarity comparison out of laziness and ensure the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3013	## minimum is impossibly high to meet.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3014	mra$min[abs(nchar(mra$x) - nchar(mra$y)) >= 3] <- 100
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3015
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3016	## Start the comparison.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3017	x <- strsplit(mra$x, split = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3018	y <- strsplit(mra$y, split = "")
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3019	rows <- nrow(mra)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3020	for(i in 1:rows) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3021	## Process the encoded strings from left to right and remove any
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3022	## identical characters found from both strings respectively.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3023	j <- 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3024	while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3025	if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3026	x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3027	y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3028	} else
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3029	j <- j + 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3030	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3031
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3032	## Process the unmatched characters from right to left and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3033	## remove any identical characters found from both names
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3034	## respectively.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3035	x[[i]] <- rev(x[[i]])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3036	y[[i]] <- rev(y[[i]])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3037	j <- 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3038	while(j < min(length(x[[i]]), length(y[[i]]))) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3039	if(x[[i]][j] == y[[i]][j]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3040	x[[i]] <- x[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3041	y[[i]] <- y[[i]][-j]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3042	} else
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3043	j <- j + 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3044	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3045	## Subtract the number of unmatched characters from 6 in the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3046	## longer string. This is the similarity rating.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3047	len <- min(length(x[[i]]), length(y[[i]]))
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3048	mra$sim[i] <- 6 - len
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3049	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3050
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3051	## If the similarity is greater than or equal to the minimum
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3052	## required, it is a successful match.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3053	mra$match <- (mra$sim >= mra$min)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3054	return(mra$match)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3055	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3056
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3057	END>>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3058	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3059
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3060	!PhoneticStringUtilities::MRAStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3061
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3062	encode:wordIn
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3063	"Answer the Match Rating Approach (MRA) code of wordIn: only letters are kept and uppercased, vowels are dropped except a leading one, repeated consecutive letters are collapsed, and a result longer than 6 is cut to its first 3 plus last 3 characters. Answers an empty string if wordIn contains no letters. See https://en.wikipedia.org/wiki/Match_Rating_Approach"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3064
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3065	\|word prev\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3066
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3067	word := wordIn.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3068
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3069	"/ First, remove any nonalphabetical characters and uppercase it
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3070
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3071	word := word select:#isLetter thenCollect:#asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3072	word isEmpty ifTrue:[^ String new]. "/ no letters at all (e.g. '' or '123'): nothing to encode, and 'word first' below would fail
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3073	"/ Delete vowels not at the start of the word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3074
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3075	word := word first asString , ((word from:2) reject:#isVowel).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3076
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3077	"/ Remove duplicate consecutive characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3078	"/ (a repeat is replaced by the marker $*, which is never a letter, and filtered out afterwards;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3079	prev := nil.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3080	word := word "/ characters are compared with = rather than ==, because equal characters need not be identical objects)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3081	collect:[:char \|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3082	char = prev ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3083	$*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3084	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3085	prev := char.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3086	char.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3087	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3088	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3089	thenSelect:[:char \| char ~~ $*].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3090
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3091	"/ If longer than 6 characters, take first and last 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3092	word size > 6 ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3093	word := (word copyFirst:3),(word copyLast:3)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3094	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3095	^ word.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3096
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3097	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3098	self new encode:'Catherine' -> 'CTHRN'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3099	self new encode:'CatherineCatherine' -> 'CTHHRN'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3100	self new encode:'Butter' -> 'BTR'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3101	self new encode:'Byrne' -> 'BYRN'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3102	self new encode:'Boern' -> 'BRN'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3103	self new encode:'Smith' -> 'SMTH'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3104	self new encode:'Smyth' -> 'SMYTH'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3105	self new encode:'Kathryn' -> 'KTHRYN'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3106	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3107
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3108	"Created: / 28-07-2017 / 15:19:22 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3109	"Modified (comment): / 31-07-2017 / 15:14:31 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3110	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3111
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3112	!PhoneticStringUtilities::MetaphoneStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3113
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3114	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3115	"
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3116	Ongoing work - do not use at the moment
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3117
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3118	Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3119
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3120	Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3121	Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3122
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3123	Hanging on the Metaphone by Lawrence Philips in Computer Language of Dec. 1990, p 39.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3124	Note that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3125	https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3126
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3127	They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3128	For more information, see https://issues.apache.org/jira/browse/CODEC-57
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3129
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3130	Metaphone uses the following rules:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3131
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3132	Doubled letters except 'c' -> drop 2nd letter.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3133	Vowels are only kept when they are the first letter.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3134	B -> B unless at the end of a word after 'm' as in 'dumb'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3135	C -> X (sh) if -cia- or -ch-;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3136	C -> S if -ci-, -ce- or -cy-;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3137	C -> K otherwise, including -sch-
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3138	D -> J if in -dge-, -dgy- or -dgi-; T otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3139	F -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3140	G -> silent if in -gh- and not at end or before a vowel in -gn- or -gned- (also see dge etc. above);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3141	G -> J if before i or e or y if not double gg; K otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3142	H -> silent if after vowel and no vowel follows; H otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3143	J -> J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3144	K -> silent if after 'c'; K otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3145	L -> L
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3146	M -> M
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3147	N -> N
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3148	P -> F if before 'h'; P otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3149	Q -> K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3150	R -> R
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3151	S -> X (sh) if before 'h' or in -sio- or -sia-; S otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3152	T -> X (sh) if -tia- or -tio-; 0 (th) if before 'h'; silent if in -tch-; T otherwise
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3153	V -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3154	W -> silent if not followed by a vowel; W if followed by a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3155	X -> KS
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3156	Y -> silent if not followed by a vowel; Y if followed by a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3157	Z -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3158
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3159	Initial Letter Exceptions
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3160
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3161	Initial kn-, gn-, pn-, ae- or wr- -> drop first letter
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3162	Initial x- -> change to 's'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3163	Initial wh- -> change to 'w'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3164
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3165
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3166	self new encode:'a'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3167	self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3168	self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3169	self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3170	self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3171	self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3172	self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3173	self new encode:'weber'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3174	self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3175	self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3176	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3177	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3178
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3179	javaCode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3180	"<<END
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3181	/*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3182	* Licensed to the Apache Software Foundation (ASF) under one or more
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3183	* contributor license agreements. See the NOTICE file distributed with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3184	* this work for additional information regarding copyright ownership.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3185	* The ASF licenses this file to You under the Apache License, Version 2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3186	* (the "License"); you may not use this file except in compliance with
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3187	* the License. You may obtain a copy of the License at
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3188	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3189	* http://www.apache.org/licenses/LICENSE-2.0
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3190	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3191	* Unless required by applicable law or agreed to in writing, software
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3192	* distributed under the License is distributed on an "AS IS" BASIS,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3193	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3194	* See the License for the specific language governing permissions and
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3195	* limitations under the License.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3196	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3197
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3198	package org.apache.commons.codec.language;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3199
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3200	import org.apache.commons.codec.EncoderException;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3201	import org.apache.commons.codec.StringEncoder;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3202
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3203	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3204	* Encodes a string into a Metaphone value.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3205	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3206	* Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3207	* Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3208	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3209	* <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3210	* p 39.</CITE>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3211	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3212	* Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3213	* </p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3214	* <ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3215	* <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3216	* (broken link 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3217	* <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3218	* (link checked 4/30/2013) </li>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3219	* </ul>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3220	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3221	* They have had undocumented changes from the originally published algorithm.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3222	* For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3223	* <p>
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3224	* This class is conditionally thread-safe.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3225	* The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3226	* but is not volatile, and accesses are not synchronized.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3227	* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3228	* is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3229	* after initial setup.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3230	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3231	* @version $Id$
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3232	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3233	public class Metaphone implements StringEncoder {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3234
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3235	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3236	* The five vowels of the English language
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3237	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3238	private static final String VOWELS = "AEIOU";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3239
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3240	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3241	* Letters (E, I, Y) that, when following C or G, select the soft encoding (S or J) in the Metaphone algorithm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3242	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3243	private static final String FRONTV = "EIY";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3244
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3245	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3246	* Letters (C, S, P, T, G) after which a following H is not coded separately in the Metaphone algorithm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3247	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3248	private static final String VARSON = "CSPTG";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3249
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3250	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3251	* The maximum length of a generated metaphone code; defaults to 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3252	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3253	private int maxCodeLen = 4;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3254
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3255	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3256	* Creates an instance of the Metaphone encoder
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3257	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3258	public Metaphone() {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3259	super();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3260	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3261
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3262	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3263	* Find the metaphone value of a String. This is similar to the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3264	* soundex algorithm, but better at finding similar sounding words.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3265	* All input is converted to upper case.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3266	* Limitations: Input format is expected to be a single ASCII word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3267	* with only characters in the A - Z range, no punctuation or numbers.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3268	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3269	* @param txt String to find the metaphone code for
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3270	* @return A metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3271	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3272	public String metaphone(final String txt) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3273	boolean hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3274	int txtLength;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3275	if (txt == null \|\| (txtLength = txt.length()) == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3276	return "";
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3277	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3278	// single character is itself
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3279	if (txtLength == 1) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3280	return txt.toUpperCase(java.util.Locale.ENGLISH);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3281	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3282
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3283	final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3284
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3285	final StringBuilder local = new StringBuilder(40); // manipulate
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3286	final StringBuilder code = new StringBuilder(10); // output
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3287	// handle exceptions for the initial two characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3288	switch(inwd[0]) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3289	case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3290	case 'G':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3291	case 'P': /* looking for KN, etc*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3292	if (inwd[1] == 'N') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3293	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3294	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3295	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3296	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3297	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3298	case 'A': /* looking for AE */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3299	if (inwd[1] == 'E') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3300	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3301	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3302	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3303	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3304	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3305	case 'W': /* looking for WR or WH */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3306	if (inwd[1] == 'R') { // WR -> R
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3307	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3308	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3309	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3310	if (inwd[1] == 'H') {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3311	local.append(inwd, 1, inwd.length - 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3312	local.setCharAt(0, 'W'); // WH -> W
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3313	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3314	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3315	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3316	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3317	case 'X': /* initial X becomes S */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3318	inwd[0] = 'S';
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3319	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3320	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3321	default:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3322	local.append(inwd);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3323	} // now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3324
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3325	final int wdsz = local.length();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3326	int n = 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3327
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3328	while (code.length() < this.getMaxCodeLen() &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3329	n < wdsz ) { // max code size of 4 works well
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3330	final char symb = local.charAt(n);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3331	// remove duplicate letters except C
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3332	if (symb !!= 'C' && isPreviousChar( local, n, symb ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3333	n++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3334	} else { // not dup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3335	switch(symb) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3336	case 'A':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3337	case 'E':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3338	case 'I':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3339	case 'O':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3340	case 'U':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3341	if (n == 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3342	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3343	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3344	break; // only use vowel if leading char
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3345	case 'B':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3346	if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3347	isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3348	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3349	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3350	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3351	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3352	case 'C': // lots of C special cases
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3353	/* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3354	if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3355	!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3356	FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3357	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3358	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3359	if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3360	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3361	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3362	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3363	if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3364	FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3365	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3366	break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3367	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3368	if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3369	isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3370	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3371	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3372	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3373	if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3374	if (n == 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3375	wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3376	isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3377	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3378	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3379	code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3380	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3381	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3382	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3383	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3384	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3385	case 'D':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3386	if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3387	isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3388	FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3389	code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3390	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3391	code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3392	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3393	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3394	case 'G': // GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3395	if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3396	isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3397	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3398	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3399	if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3400	isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3401	!!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3402	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3403	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3404	if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3405	( regionMatch(local, n, "GN") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3406	regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3407	break; // silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3408	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3409	if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3410	// NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3411	hard = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3412	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3413	hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3414	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3415	if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3416	FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3417	!!hard) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3418	code.append('J');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3419	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3420	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3421	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3422	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3423	case 'H':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3424	if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3425	break; // terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3426	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3427	if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3428	VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3429	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3430	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3431	if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3432	code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3433	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3434	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3435	case 'F':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3436	case 'J':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3437	case 'L':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3438	case 'M':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3439	case 'N':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3440	case 'R':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3441	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3442	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3443	case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3444	if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3445	if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3446	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3447	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3448	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3449	code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3450	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3451	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3452	case 'P':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3453	if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3454	// PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3455	code.append('F');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3456	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3457	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3458	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3459	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3460	case 'Q':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3461	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3462	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3463	case 'S':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3464	if (regionMatch(local,n,"SH") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3465	regionMatch(local,n,"SIO") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3466	regionMatch(local,n,"SIA")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3467	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3468	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3469	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3470	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3471	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3472	case 'T':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3473	if (regionMatch(local,n,"TIA") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3474	regionMatch(local,n,"TIO")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3475	code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3476	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3477	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3478	if (regionMatch(local,n,"TCH")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3479	// Silent if in "TCH"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3480	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3481	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3482	// substitute numeral 0 for TH (resembles theta after all)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3483	if (regionMatch(local,n,"TH")) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3484	code.append('0');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3485	} else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3486	code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3487	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3488	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3489	case 'V':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3490	code.append('F'); break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3491	case 'W':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3492	case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3493	if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3494	isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3495	code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3496	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3497	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3498	case 'X':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3499	code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3500	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3501	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3502	case 'Z':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3503	code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3504	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3505	default:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3506	// do nothing
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3507	break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3508	} // end switch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3509	n++;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3510	} // end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3511	if (code.length() > this.getMaxCodeLen()) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3512	code.setLength(this.getMaxCodeLen());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3513	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3514	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3515	return code.toString();
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3516	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3517
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3518	private boolean isVowel(final StringBuilder string, final int index) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3519	return VOWELS.indexOf(string.charAt(index)) >= 0;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3520	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3521
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3522	private boolean isPreviousChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3523	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3524	if( index > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3525	index < string.length() ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3526	matches = string.charAt(index - 1) == c;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3527	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3528	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3529	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3530
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3531	private boolean isNextChar(final StringBuilder string, final int index, final char c) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3532	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3533	if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3534	index < string.length() - 1 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3535	matches = string.charAt(index + 1) == c;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3536	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3537	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3538	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3539
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3540	private boolean regionMatch(final StringBuilder string, final int index, final String test) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3541	boolean matches = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3542	if( index >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3543	index + test.length() - 1 < string.length() ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3544	final String substring = string.substring( index, index + test.length());
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3545	matches = substring.equals( test );
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3546	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3547	return matches;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3548	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3549
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3550	private boolean isLastChar(final int wdsz, final int n) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3551	return n + 1 == wdsz;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3552	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3553
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3554
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3555	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3556	* Encodes an Object using the metaphone algorithm. This method
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3557	* is provided in order to satisfy the requirements of the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3558	* Encoder interface, and will throw an EncoderException if the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3559	* supplied object is not of type java.lang.String.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3560	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3561	* @param obj Object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3562	* @return An object (of type java.lang.String) containing the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3563	* metaphone code which corresponds to the String supplied.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3564	* @throws EncoderException if the parameter supplied is not
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3565	* of type java.lang.String
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3566	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3567	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3568	public Object encode(final Object obj) throws EncoderException {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3569	if (!!(obj instanceof String)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3570	throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3571	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3572	return metaphone((String) obj);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3573	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3574
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3575	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3576	* Encodes a String using the Metaphone algorithm.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3577	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3578	* @param str String object to encode
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3579	* @return The metaphone code corresponding to the String supplied
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3580	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3581	@Override
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3582	public String encode(final String str) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3583	return metaphone(str);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3584	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3585
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3586	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3587	* Tests if the metaphones of two strings are identical.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3588	*
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3589	* @param str1 First of two strings to compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3590	* @param str2 Second of two strings to compare
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3591	* @return <code>true</code> if the metaphones of these strings are identical,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3592	* <code>false</code> otherwise.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3593	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3594	public boolean isMetaphoneEqual(final String str1, final String str2) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3595	return metaphone(str1).equals(metaphone(str2));
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3596	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3597
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3598	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3599	* Returns the maxCodeLen.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3600	* @return int
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3601	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3602	public int getMaxCodeLen() { return this.maxCodeLen; }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3603
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3604	/**
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3605	* Sets the maxCodeLen.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3606	* @param maxCodeLen The maxCodeLen to set
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3607	*/
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3608	public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3609
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3610	}
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3611	END>>"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3612	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3613
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3614	!PhoneticStringUtilities::MetaphoneStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3615
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3616	encode:txt
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3617	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3618	self new encode:'a'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3619	self new encode:'MILLER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3620	self new encode:'schmidt'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3621	self new encode:'schneider'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3622	self new encode:'FISCHER'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3623	self new encode:'HEDGY'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3624	self new encode:'weber'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3625	self new encode:'wagner'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3626	self new encode:'van gogh'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3627	self new encode:'dumb'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3628	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3629
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3630	\|hard txtLength local code inwd ch ch2 wdsz n\|
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3631
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3632	inwd := txt.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3633	hard := false.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3634	txtLength := 0.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3635
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3636	(txtLength := txt size) == 0 ifTrue:[^ ''].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3637
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3638	inwd := txt asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3639	"/ single character is itself
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3640	(txtLength == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3641	^ inwd
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3642	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3643
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3644	code := '' writeStream.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3645	local := inwd.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3646
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3647	"/ handle initial 2 characters exceptions
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3648	ch := inwd at:(0+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3649	ch2 := inwd at:(1+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3650	('KGP' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3651	"/ looking for KN, etc
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3652	"/ KNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3653	"/ GNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3654	"/ PNx -> Nx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3655	(ch2 == $N) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3656	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3657	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3658	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3659	('A' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3660	"/ looking for AE
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3661	"/ AEx -> Ex
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3662	(ch2 == $E) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3663	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3664	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3665	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3666	('W' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3667	"/ looking for WR or WH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3668	(ch2 == $R) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3669	"/ WRx -> Wx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3670	local := (inwd from:1+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3671	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3672	(ch2 == $H) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3673	"/ // WH -> W
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3674	local := 'W',(inwd from:2+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3675	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3676	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3677	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3678	('X' includes:ch) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3679	"/ initial X becomes S */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3680	"/ Xx -> Sx
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3681	local := 'S',(inwd from:1+1).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3682	]]]].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3683
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3684	"/ now local has working string with initials fixed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3685
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3686	wdsz := local size.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3687	n := 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3688
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3689	[ n <= wdsz ] whileTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3690	"/ max code size of 4 works well
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3691
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3692	\|symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3693
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3694	symb := local at:n.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3695	(n > 1) ifTrue:[ prevChar := local at:(n-1) ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3696	(isLastChar := (n == wdsz)) ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3697	nextChar := local at:(n+1)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3698	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3699	isPrevToLastChar := (n == (wdsz-1)).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3700	(n+2) <= wdsz ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3701	nextNextChar := local at:(n+2)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3702	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3703
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3704	"/ remove duplicate letters except C and except first
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3705	(symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3706	"/ not dup
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3707	('AEIOU' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3708	"/ only use vowel if leading char
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3709	(n == 1) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3710	code nextPut:symb
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3711	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3712	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3713	('B' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3714	"/ if ( isPreviousChar(local, n, 'M') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3715	"/ isLastChar(wdsz, n) ) { // B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3716	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3717	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3718	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3719	"/ break;
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3720	(isLastChar and:[ prevChar == $M]) ifTrue:[
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3721	"/ B is silent if word ends in MB
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3722	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3723	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3724	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3725	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3726	('C' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3727	"/ lots of C special cases
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3728	"/ /* discard if SCI, SCE or SCY */
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3729	"/ if ( isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3730	"/ !!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3731	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3732	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3733	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3734	"/ if (regionMatch(local, n, "CIA")) { // "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3735	"/ code.append('X');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3736	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3737	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3738	"/ if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3739	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3740	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3741	"/ break; // CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3742	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3743	"/ if (isPreviousChar(local, n, 'S') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3744	"/ isNextChar(local, n, 'H') ) { // SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3745	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3746	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3747	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3748	"/ if (isNextChar(local, n, 'H')) { // detect CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3749	"/ if (n == 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3750	"/ wdsz >= 3 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3751	"/ isVowel(local,2) ) { // CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3752	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3753	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3754	"/ code.append('X'); // CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3755	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3756	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3757	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3758	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3759	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3760	(prevChar == $S and:[ 'EIY' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3761	"/ discard if SCI, SCE or SCY
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3762	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3763	((nextChar == $I) and:[ nextNextChar == $A ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3764	"/ "CIA" -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3765	code nextPut:$X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3766	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3767	('IEY' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3768	"/ CI,CE,CY -> S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3769	code nextPut:$S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3770	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3771	((prevChar == $S) and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3772	"/ SCH->sk
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3773	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3774	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3775	nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3776	"/ CH
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3777	('AEIOU' includes:nextNextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3778	code nextPut:$K "/ CH consonant -> K consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3779	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3780	code nextPut:$X "/ CHvowel -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3781	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3782	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3783	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3784	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3785	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3786	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3787	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3788	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3789
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3790	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3791	('D' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3792	"/ if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3793	"/ isNextChar(local, n, 'G') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3794	"/ FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3795	"/ code.append('J'); n += 2;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3796	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3797	"/ code.append('T');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3798	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3799	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3800	((nextChar == $G)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3801	and:[ (local from:n) startsWithAnyOf:#('DGE' 'DGI' 'DGY') ])
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3802	ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3803	code nextPut:$J.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3804	n := n + 2.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3805	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3806	code nextPut:$T.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3807	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3808	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3809	('G' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3810	"/ GH silent at end or before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3811	"/ if (isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3812	"/ isNextChar(local, n, 'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3813	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3814	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3815	"/ if (!!isLastChar(wdsz, n + 1) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3816	"/ isNextChar(local,n,'H') &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3817	"/ !!isVowel(local,n+2)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3818	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3819	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3820	"/ if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3821	"/ ( regionMatch(local, n, "GN") \|\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3822	"/ regionMatch(local, n, "GNED") ) ) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3823	"/ break; // silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3824	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3825	"/ if (isPreviousChar(local, n, 'G')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3826	"/ // NOTE: Given that duplicated chars are removed, I dont see how this can ever be true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3827	"/ hard = true;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3828	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3829	"/ hard = false;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3830	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3831	"/ if (!!isLastChar(wdsz, n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3832	"/ FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3833	"/ !!hard) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3834	"/ code.append('J');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3835	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3836	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3837	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3838	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3839	(isPrevToLastChar and:[ nextChar == $H ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3840	"/ GH silent at end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3841	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3842	(isPrevToLastChar not and:[ nextChar == $H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3843	and:[ ('AEIOU' includes:nextNextChar) not ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3844	"/ GH silent before consonant
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3845	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3846	(n > 1 and:[ nextChar == $N ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3847	"/ GN -> silent G
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3848	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3849	hard := (prevChar == $G).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3850	(isLastChar not and:[ hard not and:[ ('EIY' includes:nextChar) ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3851	code nextPut:$J
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3852	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3853	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3854	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3855	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3856	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3857	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3858	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3859	('H' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3860	"/ case 'H':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3861	"/ if (isLastChar(wdsz, n)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3862	"/ break; // terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3863	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3864	"/ if (n > 0 &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3865	"/ VARSON.indexOf(local.charAt(n - 1)) >= 0) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3866	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3867	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3868	"/ if (isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3869	"/ code.append('H'); // Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3870	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3871	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3872	isLastChar ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3873	"/ ignore terminal H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3874	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3875	('CSPTG' includes:prevChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3876	"/ ignore CH, SH, PH, TH, GH (H treated there)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3877	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3878	('AEIOU' includes:nextChar) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3879	"/ Hvowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3880	code nextPut:$H
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3881	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3882	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3883	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3884	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3885	('FJLMNR' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3886	"/ case 'F':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3887	"/ case 'J':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3888	"/ case 'L':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3889	"/ case 'M':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3890	"/ case 'N':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3891	"/ case 'R':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3892	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3893	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3894	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3895	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3896	('K' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3897	"/ case 'K':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3898	"/ if (n > 0) { // not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3899	"/ if (!!isPreviousChar(local, n, 'C')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3900	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3901	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3902	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3903	"/ code.append(symb); // initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3904	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3905	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3906	n > 1 ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3907	"/ not initial
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3908	prevChar ~~ $C ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3909	code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3910	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3911	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3912	code nextPut:$K. "/ initial K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3913	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3914	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3915	('P' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3916	"/ case 'P':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3917	"/ if (isNextChar(local,n,'H')) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3918	"/ // PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3919	"/ code.append('F');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3920	"/ } else {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3921	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3922	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3923	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3924	nextChar == $H ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3925	"/ PH -> F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3926	code nextPut:$F.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3927	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3928	code nextPut:symb.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3929	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3930	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3931	('Q' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3932	"/ case 'Q':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3933	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3934	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3935	code nextPut:$K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3936
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3937	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3938	('S' includes:symb) ifTrue:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3939	"/ case 'S':
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3940	"/ if (regionMatch(local,n,"SH") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3941	"/ regionMatch(local,n,"SIO") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3942	"/ regionMatch(local,n,"SIA")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3943	"/ code.append('X');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3944	"/ } else {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3945	"/ code.append('S');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3946	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3947	"/ break;
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3948	"/ SH -> X (as in shave or ashton)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3949	"/ SIO -> X
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3950	"/ SIA -> X (as in ASIA)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3951	((nextChar == $H)
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3952	or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3953	) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3954	code nextPut:$X
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3955	] ifFalse:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3956	code nextPut:$S
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3957	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3958	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3959	('T' includes:symb) ifTrue:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3960	"/ case 'T':
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3961	"/ if (regionMatch(local,n,"TIA") \|\|
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3962	"/ regionMatch(local,n,"TIO")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3963	"/ code.append('X');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3964	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3965	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3966	"/ if (regionMatch(local,n,"TCH")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3967	"/ // Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3968	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3969	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3970	"/ // substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3971	"/ if (regionMatch(local,n,"TH")) {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3972	"/ code.append('0');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3973	"/ } else {
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3974	"/ code.append('T');
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3975	"/ }
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3976	"/ break;
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3977	(nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3978	code nextPut:$X.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3979	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3980	(nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3981	"/ Silent if in "TCH"
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3982	"/ cg - huh; hutch - methinksthereisat
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3983	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3984	"/ substitute numeral 0 for TH (resembles theta after all)
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3985	nextChar == $H ifTrue:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3986	code nextPut:$0.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3987	] ifFalse:[
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3988	code nextPut:$T.
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3989	].
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3990	].
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	3991	].
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3992	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3993	('V' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3994	"/ case 'V':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3995	"/ code.append('F'); break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3996	code nextPut:$F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3997
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3998	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	3999	('WY' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4000	"/ case 'W':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4001	"/ case 'Y': // silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4002	"/ if (!!isLastChar(wdsz,n) &&
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4003	"/ isVowel(local,n+1)) {
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4004	"/ code.append(symb);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4005	"/ }
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4006	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4007
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4008	"/ silent if not followed by vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4009	(isLastChar not and:[ 'AEIOU' includes:nextChar ]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4010	code nextPut:symb
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4011	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4012	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4013	('X' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4014	"/ case 'X':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4015	"/ code.append('K');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4016	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4017	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4018	code nextPutAll:'KS'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4019	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4020	('Z' includes:symb) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4021	"/ case 'Z':
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4022	"/ code.append('S');
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4023	"/ break;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4024	code nextPut:$S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4025	] ifFalse:[
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	4026	"/ default:
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	4027	"/ // do nothing
5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	4028	"/ break;
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4029	]]]]]]]]]]]]]]]]. "/ end switch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4030	]. "/ end else from symb !!= 'C'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4031	n := n + 1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4032	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4033	^ code contents
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4034
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4035	"Created: / 02-08-2017 / 09:51:31 / cg"
4495 5d2da4bddbda #DOCUMENTATION by cg Claus Gittinger <cg@exept.de> parents: 4491 diff changeset	4036	"Modified: / 03-08-2017 / 14:55:22 / cg"
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4037	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4038
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4039	!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4040
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4041	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4042	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4043	WARNING: this is the so called 'simplified soundex' algorithm;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4044	there are more variants like miracode (american soundex) or
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4045	mysqlSoundex around.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4046
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4047	Be sure to use the correct algorithm if the generated strings must be compatible
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4048	with those of another implementation (otherwise, the differences are probably too
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4049	small to be noticeable, but your search results may differ)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4050
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4051	The following was copied from http://www.civilsolutions.com.au/publications/dedup.htm
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4052
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4053	SOUNDEX is a phonetic coding algorithm that ignores many of the unreliable
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4054	components of names, but by doing so reports more matches.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4055
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4056	There are some variations around in the literature;
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4057	the following is called 'simplified soundex', and the rules for coding a name are:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4058
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4059	1. The first letter of the name is used in its un-coded form to serve as the prefix
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4060	character of the code. (The rest of the code is numerical).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4061
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4062	2. Thereafter, W and H are ignored entirely.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4063
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4064	3. A, E, I, O, U, Y are not assigned a code number, but do serve as 'separators' (see Step 5).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4065
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4066	4. Other letters of the name are converted to a numerical equivalent:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4067	B, P, F, V 1
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4068	C, G, J, K, Q, S, X, Z 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4069	D, T 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4070	L 4
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4071	M, N 5
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4072	R 6
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4073
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4074	5. There are two exceptions:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4075	1. Letters that follow prefix letters which would, if coded, have the same
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4076	numerical code, are ignored in all cases unless a ''separator'' (see Step 3) precedes them.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4077
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4078	2. The second letter of any pair of consonants having the same code number is likewise ignored,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4079	i.e. unless there is a ''separator'' between them in the name.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4080
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4081	6. The final SOUNDEX code consists of the prefix letter plus three numerical characters.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4082	Longer codes are truncated to this length, and shorter codes are extended to it by adding zeros.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4083
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4084	Notice, that in another variant, w and h are treated slightly differently.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4085	This is only of relevance, if you need to reconstruct original soundex codes of other programs
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4086	or for the original 1880 US census data.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4087	SoundexStringComparator new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4088	vs.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4089	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4090
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4091	Also notice, that soundex deals better with english.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4092	For german and other languages, other algorithms may provide better results.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4093	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4094	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4095
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4096	!PhoneticStringUtilities::SoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4097
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4098	encode:word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4099	|u p t prevCode|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4100	"answer the simplified soundex code of word: its first character (uppercased) followed by up to three code digits, padded with '0' to exactly 4 characters. NOTE(review): an empty word fails in 'u first' - confirm that callers never pass one"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4101	u := word asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4102	p := u first asString.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4103	prevCode := self translate:u first. "the first character is kept uncoded, but its code still suppresses an equal code right after it"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4104	u from:2 to:u size do:[:c |
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4105	t := self translate:c.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4106	(t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4107	p := p , t.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4108	p size == 4 ifTrue:[^ p ]. "code is complete - the rest of the word is not looked at"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4109	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4110	prevCode := t "also set for '0' (vowels, Y) and nil (W, H, anything else), so all of those separate two equal codes"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4111	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4112	[ p size < 4 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4113	p := p , '0'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4114	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4115	^ (p copyFrom:1 to:4)
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4116
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4117	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4118	self new encode:'washington' -> 'W252'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4119	self new encode:'lee' -> 'L000'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4120	self new encode:'Gutierrez' -> 'G362'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4121	self new encode:'Pfister' -> 'P236'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4122	self new encode:'Jackson' -> 'J250'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4123	self new encode:'Tymczak' -> 'T522'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4124	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4125
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4126	"notice:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4127	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4128	self new encode:'Ashcraft' -> 'A226'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4129	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4130
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4131	"Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4132	"Modified (comment): / 01-08-2017 / 19:01:43 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4133	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4134
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4135	!PhoneticStringUtilities::SoundexStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4136
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4137	translate:aCharacter
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4138	"answer the soundex code of aCharacter as a one-character string ('0' .. '6'), or nil if it has no code. Only uppercase letters are recognized. Uses simple if's for more speed when compiled"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4139
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4140	"vowels and Y get '0': encode: emits no digit for them, but they serve as separators"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4141	aCharacter == $A ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4142	aCharacter == $E ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4143	aCharacter == $I ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4144	aCharacter == $O ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4145	aCharacter == $U ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4146	aCharacter == $Y ifTrue:[^ '0' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4147
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4148	aCharacter == $B ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4149	aCharacter == $P ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4150	aCharacter == $F ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4151	aCharacter == $V ifTrue:[^ '1' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4152
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4153	aCharacter == $C ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4154	aCharacter == $S ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4155	aCharacter == $K ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4156	aCharacter == $G ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4157	aCharacter == $J ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4158	aCharacter == $Q ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4159	aCharacter == $X ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4160	aCharacter == $Z ifTrue:[^ '2' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4161
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4162	aCharacter == $D ifTrue:[^ '3' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4163	aCharacter == $T ifTrue:[^ '3' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4164
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4165	aCharacter == $L ifTrue:[^ '4' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4166
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4167	aCharacter == $M ifTrue:[^ '5' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4168	aCharacter == $N ifTrue:[^ '5' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4169
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4170	aCharacter == $R ifTrue:[^ '6' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4171	^ nil "anything else (including W and H) has no code"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4172
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4173	"Modified: / 02-08-2017 / 01:35:40 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4174	"Modified (comment): / 02-08-2017 / 14:30:11 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4175	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4176
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4177	!PhoneticStringUtilities::MySQLSoundexStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4178
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4179	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4180	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4181	MySQL soundex is like american Soundex (i.e. miracode) without the 4 character limitation,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4182	and also removing vowels first, then removing duplicate codes
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4183	(whereas the soundex code does this in reverse order).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4184
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4185	These variations are important, if you need the miracode soundex codes to be generated.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4186	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4187	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4188
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4189	!PhoneticStringUtilities::MySQLSoundexStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4190
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4191	encode:word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4192	"same as inherited, but the code is not limited to 4 characters (it is only padded with '0' up to 4), and vowels ('0'), W and H do not reset prevCode, i.e. equal codes around them still collapse into one. NOTE(review): an empty word fails in 'u first' - confirm that callers never pass one"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4193
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4194	|u p t prevCode|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4195
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4196	u := word asUppercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4197	p := u first asString.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4198	prevCode := self translate:u first.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4199	u from:2 to:u size do:[:c |
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4200	t := self translate:c.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4201	(t notNil and:[ t ~= '0' and:[ t ~= prevCode ]]) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4202	p := p , t.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4203	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4204	(t ~= '0' and:[ c ~= $W and:[c ~= $H]]) ifTrue:[ "vowels, W and H keep the previous code alive"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4205	prevCode := t.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4206	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4207	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4208	[ p size < 4 ] whileTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4209	p := p , '0'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4210	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4211	^ p
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4212
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4213	"Created: / 28-07-2017 / 15:23:41 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4214	"Modified: / 31-07-2017 / 17:53:51 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4215	"Modified (comment): / 02-08-2017 / 14:31:15 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4216	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4217
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4218	!PhoneticStringUtilities::NYSIISStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4219
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4220	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4221	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4222	NYSIIS Algorithm:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4223
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4224	1.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4225	remove all ''S'' and ''Z'' chars from the end of the surname
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4226
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4227	2.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4228	transcode initial strings
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4229	MAC => MC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4230	PF => F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4231
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4232	3.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4233	Transcode trailing strings as follows,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4234
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4235	IX => IC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4236	EX => EC
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4237	YE,EE,IE => Y
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4238	NT,ND => D
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4239
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4240	4.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4241	transcode ''EV'' to ''EF'' if not at start of name
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4242
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4243	5.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4244	use first character of name as first character of key
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4245
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4246	6.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4247	remove any ''W'' that follows a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4248
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4249	7.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4250	replace all vowels with ''A''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4251
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4252	8.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4253	transcode ''GHT'' to ''GT''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4254
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4255	9.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4256	transcode ''DG'' to ''G''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4257
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4258	10.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4259	transcode ''PH'' to ''F''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4260
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4261	11.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4262	if not first character, eliminate all ''H'' preceded or followed by a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4263
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4264	12.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4265	change ''KN'' to ''N'', else ''K'' to ''C''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4266
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4267	13.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4268	if not first character, change ''M'' to ''N''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4269
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4270	14.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4271	if not first character, change ''Q'' to ''G''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4272
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4273	15.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4274	transcode ''SH'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4275
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4276	16.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4277	transcode ''SCH'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4278
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4279	17.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4280	transcode ''YW'' to ''Y''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4281
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4282	18.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4283	if not first or last character, change ''Y'' to ''A''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4284
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4285	19.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4286	transcode ''WR'' to ''R''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4287
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4288	20.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4289	if not first character, change ''Z'' to ''S''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4290
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4291	21.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4292	transcode terminal ''AY'' to ''Y''
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4293
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4294	22.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4295	remove trailing vowels
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4296
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4297	23.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4298	collapse all strings of repeated characters
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4299
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4300	24.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4301	if first char of original surname was a vowel, append it to the code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4302	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4303	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4304
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4305	!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4306
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4307	encode:aString
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4308	|k|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4309	"answer the NYSIIS key of aString: the uppercased string is passed through rule1: .. rule24: in order, each rule working on the result of the previous one"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4310	k := self rule1:(aString asUppercase).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4311	"2. Transcode initial strings: MAC => MC PF => F"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4312	k := self rule2:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4313	k := self rule3:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4314	k := self rule4:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4315	k := self rule5:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4316	k := self rule6:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4317	k := self rule7:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4318	k := self rule8:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4319	k := self rule9:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4320	k := self rule10:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4321	k := self rule11:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4322	k := self rule12:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4323	k := self rule13:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4324	k := self rule14:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4325	k := self rule15:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4326	k := self rule16:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4327	k := self rule17:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4328	k := self rule18:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4329	k := self rule19:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4330	k := self rule20:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4331	k := self rule21:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4332	k := self rule22:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4333	k := self rule23:k.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4334	k := self rule24:k originalKey:aString. "NOTE(review): rule24 gets the original aString, not the uppercased one - confirm that it copes with a lowercase first character"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4335	^ k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4336
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4337	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4338	self new encode:'hello'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4339	self new encode:'bliss'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4340	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4341	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4342	self new phoneticStringsFor:'hello'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4343	self new phoneticStringsFor:'bliss'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4344	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4345
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4346	"Created: / 28-07-2017 / 15:34:52 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4347	"Modified (comment): / 02-08-2017 / 14:31:47 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4348	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4349
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4350	!PhoneticStringUtilities::NYSIISStringComparator methodsFor:'private'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4351
rule10:key
    "NYSIIS step 10: rewrite every occurrence of 'PH' as 'F'.
     Answers a new string; the argument is not modified
     (the helper works on a copy)."

    |transcoded|

    transcoded := self transcodeAll:'PH' of:key to:'F' startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:34:27 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4358	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4359
rule11:key
    "NYSIIS step 11: except for the first character, eliminate every 'H'
     which is preceded or followed by a vowel.
     The decision for each position is taken on the unmodified key;
     positions are visited from the end towards the front, so that
     removing one character never shifts an index which is still to be
     looked at."

    |result|

    result := key copy.
    key size to:2 by:-1 do:[:pos |
        ((key at:pos) = $H
          and:[ (key at:pos - 1) isVowel
                or:[ pos < key size and:[ (key at:pos + 1) isVowel ] ] ])
        ifTrue:[
            result := (result copyFrom:1 to:pos - 1)
                      , (result copyFrom:pos + 1 to:result size)
        ]
    ].
    ^ result
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4376	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4377
rule12:key
    "NYSIIS step 12: change 'KN' to 'N', else 'K' to 'C'.
     The 'KN' pass has to run first and has to produce 'N';
     the former code produced 'K' there, which the second pass then
     turned into 'C' (i.e. 'KN' ended up as 'C' instead of 'N').
     Answers a new string; key is not modified."

    |k|

    k := self transcodeAll:'KN' of:key to:'N' startingAt:1.
    "/ whatever K is left now is not followed by an N
    k := self transcodeAll:'K' of:k to:'C' startingAt:1.
    ^ k

    "Modified (format): / 02-08-2017 / 14:34:48 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4387	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4388
rule13:key
    "NYSIIS step 13: every 'M' becomes 'N', except for an 'M' in the
     first position (the search starts at index 2).
     Answers a new string."

    ^ self
        transcodeAll:'M'
        of:key
        to:'N'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:35:00 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4395	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4396
rule14:key
    "NYSIIS step 14: every 'Q' becomes 'G', except for a 'Q' in the
     first position (the search starts at index 2).
     Answers a new string."

    |transcoded|

    transcoded := self transcodeAll:'Q' of:key to:'G' startingAt:2.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:08 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4403	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4404
rule15:key
    "NYSIIS step 15: rewrite every occurrence of 'SH' as 'S'.
     Answers a new string."

    ^ self
        transcodeAll:'SH'
        of:key
        to:'S'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:18 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4411	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4412
rule16:key
    "NYSIIS step 16: rewrite every occurrence of 'SCH' as 'S'.
     Answers a new string."

    |transcoded|

    transcoded := self transcodeAll:'SCH' of:key to:'S' startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:25 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4419	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4420
rule17:key
    "NYSIIS step 17: rewrite every occurrence of 'YW' as 'Y'.
     Answers a new string."

    ^ self
        transcodeAll:'YW'
        of:key
        to:'Y'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:35:33 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4427	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4428
rule18:key
    "NYSIIS step 18: change 'Y' to 'A', unless it is the first or the
     last character.
     All 'Y's from index 2 on are rewritten first; if the key ended in
     a 'Y', that last character is restored afterwards (the replacement
     has the same length, so the last index is unchanged).
     An empty key is answered unchanged; the former code sent #last to
     it, which raises an error on an empty string."

    |k|

    k := self transcodeAll:'Y' of:key to:'A' startingAt:2.
    (key notEmpty and:[ key last = $Y ]) ifTrue:[
        k at:k size put:$Y
    ].
    ^ k

    "Modified (format): / 02-08-2017 / 14:35:44 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4440	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4441
rule19:key
    "NYSIIS step 19: rewrite every occurrence of 'WR' as 'R'.
     Answers a new string."

    |transcoded|

    transcoded := self transcodeAll:'WR' of:key to:'R' startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:35:52 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4448	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4449
rule1:key
    "NYSIIS step 1: remove all 'S' and 'Z' characters from the end of
     the name.
     Answers a new string; key is not modified.
     The loop stops when nothing is left: a name which consists of
     'S' and 'Z' only (or an empty one) yields the empty string.
     The former code sent #last to the emptied string in that case,
     which raises an error."

    |trimmed|

    trimmed := key copy.
    [
        trimmed notEmpty and:[ 'SZ' includes:trimmed last ]
    ] whileTrue:[
        trimmed := trimmed copyButLast
    ].
    ^ trimmed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4459	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4460
rule20:key
    "NYSIIS step 20: every 'Z' becomes 'S', except for a 'Z' in the
     first position (the search starts at index 2).
     Answers a new string."

    ^ self
        transcodeAll:'Z'
        of:key
        to:'S'
        startingAt:2

    "Modified (format): / 02-08-2017 / 14:36:00 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4467	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4468
rule21:key
    "NYSIIS step 21: rewrite a terminal 'AY' as 'Y'.
     The search starts at the second-to-last index, which is the only
     place where a two-character suffix can begin.
     For keys with fewer than 2 characters that index would be 0 or -1;
     it is clamped to 1, so that the helper is never asked to search
     from a position in front of the string (such a key cannot contain
     'AY' anyway)."

    |start|

    start := (key size - 1) max:1.
    ^ self transcodeAll:'AY' of:key to:'Y' startingAt:start

    "Modified (format): / 02-08-2017 / 14:36:08 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4475	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4476
rule22:key
    "NYSIIS step 22: remove trailing vowels.
     Answers a new string; key is not modified.
     The loop stops when nothing is left: a key which consists of
     vowels only (or an empty one) yields the empty string.
     The former code sent #last to the emptied string in that case,
     which raises an error."

    |trimmed|

    trimmed := key copy.
    [ trimmed notEmpty and:[ trimmed last isVowel ] ] whileTrue:[
        trimmed := trimmed copyButLast
    ].
    ^ trimmed

    "Modified: / 02-08-2017 / 14:36:42 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4488	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4489
rule23:key
    "NYSIIS step 23: collapse all runs of repeated characters into a
     single character.
     Answers a new string; key is not modified.

     The former code iterated with 'k size to:2 do:', which is an
     ascending loop: it did nothing for keys longer than 2 characters
     and accessed index 0 for a key of size 1.
     Now the key is walked explicitly from the last index down to 2.
     Each character which equals its predecessor (in the unmodified key)
     is removed; because higher indices are removed first, the lower
     indices still to be visited remain valid."

    |collapsed|

    collapsed := key copy.
    key size to:2 by:-1 do:[:pos |
        (key at:pos) = (key at:pos - 1) ifTrue:[
            collapsed := (collapsed copyFrom:1 to:pos - 1)
                         , (collapsed copyFrom:pos + 1 to:collapsed size)
        ]
    ].
    ^ collapsed
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4505	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4506
rule24:key originalKey:originalKey
    "NYSIIS step 24: if the first character of the original surname was
     a vowel, append it (as uppercase) to the code.
     Answers a new string in either case; key is not modified."

    |initial|

    initial := originalKey first.
    initial isVowel ifFalse:[ ^ key copy ].
    ^ key , initial asString asUppercase
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4516	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4517
rule2:key
    "NYSIIS step 2: transcode initial strings:
        MAC => MC
        PF  => F
     Answers a new string; key is not modified.
     The two prefixes exclude each other (a key rewritten to start
     with 'MC' cannot start with 'PF'), so each case answers at once."

    (key startsWith:'MAC') ifTrue:[
        ^ 'MC' , (key copyFrom:4)
    ].
    (key startsWith:'PF') ifTrue:[
        ^ 'F' , (key copyFrom:3)
    ].
    ^ key copy

    "Modified (format): / 02-08-2017 / 14:31:40 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4533	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4534
rule3:key
    "NYSIIS step 3: transcode trailing strings as follows:
        IX         => IC
        EX         => EC
        YE, EE, IE => Y
        NT, ND     => D
     The groups are applied in exactly this order, each one working
     on the result of the previous one. Answers a new string."

    |result|

    result := key copy.
    #(
        #( #( 'IX' )            'IC' )
        #( #( 'EX' )            'EC' )
        #( #( 'YE' 'EE' 'IE' )  'Y'  )
        #( #( 'NT' 'ND' )       'D'  )
    ) do:[:suffixesAndReplacement |
        result := self
                    transcodeTrailing:(suffixesAndReplacement first)
                    of:result
                    to:(suffixesAndReplacement last)
    ].
    ^ result

    "Modified (format): / 02-08-2017 / 14:32:24 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4552	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4553
rule4:key
    "NYSIIS step 4: rewrite 'EV' as 'EF', unless it is at the start of
     the name (the search starts at index 2).
     Answers a new string."

    |transcoded|

    transcoded := self transcodeAll:'EV' of:key to:'EF' startingAt:2.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:32:35 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4560	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4561
rule5:key
    "NYSIIS step 5: use the first character of the name as the first
     character of the key.
     Nothing to do here, because the conversion is done in place on
     the name itself; the argument is answered as is (not copied)."

    ^ key

    "Modified (comment): / 02-08-2017 / 14:32:45 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4569	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4570
rule6:key
    "NYSIIS step 6: remove any 'W' that follows a vowel.
     Answers a new string; key is not modified.

     The search starts at index 2, so a leading 'W' is never looked at
     and (k at:i - 1) is always a valid access.
     The former code did not advance the search index when it found a
     'W' which is not preceded by a vowel; it found the very same 'W'
     again and never terminated (e.g. for 'SWAN')."

    |k i|

    k := key copy.
    i := 2.
    [
        (i := k indexOf:$W startingAt:i) > 0
    ] whileTrue:[
        (k at:i - 1) isVowel ifTrue:[
            "/ drop the W; its successor moves to index i and is examined next
            k := (k copyFrom:1 to:i - 1) , (k copyFrom:i + 1 to:k size)
        ] ifFalse:[
            "/ keep this W and continue the search behind it
            i := i + 1
        ]
    ].
    ^ k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4586	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4587
rule7:key
    "NYSIIS step 7: replace all vowels with 'A'.
     Answers a new string of the same size; key is not modified."

    |result|

    result := key copy.
    1 to:result size do:[:pos |
        (result at:pos) isVowel ifTrue:[
            result at:pos put:$A
        ]
    ].
    ^ result

    "Modified: / 02-08-2017 / 14:33:56 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4593	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4594
rule8:key
    "NYSIIS step 8: rewrite every occurrence of 'GHT' as 'GT'.
     Answers a new string."

    ^ self
        transcodeAll:'GHT'
        of:key
        to:'GT'
        startingAt:1

    "Modified (format): / 02-08-2017 / 14:34:05 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4601	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4602
rule9:key
    "NYSIIS step 9: rewrite every occurrence of 'DG' as 'G'.
     Answers a new string."

    |transcoded|

    transcoded := self transcodeAll:'DG' of:key to:'G' startingAt:1.
    ^ transcoded

    "Modified (format): / 02-08-2017 / 14:34:15 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4609	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4610
transcodeAll:aString of:key to:replacementString startingAt:start
    "Answer a copy of key in which every occurrence of aString found at
     or after index start has been replaced by replacementString.

     After each replacement the search is restarted at start (not
     behind the replacement), so occurrences which only come into
     existence through a previous replacement are rewritten as well.
     Consequently, the loop only terminates if replacementString does
     not reintroduce aString at or after start; all callers in this
     class respect that."

    |work hit|

    work := key copy.
    [
        hit := work indexOfSubCollection:aString startingAt:start.
        hit > 0
    ] whileTrue:[
        work := work
                    copyReplaceFrom:hit
                    to:(hit + aString size - 1)
                    with:replacementString
    ].
    ^ work
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4622	!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4623
transcodeTrailing:anArrayOfStrings of:key to:replacementString
    "Answer a copy of key in which a trailing occurrence of any of the
     strings in anArrayOfStrings has been replaced by replacementString.
     The suffixes are tried in the given order, each one against the
     result of the previous one.

     For each suffix, the search starts at the only index where a suffix
     of that size can begin. A suffix which is longer than the current
     string is skipped: it cannot match, and the computed start index
     would be zero or negative, i.e. in front of the string."

    |answer|

    answer := key copy.
    anArrayOfStrings do:[:aString |
        aString size <= answer size ifTrue:[
            answer := self
                        transcodeAll:aString
                        of:answer
                        to:replacementString
                        startingAt:(answer size - aString size) + 1
        ]
    ].
    ^ answer
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4636	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4637
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4638	!PhoneticStringUtilities::PhonemStringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4639
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4640	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4641	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4642	Implementation of the PHONEM algorithm, as described in
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4643	'Georg Wilde and Carsten Meyer, Doppelgaenger gesucht -
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4644	Ein Programm fuer kontextsensitive phonetische Textumwandlung
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4645	ct Magazin fuer Computer & Technik 25/1998'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4646
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4647	This algorithm deals better with the German language (it takes care of umlauts).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4648	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4649	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4650
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4651	!PhoneticStringUtilities::PhonemStringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4652
encode:aString
    "Answer the PHONEM key of aString.
     The input is converted to uppercase. Then, scanning from left to right,
     the two-character sequences listed in the table below are replaced,
     single characters are transliterated, every character which is not one of
     'ABCDLMNORSUVWXY' is removed and finally runs of the same character are
     squashed into a single one (see the examples at the end of this method)."

    |s idx t t2 digraphs|

    "/ flat list of (two-character sequence, replacement) pairs
    digraphs := #(
        'SC' 'C'
        'SZ' 'C'
        'CZ' 'C'
        'TZ' 'C'
        'TS' 'C'
        'KS' 'X'
        'PF' 'V'
        'QU' 'KW'
        'PH' 'V'
        'UE' 'Y'
        'AE' 'E'
        'OE' 'Ö'
        'EI' 'AY'
        'EY' 'AY'
        'EU' 'OY'
        'AU' 'A§'
        'OU' '§ '
    ).

    s := aString asUppercase.

    idx := 1.
    "/ a pair occupies idx and idx+1, so the last pair to look at starts at (s size - 1).
    "/ The former bound (idx < (s size - 1)) never examined the final pair,
    "/ e.g. the TZ in MATZ was not replaced.
    [idx < s size] whileTrue:[
        t := s copyFrom:idx to:idx+1.
        t2 := nil.
        1 to:digraphs size by:2 do:[:i |
            (t2 isNil and:[t = (digraphs at:i)]) ifTrue:[
                t2 := digraphs at:i+1
            ].
        ].
        t2 notNil ifTrue:[
            "/ idx is not advanced here, so the replacement may itself start another pair
            s := (s copyTo:idx-1),t2,(s copyFrom:idx+2 to:s size)
        ] ifFalse:[
            idx := idx + 1.
        ].
    ].

    "/ single character substitutions via tr
    "/ P is mapped to B (it was mapped to D before), as in the published PHONEM table
    "/ and as expected by the wepper example below.
    "/ NOTE(review): the replacement string is one character longer than the source set;
    "/ confirm what the trailing A is meant for.
    s := s copyTransliterating:'ÖÄZKGQÜIJFWPT§' to:'YECCCCYYYVVBDUA'.
    "/ drop everything which is not part of the code alphabet
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'' complement:true squashDuplicates:false.
    "/ collapse runs of the same code letter
    s := s copyTransliterating:'ABCDLMNORSUVWXY' to:'ABCDLMNORSUVWXY' complement:false squashDuplicates:true.
    ^ s

    "
     self basicNew encode:'müller'  -> 'MYLR'
     self basicNew encode:'mueller' -> 'MYLR'
     self basicNew encode:'möller'  -> 'MYLR'
     self basicNew encode:'miller'  -> 'MYLR'
     self basicNew encode:'muller'  -> 'MULR'
     self basicNew encode:'muler'   -> 'MULR'
     self basicNew encode:'matz'    -> 'MAC'
     self basicNew encode:'ei'      -> 'AY'

     self basicNew phoneticStringsFor:'müller'  #('MYLR')
     self basicNew phoneticStringsFor:'mueller' #('MYLR')
     self basicNew phoneticStringsFor:'möller'  #('MYLR')
     self basicNew phoneticStringsFor:'miller'  #('MYLR')
     self basicNew phoneticStringsFor:'muller'  #('MULR')
     self basicNew phoneticStringsFor:'muler'   #('MULR')

     self basicNew phoneticStringsFor:'schmidt'   #('CMYD')
     self basicNew phoneticStringsFor:'schneider' #('CNAYDR')
     self basicNew phoneticStringsFor:'fischer'   #('VYCR')
     self basicNew phoneticStringsFor:'weber'     #('VBR')
     self basicNew phoneticStringsFor:'weeber'    #('VBR')
     self basicNew phoneticStringsFor:'webber'    #('VBR')
     self basicNew phoneticStringsFor:'wepper'    #('VBR')

     self basicNew phoneticStringsFor:'meyer' #('MAYR')
     self basicNew phoneticStringsFor:'maier' #('MAYR')
     self basicNew phoneticStringsFor:'mayer' #('MAYR')
     self basicNew phoneticStringsFor:'mayr'  #('MAYR')
     self basicNew phoneticStringsFor:'meir'  #('MAYR')

     self basicNew phoneticStringsFor:'wagner'   #('VACNR')
     self basicNew phoneticStringsFor:'schulz'   #('CULC')
     self basicNew phoneticStringsFor:'becker'   #('BCR')
     self basicNew phoneticStringsFor:'hoffmann' #('OVMAN')
     self basicNew phoneticStringsFor:'haus'     #('AUS')

     self basicNew phoneticStringsFor:'schäfer'   #('CVR')
     self basicNew phoneticStringsFor:'scheffer'  #('CVR')
     self basicNew phoneticStringsFor:'schaeffer' #('CVR')
     self basicNew phoneticStringsFor:'schaefer'  #('CVR')
    "

    "Created: / 28-07-2017 / 15:38:08 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4734	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4735
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4736	!PhoneticStringUtilities::Caverphone2StringComparator class methodsFor:'documentation'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4737
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4738	documentation
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4739	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4740	Caverphone (2) Algorithm:
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4741
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4742	see http://caversham.otago.ac.nz/files/working/ctp150804.pdf
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4743
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4744	Caverphone 2.0 is being made available for free use for the benefit of anyone who has a use for it,
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4745	with the proviso that the Caversham Project at the University of Otago should be acknowledged as the
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4746	original source (which is hereby done ;-).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4747
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4748	• Start with a Surname or Firstname
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4749	• Convert to lowercase
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4750	This coding system is case sensitive, implementations should acknowledge that a is not the same as A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4751	• Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4752	The main intention of this is to remove spaces, hyphens, and apostrophes.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4753	example: o'brian becomes obrian
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4754	• If the name starts with cough make it cou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4755	2 is being used as a temporary placeholder to indicate a consonant which we are no longer interested in.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4756	• If the name starts with rough make it rou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4757	• If the name starts with tough make it tou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4758	• If the name starts with enough make it enou2f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4759	• If the name starts with gn make it 2n
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4760	• If the name ends with mb make it m2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4761	• replace cq with 2q
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4762	• replace ci with si
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4763	• replace ce with se
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4764	• replace cy with sy
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4765	• replace tch with 2ch
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4766	• replace c with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4767	• replace q with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4768	• replace x with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4769	• replace v with f
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4770	• replace dg with 2g
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4771	• replace tio with sio
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4772	• replace tia with sia
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4773	• replace d with t
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4774	• replace ph with fh
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4775	• replace b with p
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4776	• replace sh with s2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4777	• replace z with s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4778	• replace an initial vowel with an A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4779	• replace all other vowels with a 3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4780	3 is a temporary placeholder marking a vowel
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4781	• replace 3gh3 with 3kh3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4782	Exceptions are dealt with before the general case. gh between vowels is an exception to the more general gh rule.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4783	• replace gh with 22
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4784	• replace g with k
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4785	• replace groups of the letter s with a S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4786	Continuous strings of s are replaced by a single S
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4787	• replace groups of the letter t with a T
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4788	• replace groups of the letter p with a P
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4789	• replace groups of the letter k with a K
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4790	• replace groups of the letter f with a F
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4791	• replace groups of the letter m with a M
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4792	• replace groups of the letter n with a N
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4793	• replace w3 with W3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4794	• replace wy with Wy
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4795	• replace wh3 with Wh3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4796	• replace why with Why
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4797	• replace w with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4798	• replace an initial h with an A
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4799	• replace all other occurrences of h with a 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4800	• replace r3 with R3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4801	• replace ry with Ry
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4802	• replace r with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4803	• replace l3 with L3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4804	• replace ly with Ly
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4805	• replace l with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4806	• replace j with y
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4807	• replace y3 with Y3
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4808	• replace y with 2
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4809	• remove all 2s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4810	• remove all 3s
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4811	• put six (v1) / ten (v2) 1s on the end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4812	• take the first six characters as the code (caverphone 1);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4813	/ take the first ten characters as the code (caverphone 2);
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4814
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4815	self new encode:'david' -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4816	self new encode:'whittle' -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4817
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4818	self new encode:'Stevenson' -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4819	self new encode:'Peter' -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4820
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4821	self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4822	self new encode:'lee' -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4823	self new encode:'Gutierrez' -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4824	self new encode:'Pfister' -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4825	self new encode:'Jackson' -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4826	self new encode:'Tymczak' -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4827
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4828	self new encode:'add' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4829	self new encode:'aid' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4830	self new encode:'at' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4831	self new encode:'art' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4832	self new encode:'earth' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4833	self new encode:'head' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4834	self new encode:'old' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4835
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4836	self new encode:'ready' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4837	self new encode:'rather' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4838	self new encode:'able' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4839	self new encode:'appear' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4840
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4841	self new encode:'Deedee' -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4842	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4843	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4844
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4845	!PhoneticStringUtilities::Caverphone2StringComparator methodsFor:'api'!
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4846
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4847	encode:word
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4848	\|txt\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4849
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4850	word size == 0 ifTrue:[^ '1111111111' ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4851
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4852	"/ 1. Convert to lowercase
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4853	txt := word asLowercase.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4854
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4855	"/ 2. Remove anything not A-Z
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4856	txt := txt select:#isLetter.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4857
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4858	#(
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4859	"/ oldSeq newSeq repeat
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4860
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4861	"/ 2.5. Remove final e
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4862	'e$' '' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4863	"/ 3. Handle various start options
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4864	'^cough' 'cou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4865	'^rough' 'rou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4866	'^tough' 'tou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4867	'^enough' 'enou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4868	'^trough' 'trou2f' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4869
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4870	'^gn' '2n' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4871	'mb$' 'm2' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4872
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4873	"/ 4. Handle replacements
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4874	'cq' '2q' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4875	'ci' 'si' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4876	'ce' 'se' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4877	'cy' 'sy' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4878	'tch' '2ch' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4879	'c' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4880	'q' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4881	'x' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4882	'v' 'f' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4883	'dg' '2g' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4884	'tio' 'sio' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4885	'tia' 'sia' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4886	'd' 't' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4887	'ph' 'fh' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4888	'b' 'p' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4889	'sh' 's2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4890	'z' 's' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4891
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4892	'^a' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4893	'^e' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4894	'^i' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4895	'^o' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4896	'^u' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4897
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4898	'a' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4899	'e' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4900	'i' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4901	'o' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4902	'u' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4903	'j' 'y' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4904
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4905	'^y3' 'Y3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4906	'^y' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4907
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4908	'y' '3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4909	'3gh3' '3kh3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4910	'gh' '22' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4911	'g' 'k' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4912	's' 'S' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4913	'SS' 'S' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4914	't' 'T' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4915	'TT' 'T' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4916	'p' 'P' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4917	'PP' 'P' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4918	'k' 'K' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4919	'KK' 'K' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4920	'f' 'F' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4921	'FF' 'F' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4922	'm' 'M' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4923	'MM' 'M' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4924	'n' 'N' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4925	'NN' 'N' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4926	'w3' 'W3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4927	'wh3' 'Wh3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4928	'w$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4929	'w' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4930	'^h' 'A' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4931	'h' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4932	'r3' 'R3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4933	'r$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4934	'r' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4935	'l3' 'L3' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4936	'l$' '3' false
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4937	'l' '2' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4938
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4939	"/ 5. removals
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4940
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4941	'2' '' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4942	'3$' 'A' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4943	'3' '' true
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4944	) inGroupsOf:3 do:[:pat :repl :repeat\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4945	\|s txtBefore\|
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4946
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4947	txtBefore := txt.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4948	(pat startsWith:$^) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4949	s := pat copyButFirst.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4950	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4951	[txt startsWith:s] whileTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4952	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4953	(txt startsWith:s) ifTrue:[ txt := repl,(txt copyButFirst:s size) ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4954	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4955	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4956	(pat endsWith:$$) ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4957	s := pat copyButLast.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4958	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4959	[txt endsWith:s] whileTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4960	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4961	(txt endsWith:s) ifTrue:[ txt := (txt copyButLast:s size),repl ]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4962	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4963	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4964	repeat ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4965	txt := txt copyReplaceAllSubcollections:pat with:repl
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4966	] ifFalse:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4967	txt := txt copyReplaceSubcollection:pat with:repl
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4968	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4969	]
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4970	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4971	"/ txt ~= txtBefore ifTrue:[
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4972	"/ Transcript showCR:(pat,' \| ',repl,' -> ',txt).
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4973	"/ ].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4974	].
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4975
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4976	"/ 6. put ten 1s on the end
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4977	txt := txt,'1111111111'.
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4978
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4979	"/ 7. take the first ten characters as the code
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4980	^ txt copyTo:10
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4981
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4982	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4983	self new encode:'david' -> 'TFT1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4984	self new encode:'whittle' -> 'WTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4985
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4986	self new encode:'Stevenson' -> 'STFNSN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4987	self new encode:'Peter' -> 'PTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4988
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4989	self new encode:'washington' -> 'WSNKTN1111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4990	self new encode:'lee' -> 'LA11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4991	self new encode:'Gutierrez' -> 'KTRS111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4992	self new encode:'Pfister' -> 'PFSTA11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4993	self new encode:'Jackson' -> 'YKSN111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4994	self new encode:'Tymczak' -> 'TMKSK11111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4995
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4996	self new encode:'add' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4997	self new encode:'aid' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4998	self new encode:'at' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	4999	self new encode:'art' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5000	self new encode:'earth' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5001	self new encode:'head' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5002	self new encode:'old' -> 'AT11111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5003
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5004	self new encode:'ready' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5005	self new encode:'rather' -> 'RTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5006	self new encode:'able' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5007	self new encode:'appear' -> 'APA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5008
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5009	self new encode:'Deedee' -> 'TTA1111111'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5010	"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5011
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5012	"Created: / 28-07-2017 / 15:21:23 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5013	"Modified: / 02-08-2017 / 01:42:35 / cg"
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5014	! !
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5015
4488 51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5016	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator class methodsFor:'documentation'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5017
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5018	documentation
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5019	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5020	The 'Kölner Phonetik' (Cologne phonetics) code is for the German language
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5021	what the soundex code is for English:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5022	it returns similar strings for similar sounding words
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5023	(but is specifically aware of the pronunciation of German and eastern languages).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5024
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5025	There are some other differences to soundex, though:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5026	its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5027	it does not start with the first character of the input, but returns a pure numeric string.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5028
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5029	This algorithm was described by Postel in 1969.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5030	See http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5031
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5032	self new phoneticStringsFor:'Müller-Lüdenscheidt' -> #('65752682')
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5033	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5034	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5035
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5036	examples
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5037	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5038	Words that sound similar (in German pronunciation) will deliver a similar code:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5039
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5040	#(
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5041	'Müller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5042	'Miller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5043	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5044	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5045	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5046	'Mülherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5047	'Myler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5048	'Millar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5049	'Myller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5050	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5051	'Müler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5052	'Muehler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5053	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5054	'Müllerr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5055	'Muehlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5056	'Muellar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5057	'Mueler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5058	'Mülleer'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5059	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5060	'Nüller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5061	'Nyller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5062	'Niler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5063	'Czerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5064	'Tscherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5065	'Czernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5066	'Tschernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5067	'Schernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5068	'Scherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5069	'Scherno'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5070	'Czerne'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5071	'Zerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5072	'Tzernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5073	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5074	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5075	'Breschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5076	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5077	'Braeschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5078	'Braessneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5079	'Pressneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5080	'Presznäph'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5081	'Präschnäf'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5082	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5083	'Breschnijeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5084	'Breschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5085	'Bräschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5086	'Braschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5087	'Broschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5088	) do:[:w \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5089	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5090	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5091	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5092	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5093
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5094	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'api'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5095
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5096	encode: aString
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5097	"return a koelner phonetic code.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5098	The koelnerPhonetic code is for the German language what the soundex code is for English;
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5099	it returns similar strings for similar sounding words.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5100	There are some differences to soundex, though:
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5101	its length is not limited to 4, but depends on the length of the original string;
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5102	it does not start with the first character of the input.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5103	This algorithm is described by Postel 1969"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5104
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5105	\|in ret val rslt\|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5106
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5107	in := aString withoutSeparators asLowercase.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5108	in := in copyReplaceString:'ph' withString:'f'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5109	(in includesAny:'öäüß') ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5110	in := in copyReplaceAll:$ü withAll:'u'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5111	in := in copyReplaceAll:$ä withAll:'a'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5112	in := in copyReplaceAll:$ö withAll:'o'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5113	in := in copyReplaceAll:$ß withAll:'ss'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5114	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5115	in := in select:[:ch \| ch isLetter].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5116	in := '#',in,'#'.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5117
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5118	ret := ''.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5119	1 to:in size-2 do:[:i \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5120	\|sub\|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5121
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5122	sub := in copyFrom:i to:i+2.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5123	val := (i==1)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5124	ifTrue:[ self convertFirst:sub ]
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5125	ifFalse:[ self convertRest:sub ].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5126	ret := ret,val
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5127	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5128
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5129	ret := ret select:[:ch \| ch ~= $-].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5130
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5131	(ret startsWith:'0') ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5132	ret := '0',(ret select:[:ch \| ch ~= $0]).
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5133	] ifFalse:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5134	ret := ret select:[:ch \| ch ~= $0].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5135	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5136
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5137	rslt := String streamContents:[:s \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5138	\|prev\|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5139
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5140	ret do:[:ch \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5141	ch ~= prev ifTrue:[
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5142	s nextPut:ch
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5143	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5144	prev := ch.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5145	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5146	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5147	^ rslt.
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5148
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5149	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5150	#(
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5151	'Müller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5152	'Miller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5153	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5154	'Mühler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5155	'Mühlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5156	'Mülherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5157	'Myler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5158	'Millar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5159	'Myller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5160	'Müllar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5161	'Müler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5162	'Muehler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5163	'Mülller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5164	'Müllerr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5165	'Muehlherr'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5166	'Muellar'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5167	'Mueler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5168	'Mülleer'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5169	'Mueller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5170	'Nüller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5171	'Nyller'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5172	'Niler'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5173	'Czerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5174	'Tscherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5175	'Czernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5176	'Tschernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5177	'Schernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5178	'Scherny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5179	'Scherno'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5180	'Czerne'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5181	'Zerny'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5182	'Tzernie'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5183	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5184	'Breschnew'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5185	'Breschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5186	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5187	'Braeschneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5188	'Braessneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5189	'Pressneff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5190	'Presznäph'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5191	'Präschnäf'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5192	'Breschnjeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5193	'Breschnijeff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5194	'Breschnieff'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5195	) do:[:w \|
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5196	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:w)
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5197	].
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5198	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5199
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5200	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5201	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnew' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5202	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschneff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5203	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braeschneff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5204	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Braessneff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5205	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Pressneff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5206	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Presznäph' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5207	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Präschnäf' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5208	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnjeff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5209	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnijeff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5210	PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator new encode:'Breschnieff' -> '17863'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5211	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5212	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5213	self basicNew encode:'müller' -> '657'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5214	self basicNew encode:'möller' -> '657'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5215	self basicNew encode:'miller' -> '657'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5216	self basicNew encode:'muller' -> '657'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5217	self basicNew encode:'muler' -> '657'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5218	self basicNew encode:'schmidt' -> '862'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5219	self basicNew encode:'schneider' -> '8627'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5220	self basicNew encode:'fischer' -> '387'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5221	self basicNew encode:'weber' -> '317'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5222	self basicNew encode:'meyer' -> '67'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5223	self basicNew encode:'wagner' -> '3467'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5224	self basicNew encode:'schulz' -> '858'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5225	self basicNew encode:'becker' -> '147'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5226	self basicNew encode:'hoffmann' -> '036'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5227	self basicNew encode:'schäfer' -> '837'
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5228	"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5229
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5230	"Created: / 28-07-2017 / 15:24:33 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5231	! !
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5232
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5233	!PhoneticStringUtilities::KoelnerPhoneticCodeStringComparator methodsFor:'private'!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5234
convertFirst:chars
    "Answer the code string for the first letter of a word.

     chars is expected to be a 3-character window; the letter being
     encoded is its 2nd element and the 3rd element is the character
     following it
     (NOTE(review): presumably the 1st element is padding at the start
      of a word - confirm against the caller).

     Special rules handled here:
        middle is one of a e i j y o u              -> '0'
        middle is c, followed by one of
                   a h k l o q r u x                -> '4'
        middle is c, followed by anything else      -> '8'

     Everything else (including a window whose size is not 3)
     is forwarded to #convertRest:.
     This used to be done with glob patterns ('#a#', '#ca', '#c#', ...)
     matched against chars; the direct character tests below are the
     replacement for those patterns."

    |middle following|

    chars size == 3 ifFalse:[
        ^ self convertRest:chars
    ].

    middle := chars at:2.
    ('aeijyou' includes:middle) ifTrue:[^ '0'].

    middle == $c ifTrue:[
        following := chars at:3.
        ^ ('ahkloqrux' includes:following)
            ifTrue:['4']
            ifFalse:['8']
    ].

    ^ self convertRest:chars

    "Modified: / 29-07-2017 / 14:22:20 / cg"
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5292	!
51f2907c7389 #BUGFIX by cg Claus Gittinger <cg@exept.de> parents: 4487 diff changeset	5293
convertRest:chars
    "Answer the code string for the 2nd character of the 3-character
     window chars, by scanning an ordered rule table; the first rule
     which applies wins.

     Each rule is a 3-element array; its last element is the code answered:
        (prefix nil    code)   chars starts with the 2-character prefix
        (nil    suffix code)   chars ends with the 2-character suffix
        (nil    char   code)   the 2nd character of chars is char
        (nil    nil    code)   catch-all, answers '?'

     A window whose size is not 3 raises an error and answers '?'
     (if the error is proceeded).
     The rules were once glob patterns matched against chars;
     that was too slow, hence the explicit prefix/suffix/char form."

    |current|

    chars size == 3 ifFalse:[
        self error:'cannot happen'.
        ^ '?'
    ].
    current := chars at:2.

    #(
        "d or t followed by s, c or z"
        (nil 'ds' '8')
        (nil 'dc' '8')
        (nil 'dz' '8')
        (nil 'ts' '8')
        (nil 'tc' '8')
        (nil 'tz' '8')
        "any other d or t"
        (nil $d   '2')
        (nil $t   '2')
        "x preceded by c, k or q"
        ('cx' nil '8')
        ('kx' nil '8')
        ('qx' nil '8')
        "any other x"
        (nil $x   '48')
        "c or z preceded by s"
        ('sc' nil '8')
        ('sz' nil '8')
        "c followed by a, o, u, h, k, x or q"
        (nil 'ca' '4')
        (nil 'co' '4')
        (nil 'cu' '4')
        (nil 'ch' '4')
        (nil 'ck' '4')
        (nil 'cx' '4')
        (nil 'cq' '4')
        "any other c"
        (nil $c   '8')
        "vowels, j and y"
        (nil $a   '0')
        (nil $e   '0')
        (nil $i   '0')
        (nil $j   '0')
        (nil $y   '0')
        (nil $o   '0')
        (nil $u   '0')
        "h has its own marker code"
        (nil $h   '-')
        "remaining single letters"
        (nil $l   '5')
        (nil $r   '7')
        (nil $m   '6')
        (nil $n   '6')
        (nil $s   '8')
        (nil $z   '8')
        (nil $b   '1')
        (nil $p   '1')
        (nil $f   '3')
        (nil $v   '3')
        (nil $w   '3')
        (nil $g   '4')
        (nil $k   '4')
        (nil $q   '4')
        "catch-all: nothing above applied"
        (nil nil  '?')
    ) do:[:rule |
        |prefix probe|

        prefix := rule at:1.
        prefix notNil ifTrue:[
            "rule on the first two characters of the window"
            (chars startsWith:prefix) ifTrue:[^ rule last].
        ] ifFalse:[
            probe := rule at:2.
            probe isCharacter ifTrue:[
                "rule on the middle character alone"
                current == probe ifTrue:[^ rule last].
            ] ifFalse:[
                "neither character nor string: this is the catch-all rule"
                probe isString ifFalse:[^ '?'].
                "rule on the last two characters of the window"
                (chars endsWith:probe) ifTrue:[^ rule last].
            ]
        ].
    ].

    self error:'cannot happen'

    "Modified: / 29-07-2017 / 14:17:38 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5381	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5382
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5383	!PhoneticStringUtilities::MiracodeStringComparator class methodsFor:'documentation'!
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5384
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5385	documentation
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5386	"
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5387	Miracode (also called << American Soundex >>) is like Soundex with the
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5388	addition that h and w are discarded if they separate consonants.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5389
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5390	These variants may be specifically important because they were used in
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5391	U.S. National Archives.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5392	Most archive data were encoded with Miracode,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5393	but there are some (older) entries encoded with Simplified Soundex.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5394
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5395	The HW-rule was documented as a standard in 1910,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5396	but actually data of 1880, 1900 and 1910
3185 9833bbba2050 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 2580 diff changeset	5397	censuses were encoded with mixed methods.
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5398
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5399	self new encode:'washington' -> 'W252'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5400	self new encode:'lee' -> 'L000'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5401	self new encode:'Gutierrez' -> 'G362'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5402	self new encode:'Pfister' -> 'P236'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5403	self new encode:'Jackson' -> 'J250'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5404	self new encode:'Tymczak' -> 'T522'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5405
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5406	notice:
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5407	MiracodeStringComparator new encode:'Ashcraft' -> 'A261'
d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5408	SoundexStringComparator new encode:'Ashcraft' -> 'A226'
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5409
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5410	see also:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5411	https://www.archives.gov/research/census/soundex.html
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5412	"
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5413	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5414
4491 d6c31bb1e928 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4490 diff changeset	5415	!PhoneticStringUtilities::MiracodeStringComparator methodsFor:'private'!
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5416
encode:word
    "Answer the 4-character Miracode of word: its first character
     (uppercased), followed by the codes of the remaining characters
     as answered by #translate:, padded on the right with '0'.

     A character contributes nothing if #translate: answers nil or '0'
     for it, or if its code equals the remembered previous code.
     Same as the inherited encoding, except for W and H:
     these do not update the remembered previous code, so letters with
     the same code which are separated only by W or H are coded once.

     word must not be empty (#first is sent to it).
     NOTE(review): #translate: is presumably answering a one-digit string
     - confirm."

    |upper code digit lastDigit|

    upper := word asUppercase.
    code := upper first asString.
    lastDigit := self translate:upper first.

    2 to:upper size do:[:idx |
        |ch|

        ch := upper at:idx.
        digit := self translate:ch.
        (digit isNil or:[ digit = '0' or:[ digit = lastDigit ]]) ifFalse:[
            code := code , digit.
            "stop as soon as the code is complete"
            code size == 4 ifTrue:[^ code ].
        ].
        "W and H are transparent: keep the code seen before them"
        (ch = $W or:[ch = $H]) ifFalse:[
            lastDigit := digit.
        ].
    ].

    "short words: fill up with zeros"
    [ code size < 4 ] whileTrue:[
        code := code , '0'
    ].
    ^ code copyFrom:1 to:4

    "Created: / 02-08-2017 / 00:19:47 / cg"
    "Modified (comment): / 02-08-2017 / 14:30:47 / cg"
4489 2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5444	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5445
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5446	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator class methodsFor:'documentation'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5447
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5448	documentation
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5449	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5450	The 'Spanish Phonetic' code is for the Spanish language
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5451	what the Soundex code is for English:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5452	it returns similar strings for similar sounding words
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5453	(but is specifically aware of the pronunciation of Spanish).
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5454
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5455	There are some other differences to soundex, though:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5456	its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5457	it does not start with the first character of the input,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5458	but returns a pure numeric string,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5459	it uses different character groups
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5460
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5461	This algorithm was described by María del Pilar Angeles, Adrian Espino-Gamez,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5462	and Jonathan Gil-Moncada, in
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5463	'Comparison of a Modified Spanish phonetic,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5464	Soundex, and Phonex coding functions during data matching process'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5465	See https://www.researchgate.net/publication/285589803_Comparison_of_a_Modified_Spanish_Phonetic_Soundex_and_Phonex_coding_functions_during_data_matching_process
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5466
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5467	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5468	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5469
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5470	examples
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5471	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5472	words sounding similar (spanish pronunciation) will deliver a similar code:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5473
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5474	#(
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5475	'María'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5476	'Pilar'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5477	'Angeles'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5478	'Adrian'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5479	'Gamez'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5480	) do:[:w \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5481	Transcript show:w; show:'->'; showCR:(PhoneticStringUtilities::SpanishPhoneticCodeStringComparator new encode:w)
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5482	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5483	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5484	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5485
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5486	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'api'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5487
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5488	encode: aString
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5489	"return a spanish phonetic code (a string consisting of the digits 0..9 only) for aString.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5490	The spanishPhonetic code is for the spanish language what the soundex code is for english;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5491	it returns similar strings for similar sounding words.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5492	There are some differences to soundex, though:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5493	its length is not limited to 4, but depends on the length of the original string;
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5494	it does not start with the first character of the input (the result is purely numeric);
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5495	it uses different character groups: P->0 BV->1 FH->2 DT->3 SZCX->4 YL->5 NÑM->6 QK->7 GJ->8 R->9.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5496	All other characters (vowels etc.) produce no digit; a directly repeated character (LL, RR, ...) is encoded only once.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5497	This algorithm is described by María del Pilar Angeles, Adrian Espino-Gamez, Jonathan Gil-Moncada."
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5498
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5499	\|in\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5500	"fold to uppercase first, so that the group strings below only need to list uppercase letters"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5501	in := aString withoutSeparators asUppercase.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5502
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5503	^ String streamContents:[:out \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5504	\|prev\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5505
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5506	in do:[:ch \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5507	ch = prev ifFalse:[ "skip a directly repeated character; = rather than == so that equal characters which are not the same object are recognized too"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5508	ch == $P ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5509	out nextPut:$0.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5510	] ifFalse:[ ('BV' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5511	out nextPut:$1.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5512	] ifFalse:[ ('FH' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5513	out nextPut:$2.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5514	] ifFalse:[ ('DT' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5515	out nextPut:$3.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5516	] ifFalse:[ ('SZCX' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5517	out nextPut:$4.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5518	] ifFalse:[ ('YL' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5519	out nextPut:$5.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5520	] ifFalse:[ ('NÑŃM' includes:ch) ifTrue:[ "Ñ is the spanish letter and was missing here; Ń is kept only for backward compatibility"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5521	out nextPut:$6.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5522	] ifFalse:[ ('QK' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5523	out nextPut:$7.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5524	] ifFalse:[ ('GJ' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5525	out nextPut:$8.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5526	] ifFalse:[ ('R' includes:ch) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5527	out nextPut:$9.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5528	]]]]]]]]]].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5529	prev := ch.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5530	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5531	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5532	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5533
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5534	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5535	self new encode:'Jose'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5536	"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5537
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5538	"Created: / 28-07-2017 / 15:24:33 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5539	"Modified: / 01-08-2017 / 18:48:50 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5540	! !
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5541
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5542	!PhoneticStringUtilities::SpanishPhoneticCodeStringComparator methodsFor:'private'!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5543
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5544	convertFirst:chars "Answer a code string for chars. Only when chars has size 3 are its 2nd and 3rd characters inspected here; every other case is delegated to convertRest:."
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5545	\|c2 c3\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5546	"Rules: 2nd character one of a e i j y o u -> '0'; 2nd character c -> '4' if the 3rd is one of a h k l o q r u x, else '8'. All comparisons are against lowercase characters. NOTE(review): encode: of this class does not call this method and uses different digit groups - confirm whether it is still needed."
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5547	chars size == 3 ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5548	c2 := (chars at:2).
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5549	c2 == $a ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5550	c2 == $e ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5551	c2 == $i ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5552	c2 == $j ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5553	c2 == $y ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5554	c2 == $o ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5555	c2 == $u ifTrue:[^ '0'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5556
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5557	c2 == $c ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5558	c3 := (chars at:3).
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5559	(c3 == $a) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5560	(c3 == $h) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5561	(c3 == $k) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5562	(c3 == $l) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5563	(c3 == $o) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5564	(c3 == $q) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5565	(c3 == $r) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5566	(c3 == $u) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5567	(c3 == $x) ifTrue:[^ '4'].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5568	^ '8'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5569	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5570	"the commented-out table below expresses the same rules as match patterns; apparently the earlier formulation, kept for reference"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5571	"/ #(
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5572	"/ ('#a#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5573	"/ ('#e#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5574	"/ ('#i#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5575	"/ ('#j#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5576	"/ ('#y#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5577	"/ ('#o#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5578	"/ ('#u#' '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5579	"/
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5580	"/ ('#ca' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5581	"/ ('#ch' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5582	"/ ('#ck' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5583	"/ ('#cl' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5584	"/ ('#co' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5585	"/ ('#cq' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5586	"/ ('#cr' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5587	"/ ('#cu' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5588	"/ ('#cx' '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5589	"/
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5590	"/ ('#c#' '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5591	"/ ) do:[:pair \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5592	"/ (pair first match:chars) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5593	"/ ^ pair second
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5594	"/ ]
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5595	"/ ].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5596	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5597
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5598	^ self convertRest:chars
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5599
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5600	"Modified: / 29-07-2017 / 14:22:20 / cg"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5601	!
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5602
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5603	convertRest:chars "Answer the code string for chars by scanning the rule table below from top to bottom; the first matching rule wins. chars must have size 3: otherwise an error is raised and the guard then answers '?'. NOTE(review): encode: of this class does not call this method, and the codes here (e.g. r -> '7') differ from encode:'s groups (R -> 9) - confirm whether it is still needed."
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5604	chars size == 3 ifFalse:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5605	self error:'cannot happen'.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5606	^ '?'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5607	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5608	"each rule is (prefixString nil code), (nil suffixString code) or (nil middleCharacter code); this assumes that nil inside the literal array is read as the nil object - TODO confirm for the dialect in use"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5609	#(
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5610	"/ used to be matchpattern code,
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5611	"/ but doing these glob-matches is too slow.
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5612	"/ changed to:
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5613	"/ start nil code
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5614	"/ nil end code
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5615	"/ nil char code
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5616	"/
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5617	(nil 'ds' " '#ds' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5618	(nil 'dc' " '#dc' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5619	(nil 'dz' " '#dz' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5620	(nil 'ts' " '#ts' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5621	(nil 'tc' " '#tc' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5622	(nil 'tz' " '#tz' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5623	(nil $d " '#d#' " '2')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5624	(nil $t " '#t#' " '2')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5625	('cx' nil " 'cx#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5626	('kx' nil " 'kx#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5627	('qx' nil " 'qx#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5628	(nil $x " '#x#' " '48')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5629	('sc' nil " 'sc#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5630	('sz' nil " 'sz#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5631	(nil 'ca' " '#ca' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5632	(nil 'co' " '#co' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5633	(nil 'cu' " '#cu' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5634	(nil 'ch' " '#ch' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5635	(nil 'ck' " '#ck' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5636	(nil 'cx' " '#cx' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5637	(nil 'cq' " '#cq' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5638	(nil $c " '#c#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5639	(nil $a " '#a#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5640	(nil $e " '#e#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5641	(nil $i " '#i#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5642	(nil $j " '#j#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5643	(nil $y " '#y#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5644	(nil $o " '#o#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5645	(nil $u " '#u#' " '0')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5646	(nil $h " '#h#' " '-')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5647	(nil $l " '#l#' " '5')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5648	(nil $r " '#r#' " '7')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5649	(nil $m " '#m#' " '6')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5650	(nil $n " '#n#' " '6')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5651	(nil $s " '#s#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5652	(nil $z " '#z#' " '8')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5653	(nil $b " '#b#' " '1')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5654	(nil $p " '#p#' " '1')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5655	(nil $f " '#f#' " '3')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5656	(nil $v " '#v#' " '3')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5657	(nil $w " '#w#' " '3')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5658	(nil $g " '#g#' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5659	(nil $k " '#k#' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5660	(nil $q " '#q#' " '4')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5661	(nil nil " '###' " '?')
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5662	) do:[:vector \|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5663	\|v1 v2\|
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5664
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5665	(v1 := vector at:1) notNil ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5666	"/ prefix
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5667	(chars startsWith:v1) ifTrue:[^ (vector at:3) ].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5668	] ifFalse:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5669	(v2 := vector at:2) isCharacter ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5670	"/ middle character compare
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5671	(chars at:2) == v2 ifTrue:[^ (vector at:3) ].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5672	] ifFalse:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5673	v2 isString ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5674	"/ suffix
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5675	(chars endsWith:v2) ifTrue:[^ (vector at:3) ].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5676	] ifFalse:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5677	^ '?'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5678	]
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5679	]
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5680	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5681
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5682	"/ (vector first match:chars) ifTrue:[
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5683	"/ ^ vector second
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5684	"/ ]
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5685	].
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5686	"not expected to be reached: the last rule (nil nil '?') makes the loop answer '?' for anything the earlier rules did not match"
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5687	self error:'cannot happen'
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5688
2d7af11ffcd7 #FEATURE by cg Claus Gittinger <cg@exept.de> parents: 4488 diff changeset	5689	"Modified: / 29-07-2017 / 14:17:38 / cg"
2208 d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5690	! !
d430693b581a +mySQL soundex Claus Gittinger <cg@exept.de> parents: 2207 diff changeset	5691
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5692	!PhoneticStringUtilities class methodsFor:'documentation'!
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5693
33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5694	version "answer the version-control header string of this source file; written here as a keyword placeholder, presumably expanded by the source code manager - TODO confirm"
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	5695	^ '$Header$'
2285 0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	5696	!
0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	5697
0527d18cfec9 changed: #documentation Claus Gittinger <cg@exept.de> parents: 2215 diff changeset	5698	version_CVS "answer the same header string as #version, under the CVS-specific selector"
3646 82247702d48b #DOCUMENTATION Claus Gittinger <cg@exept.de> parents: 3489 diff changeset	5699	^ '$Header$'
2197 33e71ed6cf32 initial checkin Claus Gittinger <cg@exept.de> parents: diff changeset	5700	! !
3185 9833bbba2050 class: PhoneticStringUtilities Claus Gittinger <cg@exept.de> parents: 2580 diff changeset	5701

author	Claus Gittinger <cg@exept.de>
	Sat, 02 May 2020 21:40:13 +0200
changeset 5476	7355a4b11cb6
parent 5456	3040ec2b4531
permissions	-rw-r--r--