#DOCUMENTATION by cg
class: PhoneticStringUtilities::MetaphoneStringComparator
removed: #maxCodeLen
changed: #encode:
class: PhoneticStringUtilities::MetaphoneStringComparator class
comment/format in: #documentation
--- a/PhoneticStringUtilities.st Wed Aug 02 16:41:52 2017 +0200
+++ b/PhoneticStringUtilities.st Thu Aug 03 15:05:59 2017 +0200
@@ -3068,6 +3068,8 @@
documentation
"
+ Ongoing work - do not use at the moment
+
Encodes a string into a Metaphone value.
Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
@@ -3580,12 +3582,11 @@
self new encode:'dumb'
"
- |hard txtLength local code inwd ch ch2 wdsz n maxCodeLen|
+ |hard txtLength local code inwd ch ch2 wdsz n|
inwd := txt.
hard := false.
txtLength := 0.
- maxCodeLen := self maxCodeLen.
(txtLength := txt size) == 0 ifTrue:[^ ''].
@@ -3640,7 +3641,7 @@
wdsz := local size.
n := 1.
- [ (code size < maxCodeLen) and:[ n <= wdsz ] ] whileTrue:[
+ [ n <= wdsz ] whileTrue:[
"/ max code size of 4 works well
|symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar|
@@ -3655,8 +3656,8 @@
nextNextChar := local at:(n+2)
].
- "/ remove duplicate letters except C
- (symb ~~ $C and:[ nextChar == symb ]) ifFalse:[
+ "/ skip a letter that is immediately followed by the same letter - except for C, and except at the first position of the word
+ (symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
"/ not dup
('AEIOU' includes:symb) ifTrue:[
"/ only use vowel if leading char
@@ -3671,7 +3672,7 @@
"/ }
"/ code.append(symb);
"/ break;
- ((prevChar == $M) and:[isLastChar]) ifTrue:[
+ (isLastChar and:[ prevChar == $M]) ifTrue:[
"/ B is silent if word ends in MB
] ifFalse:[
code nextPut:symb.
@@ -3890,51 +3891,59 @@
] ifFalse:[
('S' includes:symb) ifTrue:[
-"/ case 'S':
-"/ if (regionMatch(local,n,"SH") ||
-"/ regionMatch(local,n,"SIO") ||
-"/ regionMatch(local,n,"SIA")) {
-"/ code.append('X');
-"/ } else {
-"/ code.append('S');
-"/ }
-"/ break;
+ "/ case 'S':
+ "/ if (regionMatch(local,n,"SH") ||
+ "/ regionMatch(local,n,"SIO") ||
+ "/ regionMatch(local,n,"SIA")) {
+ "/ code.append('X');
+ "/ } else {
+ "/ code.append('S');
+ "/ }
+ "/ break;
"/ SH -> X (as in shave or ashton)
"/ SIO -> X
"/ SIA -> X (as in ASIA)
((nextChar == $H)
- or:[
- ((nextChar == $I)
- and:[
- (((local from:n) startsWith:'SIO')
- or:[ ((local from:n) startsWith:'SIA') ])
- ]
- )
- ]) ifTrue:[
- code nextPut:$X
+ or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
+ ) ifTrue:[
+ code nextPut:$X
] ifFalse:[
- code nextPut:$S
+ code nextPut:$S
]
] ifFalse:[
('T' includes:symb) ifTrue:[
-"/ case 'T':
-"/ if (regionMatch(local,n,"TIA") ||
-"/ regionMatch(local,n,"TIO")) {
-"/ code.append('X');
-"/ break;
-"/ }
-"/ if (regionMatch(local,n,"TCH")) {
-"/ // Silent if in "TCH"
-"/ break;
-"/ }
-"/ // substitute numeral 0 for TH (resembles theta after all)
-"/ if (regionMatch(local,n,"TH")) {
-"/ code.append('0');
-"/ } else {
-"/ code.append('T');
-"/ }
-"/ break;
- self halt.
+ "/ case 'T':
+ "/ if (regionMatch(local,n,"TIA") ||
+ "/ regionMatch(local,n,"TIO")) {
+ "/ code.append('X');
+ "/ break;
+ "/ }
+ "/ if (regionMatch(local,n,"TCH")) {
+ "/ // Silent if in "TCH"
+ "/ break;
+ "/ }
+ "/ // substitute numeral 0 for TH (resembles theta after all)
+ "/ if (regionMatch(local,n,"TH")) {
+ "/ code.append('0');
+ "/ } else {
+ "/ code.append('T');
+ "/ }
+ "/ break;
+ (nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
+ code nextPut:$X.
+ ] ifFalse:[
+ (nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
+ "/ Silent if in "TCH"
+ "/ cg: doubtful - in 'hutch', I think there is an audible T (to be verified)
+ ] ifFalse:[
+ "/ substitute numeral 0 for TH (resembles theta after all)
+ nextChar == $H ifTrue:[
+ code nextPut:$0.
+ ] ifFalse:[
+ code nextPut:$T.
+ ].
+ ].
+ ].
] ifFalse:[
('V' includes:symb) ifTrue:[
"/ case 'V':
@@ -3969,26 +3978,17 @@
"/ break;
code nextPut:$S
] ifFalse:[
-"/ default:
-"/ // do nothing
-"/ break;
+ "/ default:
+ "/ // do nothing
+ "/ break;
]]]]]]]]]]]]]]]]. "/ end switch
]. "/ end else from symb !!= 'C'
n := n + 1.
- (code size > maxCodeLen) ifTrue:[
- code := code truncateTo:maxCodeLen
- ]
].
^ code contents
"Created: / 02-08-2017 / 09:51:31 / cg"
- "Modified: / 02-08-2017 / 12:00:38 / cg"
-!
-
-maxCodeLen
- ^ 4
-
- "Created: / 02-08-2017 / 09:51:59 / cg"
+ "Modified: / 03-08-2017 / 14:55:22 / cg"
! !
!PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!