#DOCUMENTATION by cg
author Claus Gittinger <cg@exept.de>
Thu, 03 Aug 2017 15:05:59 +0200
changeset 4495 5d2da4bddbda
parent 4494 8d08d2d86136
child 4496 7ba22942a728
#DOCUMENTATION by cg class: PhoneticStringUtilities::MetaphoneStringComparator removed: #maxCodeLen changed: #encode: class: PhoneticStringUtilities::MetaphoneStringComparator class comment/format in: #documentation
PhoneticStringUtilities.st
--- a/PhoneticStringUtilities.st	Wed Aug 02 16:41:52 2017 +0200
+++ b/PhoneticStringUtilities.st	Thu Aug 03 15:05:59 2017 +0200
@@ -3068,6 +3068,8 @@
 
 documentation
 "
+   Ongoing work - do not use at the moment
+   
    Encodes a string into a Metaphone value.
 
    Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
@@ -3580,12 +3582,11 @@
      self new encode:'dumb'
     "
     
-    |hard txtLength local code inwd ch ch2 wdsz n maxCodeLen|
+    |hard txtLength local code inwd ch ch2 wdsz n|
 
     inwd := txt.
     hard := false.
     txtLength := 0.
-    maxCodeLen := self maxCodeLen.
     
     (txtLength := txt size) == 0 ifTrue:[^ ''].
 
@@ -3640,7 +3641,7 @@
     wdsz := local size.
     n := 1.
 
-    [ (code size < maxCodeLen) and:[ n <= wdsz ] ] whileTrue:[
+    [ n <= wdsz ] whileTrue:[
         "/ max code size of 4 works well
 
         |symb prevChar nextChar nextNextChar isLastChar isPrevToLastChar|
@@ -3655,8 +3656,8 @@
             nextNextChar := local at:(n+2)
         ].
         
-        "/ remove duplicate letters except C
-        (symb ~~ $C and:[ nextChar == symb ]) ifFalse:[
+        "/ remove duplicate letters except C and except first
+        (symb == $C or:[ nextChar ~~ symb or:[ n == 1] ]) ifTrue:[
             "/ not dup
             ('AEIOU' includes:symb) ifTrue:[
                 "/ only use vowel if leading char
@@ -3671,7 +3672,7 @@
                 "/    }
                 "/    code.append(symb);
                 "/    break;
-                ((prevChar == $M) and:[isLastChar]) ifTrue:[
+                (isLastChar and:[ prevChar == $M]) ifTrue:[
                     "/ B is silent if word ends in MB 
                 ] ifFalse:[
                     code nextPut:symb.
@@ -3890,51 +3891,59 @@
 
             ] ifFalse:[
             ('S' includes:symb) ifTrue:[
-"/                case 'S':
-"/                    if (regionMatch(local,n,"SH") ||
-"/                        regionMatch(local,n,"SIO") ||
-"/                        regionMatch(local,n,"SIA")) {
-"/                        code.append('X');
-"/                    } else {
-"/                        code.append('S');
-"/                    }
-"/                    break;
+                "/    case 'S':
+                "/        if (regionMatch(local,n,"SH") ||
+                "/            regionMatch(local,n,"SIO") ||
+                "/            regionMatch(local,n,"SIA")) {
+                "/            code.append('X');
+                "/        } else {
+                "/            code.append('S');
+                "/        }
+                "/        break;
                 "/ SH -> X  (as in shave or ashton)
                 "/ SIO -> X 
                 "/ SIA -> X (as in ASIA)
                 ((nextChar == $H) 
-                or:[
-                    ((nextChar == $I) 
-                      and:[
-                        (((local from:n) startsWith:'SIO')
-                          or:[ ((local from:n) startsWith:'SIA') ]) 
-                      ]
-                    )
-                ]) ifTrue:[
-                   code nextPut:$X
+                  or:[ ((nextChar == $I) and:[ 'OA' includes:nextNextChar])]
+                ) ifTrue:[
+                    code nextPut:$X
                 ] ifFalse:[
-                   code nextPut:$S
+                    code nextPut:$S
                 ]
             ] ifFalse:[
             ('T' includes:symb) ifTrue:[
-"/                case 'T':
-"/                    if (regionMatch(local,n,"TIA") ||
-"/                        regionMatch(local,n,"TIO")) {
-"/                        code.append('X');
-"/                        break;
-"/                    }
-"/                    if (regionMatch(local,n,"TCH")) {
-"/                        // Silent if in "TCH"
-"/                        break;
-"/                    }
-"/                    // substitute numeral 0 for TH (resembles theta after all)
-"/                    if (regionMatch(local,n,"TH")) {
-"/                        code.append('0');
-"/                    } else {
-"/                        code.append('T');
-"/                    }
-"/                    break;
-                self halt.
+                "/    case 'T':
+                "/        if (regionMatch(local,n,"TIA") ||
+                "/            regionMatch(local,n,"TIO")) {
+                "/            code.append('X');
+                "/            break;
+                "/        }
+                "/        if (regionMatch(local,n,"TCH")) {
+                "/            // Silent if in "TCH"
+                "/            break;
+                "/        }
+                "/        // substitute numeral 0 for TH (resembles theta after all)
+                "/        if (regionMatch(local,n,"TH")) {
+                "/            code.append('0');
+                "/        } else {
+                "/            code.append('T');
+                "/        }
+                "/        break;
+                (nextChar == $I and:[ 'AO' includes:nextNextChar]) ifTrue:[
+                    code nextPut:$X.
+                ] ifFalse:[
+                    (nextChar == $C and:[ nextNextChar == $H]) ifTrue:[
+                        "/ Silent if in "TCH"
+                        "/ cg - huh? in 'hutch', methinks there is a T sound
+                    ] ifFalse:[
+                        "/ substitute numeral 0 for TH (resembles theta after all)
+                        nextChar == $H ifTrue:[
+                            code nextPut:$0.
+                        ] ifFalse:[
+                            code nextPut:$T.
+                        ].    
+                    ].    
+                ].    
             ] ifFalse:[
             ('V' includes:symb) ifTrue:[
                 "/    case 'V':
@@ -3969,26 +3978,17 @@
                 "/        break;
                 code nextPut:$S
             ] ifFalse:[
-"/                default:
-"/                    // do nothing
-"/                    break;
+                "/    default:
+                "/        // do nothing
+                "/        break;
             ]]]]]]]]]]]]]]]]. "/ end switch
         ]. "/ end else from symb !!= 'C'
         n := n + 1.
-        (code size > maxCodeLen) ifTrue:[
-            code := code truncateTo:maxCodeLen
-        ]
     ].
     ^ code contents
 
     "Created: / 02-08-2017 / 09:51:31 / cg"
-    "Modified: / 02-08-2017 / 12:00:38 / cg"
-!
-
-maxCodeLen
-    ^ 4
-
-    "Created: / 02-08-2017 / 09:51:59 / cg"
+    "Modified: / 03-08-2017 / 14:55:22 / cg"
 ! !
 
 !PhoneticStringUtilities::SoundexStringComparator class methodsFor:'documentation'!