tuned EUC decoding
author Claus Gittinger <cg@exept.de>
Fri, 04 Jul 1997 12:53:52 +0200
changeset 2750 65927af4fe5b
parent 2749 62552ae3b004
child 2751 1899bf26e438
tuned EUC decoding
CharArray.st
CharacterArray.st
--- a/CharArray.st	Fri Jul 04 12:24:37 1997 +0200
+++ b/CharArray.st	Fri Jul 04 12:53:52 1997 +0200
@@ -817,7 +817,7 @@
     |newString 
      sz     "{ Class: SmallInteger }"
      dstIdx "{ Class: SmallInteger }"
-     start  "{ Class: SmallInteger }"
+     srcIdx "{ Class: SmallInteger }"
      b1     "{ Class: SmallInteger }"
      b2     "{ Class: SmallInteger }"
      val    "{ Class: SmallInteger }"
@@ -827,20 +827,63 @@
     newString := JISEncodedString new:sz.
     sz ~~ 0 ifTrue:[
         dstIdx := 1.
-        start := 1.
-
-        [true] whileTrue:[
-            c := aString at:start.
+        srcIdx := 1.
+
+%{
+        if (__isString(aString)        /* C fast path; anything it cannot handle is left to the smalltalk loop below */
+         && (__Class(newString) == @global(JISEncodedString))) {
+            int _dstIdx = 1, _srcIdx = 1;       /* 1-based, like srcIdx/dstIdx on the smalltalk side */
+            int _sz = __intVal(sz);
+            unsigned char *_cp = __stringVal(aString);
+            unsigned char _c1;
+            unsigned short *_jcp = (unsigned short *)__stringVal(newString);
+
+            while (_srcIdx <= _sz) {
+                _c1 = _cp[_srcIdx-1];
+                if (_c1 < 161) {
+                    _jcp[_dstIdx-1] = _c1;      /* below 0xA1: copied untranslated */
+                } else {
+                    _srcIdx++;
+                    if (_srcIdx <= _sz) {
+                        unsigned char _c2;
+                        int _val;
+                        int _b1, _b2;
+
+                        _b1 = _c1 - 128;
+                        _c2 = _cp[_srcIdx-1];
+                        _b2 = _c2 - 128;        /* negative when the trail byte is below 0x80 */
+                        _val = (_b1<<8) | _b2;  /* OR, as in the smalltalk code: a negative _b2 makes _val negative ('+' would hide it) */
+                        if (_val <= 0) {
+                            /* decoder errors are handled in smalltalk */
+                            _srcIdx--;          /* step back to the lead byte so smalltalk re-reads the pair */
+                            goto getOutOfHere;
+                        }
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        _jcp[_dstIdx-1] = _c1;  /* lead byte is the last input byte: stored as is */
+                    }
+                }
+                _dstIdx++;
+                _srcIdx++;
+            }
+    getOutOfHere:
+            srcIdx = __MKSMALLINT(_srcIdx);     /* hand the positions reached back to the smalltalk variables */
+            dstIdx = __MKSMALLINT(_dstIdx);
+        }
+%}.
+
+        [srcIdx <= sz] whileTrue:[
+            c := aString at:srcIdx.
             b1 := c asciiValue.
             b1 < 161 ifTrue:[
                 "/ characters below 16rA1 are left untranslated
                 "/ (control character or roman).
                 newString at:dstIdx put:c.
             ] ifFalse:[
-                start := start + 1.
-                start <= sz ifTrue:[    
+                srcIdx := srcIdx + 1.
+                srcIdx <= sz ifTrue:[    
                     b1 := b1 - 128.
-                    b2 := (c2 := aString at:start) asciiValue.
+                    b2 := (c2 := aString at:srcIdx) asciiValue.
                     b2 := b2 - 128.
                     val := (b1 bitShift:8) bitOr:b2.
                     val <= 0 ifTrue:[
@@ -858,12 +901,12 @@
                 ].
             ].
             dstIdx := dstIdx + 1.
-            start := start + 1.
-
-            start > sz ifTrue:[
-                ^ newString copyFrom:1 to:dstIdx-1.
-            ]
-        ]
+            srcIdx := srcIdx + 1.
+        ].
+
+        (dstIdx-1) ~~ sz ifTrue:[
+            newString := newString copyFrom:1 to:dstIdx-1.
+        ].
     ].
     ^ newString
 
@@ -935,7 +978,7 @@
                     b1 := (c := aString at:i) asciiValue.
                     b1 >= 16r80 ifTrue:[
                         "/ mhmh - seems to be JIS8
-                        "/ (has half-width katakane at A1..DF
+                        "/ (has half-width katakana at A1..DF
                         "/ my romanTable also includes xlations for those ...
 
 "/                        DecodingFailedSignal 
@@ -1104,7 +1147,7 @@
     "
 
     "Created: 17.4.1996 / 16:11:57 / cg"
-    "Modified: 4.7.1997 / 11:01:48 / cg"
+    "Modified: 4.7.1997 / 12:53:20 / cg"
 !
 
 decodeFromSJIS:aString
@@ -1129,7 +1172,7 @@
         dstIdx := 1.
         srcIdx := 1.
 %{
-        if (0 && __isString(aString)
+        if (__isString(aString)
          && (__Class(newString) == @global(JISEncodedString))) {
             int _dstIdx = 1, _srcIdx = 1;
             int _sz = __intVal(sz);
@@ -4904,6 +4947,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.127 1997-07-04 10:24:37 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.128 1997-07-04 10:53:52 cg Exp $'
 ! !
 CharacterArray initialize!
--- a/CharacterArray.st	Fri Jul 04 12:24:37 1997 +0200
+++ b/CharacterArray.st	Fri Jul 04 12:53:52 1997 +0200
@@ -817,7 +817,7 @@
     |newString 
      sz     "{ Class: SmallInteger }"
      dstIdx "{ Class: SmallInteger }"
-     start  "{ Class: SmallInteger }"
+     srcIdx "{ Class: SmallInteger }"
      b1     "{ Class: SmallInteger }"
      b2     "{ Class: SmallInteger }"
      val    "{ Class: SmallInteger }"
@@ -827,20 +827,63 @@
     newString := JISEncodedString new:sz.
     sz ~~ 0 ifTrue:[
         dstIdx := 1.
-        start := 1.
-
-        [true] whileTrue:[
-            c := aString at:start.
+        srcIdx := 1.
+
+%{
+        if (__isString(aString)        /* C fast path; anything it cannot handle is left to the smalltalk loop below */
+         && (__Class(newString) == @global(JISEncodedString))) {
+            int _dstIdx = 1, _srcIdx = 1;       /* 1-based, like srcIdx/dstIdx on the smalltalk side */
+            int _sz = __intVal(sz);
+            unsigned char *_cp = __stringVal(aString);
+            unsigned char _c1;
+            unsigned short *_jcp = (unsigned short *)__stringVal(newString);
+
+            while (_srcIdx <= _sz) {
+                _c1 = _cp[_srcIdx-1];
+                if (_c1 < 161) {
+                    _jcp[_dstIdx-1] = _c1;      /* below 0xA1: copied untranslated */
+                } else {
+                    _srcIdx++;
+                    if (_srcIdx <= _sz) {
+                        unsigned char _c2;
+                        int _val;
+                        int _b1, _b2;
+
+                        _b1 = _c1 - 128;
+                        _c2 = _cp[_srcIdx-1];
+                        _b2 = _c2 - 128;        /* negative when the trail byte is below 0x80 */
+                        _val = (_b1<<8) | _b2;  /* OR, as in the smalltalk code: a negative _b2 makes _val negative ('+' would hide it) */
+                        if (_val <= 0) {
+                            /* decoder errors are handled in smalltalk */
+                            _srcIdx--;          /* step back to the lead byte so smalltalk re-reads the pair */
+                            goto getOutOfHere;
+                        }
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        _jcp[_dstIdx-1] = _c1;  /* lead byte is the last input byte: stored as is */
+                    }
+                }
+                _dstIdx++;
+                _srcIdx++;
+            }
+    getOutOfHere:
+            srcIdx = __MKSMALLINT(_srcIdx);     /* hand the positions reached back to the smalltalk variables */
+            dstIdx = __MKSMALLINT(_dstIdx);
+        }
+%}.
+
+        [srcIdx <= sz] whileTrue:[
+            c := aString at:srcIdx.
             b1 := c asciiValue.
             b1 < 161 ifTrue:[
                 "/ characters below 16rA1 are left untranslated
                 "/ (control character or roman).
                 newString at:dstIdx put:c.
             ] ifFalse:[
-                start := start + 1.
-                start <= sz ifTrue:[    
+                srcIdx := srcIdx + 1.
+                srcIdx <= sz ifTrue:[    
                     b1 := b1 - 128.
-                    b2 := (c2 := aString at:start) asciiValue.
+                    b2 := (c2 := aString at:srcIdx) asciiValue.
                     b2 := b2 - 128.
                     val := (b1 bitShift:8) bitOr:b2.
                     val <= 0 ifTrue:[
@@ -858,12 +901,12 @@
                 ].
             ].
             dstIdx := dstIdx + 1.
-            start := start + 1.
-
-            start > sz ifTrue:[
-                ^ newString copyFrom:1 to:dstIdx-1.
-            ]
-        ]
+            srcIdx := srcIdx + 1.
+        ].
+
+        (dstIdx-1) ~~ sz ifTrue:[
+            newString := newString copyFrom:1 to:dstIdx-1.
+        ].
     ].
     ^ newString
 
@@ -935,7 +978,7 @@
                     b1 := (c := aString at:i) asciiValue.
                     b1 >= 16r80 ifTrue:[
                         "/ mhmh - seems to be JIS8
-                        "/ (has half-width katakane at A1..DF
+                        "/ (has half-width katakana at A1..DF
                         "/ my romanTable also includes xlations for those ...
 
 "/                        DecodingFailedSignal 
@@ -1104,7 +1147,7 @@
     "
 
     "Created: 17.4.1996 / 16:11:57 / cg"
-    "Modified: 4.7.1997 / 11:01:48 / cg"
+    "Modified: 4.7.1997 / 12:53:20 / cg"
 !
 
 decodeFromSJIS:aString
@@ -1129,7 +1172,7 @@
         dstIdx := 1.
         srcIdx := 1.
 %{
-        if (0 && __isString(aString)
+        if (__isString(aString)
          && (__Class(newString) == @global(JISEncodedString))) {
             int _dstIdx = 1, _srcIdx = 1;
             int _sz = __intVal(sz);
@@ -4904,6 +4947,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.127 1997-07-04 10:24:37 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.128 1997-07-04 10:53:52 cg Exp $'
 ! !
 CharacterArray initialize!