tuned SJIS decoding
author Claus Gittinger <cg@exept.de>
Fri, 04 Jul 1997 12:24:37 +0200
changeset 2749 62552ae3b004
parent 2748 3002ca130a92
child 2750 65927af4fe5b
tuned SJIS decoding
CharArray.st
CharacterArray.st
--- a/CharArray.st	Fri Jul 04 11:23:39 1997 +0200
+++ b/CharArray.st	Fri Jul 04 12:24:37 1997 +0200
@@ -1128,6 +1128,73 @@
     sz ~~ 0 ifTrue:[
         dstIdx := 1.
         srcIdx := 1.
+%{  /* inline C fast path: decode the SJIS bytes of aString into 16-bit codes stored in newString */
+        if (0 && __isString(aString)   /* NOTE: the constant 0 makes this test always false - as written, this block never executes */
+         && (__Class(newString) == @global(JISEncodedString))) {
+            int _dstIdx = 1, _srcIdx = 1;   /* 1-based, mirroring the Smalltalk dstIdx/srcIdx set just above */
+            int _sz = __intVal(sz);   /* sz as a C int; upper bound for _srcIdx */
+            unsigned char *_cp = __stringVal(aString);   /* source, read one byte at a time */
+            unsigned char _c1, _c2;   /* first byte and (for two-byte sequences) second byte */
+            unsigned short *_jcp = (unsigned short *)__stringVal(newString);   /* destination, written as 16-bit elements */
+
+            while (_srcIdx <= _sz) {   /* each iteration consumes one or two source bytes */
+                int _val;
+
+                _c1 = _cp[_srcIdx-1];
+                _srcIdx++;
+
+                if ((_srcIdx <= _sz)   /* at least one more byte is available ... */
+                 && (((_c1 >= 129) && (_c1 <= 159))   /* ... and _c1 lies in one of the */
+                     || ((_c1 >= 224) && (_c1 <= 239)))) {   /* two lead-byte ranges: treat as two-byte sequence */
+                    _c2 = _cp[_srcIdx-1];
+                    _srcIdx++;
+                    if ((_c2 >= 64) && (_c2 <= 252)) {   /* second byte is within the accepted range */
+                        int _adjust, _rowOffs, _cellOffs;
+                        int _b1, _b2;
+
+                        _adjust = (_c2 < 159) ? 1 : 0;   /* 1 selects the lower of the two rows reachable from this lead byte (see _b1) */
+                        _rowOffs = (_c1 < 160) ? 112 : 176;   /* bias: 112 for lead bytes 129..159, 176 for 224..239 */
+                        if (_adjust) {
+                            _cellOffs = 31 + ((_c2 > 127) ? 1 : 0);   /* 31 for second bytes up to 127, 32 above */
+                        } else {
+                            _cellOffs = 126;
+                        }
+                        _b1 = ((_c1 - _rowOffs) << 1) - _adjust;   /* high byte of the result */
+                        _b2 = (_c2 - _cellOffs);   /* low byte of the result */
+                        _val = (_b1<<8) + _b2;   /* combine both into a single code */
+                        if (_val <= 0) {
+                            /* decoder error - let smalltalk handle that */
+                            _srcIdx -= 2;   /* un-consume both bytes so the Smalltalk loop sees them again */
+                            goto getOutOfHere;
+                        }
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        /* second byte out of range - store both bytes untranslated, as two separate elements */
+
+                        _jcp[_dstIdx-1] = _c1;
+                        _dstIdx++;   /* the increment for _c2 is the common one at the bottom of the loop */
+                        _jcp[_dstIdx-1] = _c2;
+                    }
+                } else {
+                    if ((_c1 >= 0xA1 /* 161 */) && (_c1 <= 0xDF /* 223 */)) {
+                        /* HALFWIDTH KATAKANA
+                         * map half-width katakana to 8E:xx
+                         */
+                        _val = _c1 - 128;   /* low byte: xx = _c1 - 128 */
+                        _val = _val + 0x8E00;   /* high byte: 8E */
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        /* everything else (roman characters, a lead byte at the very end of the input) is left untranslated */
+                        _jcp[_dstIdx-1] = _c1;
+                    }
+                }
+                _dstIdx++;
+            }
+        getOutOfHere: ;   /* reached on normal completion and on decoder error */
+            dstIdx = __MKSMALLINT(_dstIdx);   /* hand both positions back to the Smalltalk variables, */
+            srcIdx = __MKSMALLINT(_srcIdx);   /* so the Smalltalk loop that follows resumes from here */
+        }
+%}.
 
         [srcIdx <= sz] whileTrue:[
             "/
@@ -1186,7 +1253,9 @@
             ].
             dstIdx := dstIdx + 1.
         ].
-        newString := newString copyTo:dstIdx - 1.
+        (dstIdx-1) ~~ sz ifTrue:[
+            newString := newString copyTo:dstIdx - 1.
+        ]
     ].
 
     ^ newString
@@ -4835,6 +4904,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.126 1997-07-04 09:23:39 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.127 1997-07-04 10:24:37 cg Exp $'
 ! !
 CharacterArray initialize!
--- a/CharacterArray.st	Fri Jul 04 11:23:39 1997 +0200
+++ b/CharacterArray.st	Fri Jul 04 12:24:37 1997 +0200
@@ -1128,6 +1128,73 @@
     sz ~~ 0 ifTrue:[
         dstIdx := 1.
         srcIdx := 1.
+%{  /* inline C fast path: decode the SJIS bytes of aString into 16-bit codes stored in newString */
+        if (0 && __isString(aString)   /* NOTE: the constant 0 makes this test always false - as written, this block never executes */
+         && (__Class(newString) == @global(JISEncodedString))) {
+            int _dstIdx = 1, _srcIdx = 1;   /* 1-based, mirroring the Smalltalk dstIdx/srcIdx set just above */
+            int _sz = __intVal(sz);   /* sz as a C int; upper bound for _srcIdx */
+            unsigned char *_cp = __stringVal(aString);   /* source, read one byte at a time */
+            unsigned char _c1, _c2;   /* first byte and (for two-byte sequences) second byte */
+            unsigned short *_jcp = (unsigned short *)__stringVal(newString);   /* destination, written as 16-bit elements */
+
+            while (_srcIdx <= _sz) {   /* each iteration consumes one or two source bytes */
+                int _val;
+
+                _c1 = _cp[_srcIdx-1];
+                _srcIdx++;
+
+                if ((_srcIdx <= _sz)   /* at least one more byte is available ... */
+                 && (((_c1 >= 129) && (_c1 <= 159))   /* ... and _c1 lies in one of the */
+                     || ((_c1 >= 224) && (_c1 <= 239)))) {   /* two lead-byte ranges: treat as two-byte sequence */
+                    _c2 = _cp[_srcIdx-1];
+                    _srcIdx++;
+                    if ((_c2 >= 64) && (_c2 <= 252)) {   /* second byte is within the accepted range */
+                        int _adjust, _rowOffs, _cellOffs;
+                        int _b1, _b2;
+
+                        _adjust = (_c2 < 159) ? 1 : 0;   /* 1 selects the lower of the two rows reachable from this lead byte (see _b1) */
+                        _rowOffs = (_c1 < 160) ? 112 : 176;   /* bias: 112 for lead bytes 129..159, 176 for 224..239 */
+                        if (_adjust) {
+                            _cellOffs = 31 + ((_c2 > 127) ? 1 : 0);   /* 31 for second bytes up to 127, 32 above */
+                        } else {
+                            _cellOffs = 126;
+                        }
+                        _b1 = ((_c1 - _rowOffs) << 1) - _adjust;   /* high byte of the result */
+                        _b2 = (_c2 - _cellOffs);   /* low byte of the result */
+                        _val = (_b1<<8) + _b2;   /* combine both into a single code */
+                        if (_val <= 0) {
+                            /* decoder error - let smalltalk handle that */
+                            _srcIdx -= 2;   /* un-consume both bytes so the Smalltalk loop sees them again */
+                            goto getOutOfHere;
+                        }
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        /* second byte out of range - store both bytes untranslated, as two separate elements */
+
+                        _jcp[_dstIdx-1] = _c1;
+                        _dstIdx++;   /* the increment for _c2 is the common one at the bottom of the loop */
+                        _jcp[_dstIdx-1] = _c2;
+                    }
+                } else {
+                    if ((_c1 >= 0xA1 /* 161 */) && (_c1 <= 0xDF /* 223 */)) {
+                        /* HALFWIDTH KATAKANA
+                         * map half-width katakana to 8E:xx
+                         */
+                        _val = _c1 - 128;   /* low byte: xx = _c1 - 128 */
+                        _val = _val + 0x8E00;   /* high byte: 8E */
+                        _jcp[_dstIdx-1] = _val;
+                    } else {
+                        /* everything else (roman characters, a lead byte at the very end of the input) is left untranslated */
+                        _jcp[_dstIdx-1] = _c1;
+                    }
+                }
+                _dstIdx++;
+            }
+        getOutOfHere: ;   /* reached on normal completion and on decoder error */
+            dstIdx = __MKSMALLINT(_dstIdx);   /* hand both positions back to the Smalltalk variables, */
+            srcIdx = __MKSMALLINT(_srcIdx);   /* so the Smalltalk loop that follows resumes from here */
+        }
+%}.
 
         [srcIdx <= sz] whileTrue:[
             "/
@@ -1186,7 +1253,9 @@
             ].
             dstIdx := dstIdx + 1.
         ].
-        newString := newString copyTo:dstIdx - 1.
+        (dstIdx-1) ~~ sz ifTrue:[
+            newString := newString copyTo:dstIdx - 1.
+        ]
     ].
 
     ^ newString
@@ -4835,6 +4904,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.126 1997-07-04 09:23:39 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.127 1997-07-04 10:24:37 cg Exp $'
 ! !
 CharacterArray initialize!