added encoding into SJIS
author Claus Gittinger <cg@exept.de>
Sat, 28 Jun 1997 21:44:28 +0200
changeset 2731 0ba9228a6677
parent 2730 1fbfddfa1d3f
child 2732 79d06cdc7428
added encoding into SJIS
CharArray.st
CharacterArray.st
--- a/CharArray.st	Sat Jun 28 21:06:18 1997 +0200
+++ b/CharArray.st	Sat Jun 28 21:44:28 1997 +0200
@@ -1359,6 +1359,95 @@
     "Modified: 17.4.1996 / 17:30:59 / cg"
 !
 
+encodeIntoSJIS:aJISString
+    "return a new 8bit string holding the characters of aJISString in SJIS encoding.
+     The argument must be a JIS 16 bit character string; for an empty argument,
+     an empty string is returned.
+     The resulting string is only useful to be stored on some external file,
+     not for being displayed in an ST/X view."
+
+    |sz "{ Class: SmallInteger }"
+     rval "{ Class: SmallInteger }"
+     val  "{ Class: SmallInteger }"
+     romans c out isSJIS|
+
+    romans := JISEncodedString romanJISDecoderTable.
+
+    sz := aJISString size.
+    sz == 0 ifTrue:[^ ''].
+    "/ each input character produces at most two output bytes
+    out := WriteStream on:(String new:(sz * 2)).
+
+    1 to:sz do:[:srcIndex |
+        val := (c := aJISString at:srcIndex) asciiValue.
+        val < 33 ifTrue:[
+            "/ code below 33 (control character or space): copied through unchanged
+            out nextPut:c.
+        ] ifFalse:[
+            val <= 128 ifTrue:[
+                "/ code in 33..128 (ascii subset): copied through unchanged
+                out nextPut:c.
+            ] ifFalse:[
+                (val > 150 and:[val < 224]) ifTrue:[
+                    "/ code in 151..223: copied through unchanged. NOTE(review): presumably halfwidth katakana rather than ascii - confirm
+                    out nextPut:c.
+                ] ifFalse:[
+                    isSJIS := true.
+                    "/ isSJIS stays true until one of the special cases below has emitted the character
+                    "/ check for HALFWIDTH KATAKANA: high byte is 142 (16r8E);
+                    "/ emitted as the single byte (low byte + 128), if that lies in 161..223
+
+                    (val bitAnd:16rFF00) == 16r8E00 ifTrue:[
+                        |b|
+
+                        b := (val bitAnd:16rFF) + 128.
+                        (b >= 161 and:[b <= 223]) ifTrue:[
+                            out nextPut:(Character value:b).
+                            isSJIS := false.
+                        ].
+                    ].
+
+                    isSJIS ifTrue:[
+
+                        "/ check for a roman character: if the code is found in the romans table,
+                        "/ it is emitted as the single byte (table index - 1 + 33).
+                        "/ the two numbers below are romanTable min and romanTable max
+                        (val between:16r2121 and:16r2573) ifTrue:[
+                            rval := romans indexOf:val.
+                            rval ~~ 0 ifTrue:[
+                                out nextPut:(Character value:(rval - 1 + 33)).
+                                isSJIS := false.
+                            ].
+                        ].
+                    ].
+
+                    isSJIS ifTrue:[
+                        |b1 b2 rowOffset cellOffset|
+                        "/ general case: two output bytes; b1/b2 are the high/low byte of the 16 bit code
+                        b1 := (val bitShift:-8).
+                        b2 := (val bitAnd:16rFF).
+                        rowOffset := (b1 < 95) ifTrue:[112] ifFalse:[176].
+                        cellOffset := b1 odd ifTrue:[(b2 > 95) ifTrue:[32] ifFalse:[31]]
+                                             ifFalse:[126].
+                        "/ the first output byte is computed from b1, the second from b2
+                        out nextPut:(Character value:(((b1 + 1) bitShift:-1) + rowOffset)).
+                        out nextPut:(Character value:b2 + cellOffset).
+                    ]
+                ]
+            ]
+        ].
+    ].
+    ^ out contents
+
+    "simple:
+     NOTE(review): the example below exercises the EUC coder, not this method - presumably copied; confirm
+         'hello' decodeFromEUC encodeIntoEUC    
+    "
+
+    "Created: 28.6.1997 / 21:13:27 / cg"
+    "Modified: 28.6.1997 / 21:43:32 / cg"
+!
+
 guessEncodingFrom:aString
     "try to guess some 8-bit strings encoding by
      searching for certain escape sequences.
@@ -2820,6 +2909,7 @@
      (and those are untested/incomplete):
         #euc
         #jis7
+        #sjis
         #mac
         #msdos
     "
@@ -2834,9 +2924,9 @@
     encodingSymbol == #jis7 ifTrue:[
         ^ self class encodeIntoJIS7:self
     ].
-"/    encodingSymbol == #sjis ifTrue:[
-"/        ^ self class encodeIntoSJIS:self
-"/    ].
+    encodingSymbol == #sjis ifTrue:[
+        ^ self class encodeIntoSJIS:self
+    ].
     (encodingSymbol startsWith:'jis') ifTrue:[
         ^ self class encodeIntoJISwithRomans:self
     ].
@@ -2862,7 +2952,7 @@
     ^ newString
 
     "Created: 22.2.1996 / 15:07:31 / cg"
-    "Modified: 28.6.1997 / 19:19:07 / cg"
+    "Modified: 28.6.1997 / 21:25:11 / cg"
 !
 
 replaceFrom:aString decode:encoding 
@@ -4455,6 +4545,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.115 1997-06-28 19:06:18 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.116 1997-06-28 19:44:28 cg Exp $'
 ! !
 CharacterArray initialize!
--- a/CharacterArray.st	Sat Jun 28 21:06:18 1997 +0200
+++ b/CharacterArray.st	Sat Jun 28 21:44:28 1997 +0200
@@ -1359,6 +1359,95 @@
     "Modified: 17.4.1996 / 17:30:59 / cg"
 !
 
+encodeIntoSJIS:aJISString
+    "return a new 8bit string holding the characters of aJISString in SJIS encoding.
+     The argument must be a JIS 16 bit character string; for an empty argument,
+     an empty string is returned.
+     The resulting string is only useful to be stored on some external file,
+     not for being displayed in an ST/X view."
+
+    |sz "{ Class: SmallInteger }"
+     rval "{ Class: SmallInteger }"
+     val  "{ Class: SmallInteger }"
+     romans c out isSJIS|
+
+    romans := JISEncodedString romanJISDecoderTable.
+
+    sz := aJISString size.
+    sz == 0 ifTrue:[^ ''].
+    "/ each input character produces at most two output bytes
+    out := WriteStream on:(String new:(sz * 2)).
+
+    1 to:sz do:[:srcIndex |
+        val := (c := aJISString at:srcIndex) asciiValue.
+        val < 33 ifTrue:[
+            "/ code below 33 (control character or space): copied through unchanged
+            out nextPut:c.
+        ] ifFalse:[
+            val <= 128 ifTrue:[
+                "/ code in 33..128 (ascii subset): copied through unchanged
+                out nextPut:c.
+            ] ifFalse:[
+                (val > 150 and:[val < 224]) ifTrue:[
+                    "/ code in 151..223: copied through unchanged. NOTE(review): presumably halfwidth katakana rather than ascii - confirm
+                    out nextPut:c.
+                ] ifFalse:[
+                    isSJIS := true.
+                    "/ isSJIS stays true until one of the special cases below has emitted the character
+                    "/ check for HALFWIDTH KATAKANA: high byte is 142 (16r8E);
+                    "/ emitted as the single byte (low byte + 128), if that lies in 161..223
+
+                    (val bitAnd:16rFF00) == 16r8E00 ifTrue:[
+                        |b|
+
+                        b := (val bitAnd:16rFF) + 128.
+                        (b >= 161 and:[b <= 223]) ifTrue:[
+                            out nextPut:(Character value:b).
+                            isSJIS := false.
+                        ].
+                    ].
+
+                    isSJIS ifTrue:[
+
+                        "/ check for a roman character: if the code is found in the romans table,
+                        "/ it is emitted as the single byte (table index - 1 + 33).
+                        "/ the two numbers below are romanTable min and romanTable max
+                        (val between:16r2121 and:16r2573) ifTrue:[
+                            rval := romans indexOf:val.
+                            rval ~~ 0 ifTrue:[
+                                out nextPut:(Character value:(rval - 1 + 33)).
+                                isSJIS := false.
+                            ].
+                        ].
+                    ].
+
+                    isSJIS ifTrue:[
+                        |b1 b2 rowOffset cellOffset|
+                        "/ general case: two output bytes; b1/b2 are the high/low byte of the 16 bit code
+                        b1 := (val bitShift:-8).
+                        b2 := (val bitAnd:16rFF).
+                        rowOffset := (b1 < 95) ifTrue:[112] ifFalse:[176].
+                        cellOffset := b1 odd ifTrue:[(b2 > 95) ifTrue:[32] ifFalse:[31]]
+                                             ifFalse:[126].
+                        "/ the first output byte is computed from b1, the second from b2
+                        out nextPut:(Character value:(((b1 + 1) bitShift:-1) + rowOffset)).
+                        out nextPut:(Character value:b2 + cellOffset).
+                    ]
+                ]
+            ]
+        ].
+    ].
+    ^ out contents
+
+    "simple:
+     NOTE(review): the example below exercises the EUC coder, not this method - presumably copied; confirm
+         'hello' decodeFromEUC encodeIntoEUC    
+    "
+
+    "Created: 28.6.1997 / 21:13:27 / cg"
+    "Modified: 28.6.1997 / 21:43:32 / cg"
+!
+
 guessEncodingFrom:aString
     "try to guess some 8-bit strings encoding by
      searching for certain escape sequences.
@@ -2820,6 +2909,7 @@
      (and those are untested/incomplete):
         #euc
         #jis7
+        #sjis
         #mac
         #msdos
     "
@@ -2834,9 +2924,9 @@
     encodingSymbol == #jis7 ifTrue:[
         ^ self class encodeIntoJIS7:self
     ].
-"/    encodingSymbol == #sjis ifTrue:[
-"/        ^ self class encodeIntoSJIS:self
-"/    ].
+    encodingSymbol == #sjis ifTrue:[
+        ^ self class encodeIntoSJIS:self
+    ].
     (encodingSymbol startsWith:'jis') ifTrue:[
         ^ self class encodeIntoJISwithRomans:self
     ].
@@ -2862,7 +2952,7 @@
     ^ newString
 
     "Created: 22.2.1996 / 15:07:31 / cg"
-    "Modified: 28.6.1997 / 19:19:07 / cg"
+    "Modified: 28.6.1997 / 21:25:11 / cg"
 !
 
 replaceFrom:aString decode:encoding 
@@ -4455,6 +4545,6 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.115 1997-06-28 19:06:18 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.116 1997-06-28 19:44:28 cg Exp $'
 ! !
 CharacterArray initialize!