some encoding stuff moved to CharacterEncoder
author: Claus Gittinger <cg@exept.de>
Wed, 11 Feb 2004 22:54:21 +0100
changeset 7918 7cf890e71c24
parent 7917 3649394bf5c0
child 7919 92b61bef1b1a
some encoding stuff moved to CharacterEncoder
CharacterArray.st
--- a/CharacterArray.st	Wed Feb 11 19:53:27 2004 +0100
+++ b/CharacterArray.st	Wed Feb 11 22:54:21 2004 +0100
@@ -57,13 +57,11 @@
 
 initialize
     DecodingFailedSignal isNil ifTrue:[    
-        DecodingFailedSignal := Signal new mayProceed:true.
+        DecodingFailedSignal := DecodingFailedError.
         DecodingFailedSignal notifierString:'decoding failure'.
-        DecodingFailedSignal nameClass:self message:#decodingFailedSignal.
-
-        EncodingFailedSignal := QuerySignal new mayProceed:true.
+
+        EncodingFailedSignal := EncodingFailedError.
         EncodingFailedSignal notifierString:'encoding failure'.
-        EncodingFailedSignal nameClass:self message:#encodingFailedSignal.
     ]
 
     "
@@ -943,7 +941,7 @@
                     b2 := b2 - 128.
                     val := (b1 bitShift:8) bitOr:b2.
                     val <= 0 ifTrue:[
-                        DecodingFailedSignal 
+                        self decodingFailedSignal 
                             raiseWith:aString
                             errorString:'EUC decoding failed (not EUC encoded ?)'.
                         newString at:dstIdx put:c.
@@ -994,199 +992,9 @@
      which are interpreted as a JIS7 or ISO2022-JP encoded singleByte string.
      There are various JIS encodings around (New-JIS, Old-JIS, NEC-JIS and ISO2022);
      this one understands New-JIS, ISO2022 and treats Old-JIS just the same.
-     The result is a JISEncodedString (you need a JIS font to display that ...).
-
-     This is a first-class candidate for a primitive"
-
-    |newString 
-     sz         "{ Class: SmallInteger }"
-     dstIdx     "{ Class: SmallInteger }"
-     start      "{ Class: SmallInteger }"
-     stop       "{ Class: SmallInteger }"
-     n1 n2 n3  
-     b1         "{ Class: SmallInteger }"
-     b2         "{ Class: SmallInteger }"
-     val        "{ Class: SmallInteger }"
-     singleBytes katakana c|
-
-    sz := aString size.
-    newString := JISEncodedString new:sz.
-    sz ~~ 0 ifTrue:[
-        dstIdx := 1.
-        start := 1.
-        singleBytes := true.
-        katakana := false.
-
-        [true] whileTrue:[
-            "/
-            "/ scan for next escape"
-            "/
-            stop := aString indexOf:(Character esc) startingAt:start.
-            stop == 0 ifTrue:[
-                stop := sz + 1.
-            ] ifFalse:[
-                (stop + 2) > sz ifTrue:[
-                    stop := sz + 1.
-                ]
-            ].
-            singleBytes ifTrue:[
-                start to:(stop - 1) do:[:i |
-                    c := aString at:i.
-                    newString at:dstIdx put:c.
-                    dstIdx := dstIdx + 1.
-                ].
-            ] ifFalse:[
-                start to:(stop - 2) by:2 do:[:i |
-                    b1 := (aString at:i) asciiValue.
-                    b2 := (aString at:i+1) asciiValue.
-                    val := (b1 bitShift:8) bitOr:b2.
-                    newString at:dstIdx put:(Character value:val).
-                    dstIdx := dstIdx + 1.
-                ]
-            ].
-
-            stop > sz ifTrue:[
-                ^ newString copyFrom:1 to:dstIdx - 1.
-            ].
-            start := stop.
-
-            "/
-            "/ found an escape (at start) 
-            "/ - check for KI (<ESC> '$' 'B') or OLD-JIS-KI (<ESC> '$' '@')
-            "/ and KO(ASCII) (<ESC> '(' 'B') or KO(ROMAN) (<ESC> '(' 'J')
-            "/
-            n1 := aString at:start.
-            n2 := aString at:(start + 1).
-            n3 := aString at:(start + 2).
-            katakana := false.
-
-            (n2 == $$ and:[n3 == $B ]) ifTrue:[
-                singleBytes := false.
-            ] ifFalse:[
-                (n2 == $$ and:[n3 == $@ ]) ifTrue:[
-                    singleBytes := false.
-                ] ifFalse:[
-                    (n2 == $( and:[n3 == $B ]) ifTrue:[
-                        singleBytes := true.
-                    ] ifFalse:[
-                        (n2 == $( and:[n3 == $J ]) ifTrue:[
-                            singleBytes := true.
-                        ] ifFalse:[
-                            (n2 == $( and:[n3 == $I ]) ifTrue:[
-                                singleBytes := true.
-                                katakana := true.
-                            ] ifFalse:[
-                                singleBytes ifTrue:[
-                                    newString at:dstIdx put:n1.
-                                    newString at:(dstIdx + 1) put:n2.
-                                    newString at:(dstIdx + 2) put:n3.
-                                    dstIdx := dstIdx + 3.
-                                ] ifFalse:[
-                                    DecodingFailedSignal 
-                                            raiseWith:aString
-                                            errorString:'JIS7 decoding failed (not JIS7 encoded ?)'.
-                                    newString at:dstIdx put:n1.
-                                    newString at:(dstIdx + 1) put:n2.
-                                    newString at:(dstIdx + 2) put:n3.
-                                    dstIdx := dstIdx + 3.
-                                ]
-                            ]
-                        ]
-                    ]
-                ]
-            ].
-            start := start + 3.
-            start > sz ifTrue:[
-                ^ newString copyFrom:1 to:dstIdx-1.
-            ]
-        ]
-    ].
-    ^ newString
-
-    "simple:
-
-         'hello' decodeFromJIS7
-
-     ending with a crippled escape:
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s decodeFromJIS7
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$A.
-         s decodeFromJIS7
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s decodeFromJIS7
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$A.
-         s decodeFromJIS7 
-
-     ending with a KANJI-in,  but no more chars:
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$B.
-         s decodeFromJIS7
-
-     ending with a KANJI-in, followed by $3 (KO):
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$B.
-         s := s , '$3'.
-         s decodeFromJIS7
-
-     ending with a KANJI-in, followed by $3$l$OF| (KO RE HA NI):
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$B.
-         s := s , '$3$l$OF|'.
-         s decodeFromJIS7
-
-     a KO in between:
-
-         |s|
-         s := 'hello' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$B.
-         s := s , '$3'.
-         s := s copyWith:Character esc.
-         s := s copyWith:$(.
-         s := s copyWith:$B.
-         s := s , 'hello'.
-         s decodeFromJIS7
-
-     I dont know what that means ;-):
-
-         |s t l|
-         s := 'kterm ' copyWith:Character esc.
-         s := s copyWith:$$.
-         s := s copyWith:$B.
-         s := s , '$N4A;zC<Kv%(%_%e%l!!<%?'.
-         s := s copyWith:Character esc.
-         s := s copyWith:$(.
-         s := s copyWith:$B.
-         s := s , ' kterm'.
-         t := s decodeFromJIS7.
-         l := Label new.
-         l label:t.
-         l font:(Font family:'k14' face:nil style:nil size:nil).
-         l font:(Font family:'gothic' size:17).
-         l font:(Font family:'mincho' size:23).
-         l realize
-    "
+     The result is a JISEncodedString (you need a JIS font to display that ...)."
+
+    ^ CharacterEncoder::JIS0208_to_JIS7 decodeString:aString 
 
     "Created: 17.4.1996 / 16:11:57 / cg"
     "Modified: 16.7.1997 / 12:50:55 / cg"
@@ -1309,7 +1117,7 @@
                     b2 := (b2 - cellOffs).
                     val := (b1 bitShift:8) + b2.
                     val <= 0 ifTrue:[
-                        DecodingFailedSignal 
+                        self decodingFailedSignal 
                                 raiseWith:aString
                                 errorString:'SJIS decoding failed (not SJIS encoded ?)'.
                         newString at:dstIdx put:char1.
@@ -1588,90 +1396,7 @@
      The resulting string is only useful to be stored on some external file,
      not for being displayed in an ST/X view."
 
-    |sz "{ Class:SmallInteger }"
-     b1 "{ Class:SmallInteger }"
-     val romans out inSingleByteMode c kanji roman val2|
-
-    inSingleByteMode := true.
-    kanji := JISEncodedString jis7KanjiEscapeSequence.
-    roman := JISEncodedString jis7RomanEscapeSequence.
-
-    romans := JISEncodedString romanTable.
-
-    sz := aJISString size.
-    sz == 0 ifTrue:[^ ''].
-
-    out := WriteStream on:(String new:(sz * 2)).
-
-    1 to:sz do:[:srcIndex |
-        c := aJISString at:srcIndex.
-        b1 := c asciiValue.
-        b1 < 33 ifTrue:[
-            "/ a control character
-            inSingleByteMode ifFalse:[
-                out nextPutAll:roman.
-                inSingleByteMode := true
-            ].
-            out nextPut:c.
-        ] ifFalse:[
-            "/ check for a roman character
-            "/ the two numbers below are romanTable min and romanTable max
-            "/
-            (b1 between:16r2121 and:16r2573) ifTrue:[
-                val := romans indexOf:b1.
-                val2 := val - 1 + 32.
-                (val ~~ 0 and:[val2 <= 16r7F]) ifTrue:[
-                    inSingleByteMode ifFalse:[
-                        out nextPutAll:roman.
-                        inSingleByteMode := true
-                    ].
-                    out nextPut:(Character value:val2)
-                ] ifFalse:[
-                    inSingleByteMode ifTrue:[
-                        out nextPutAll:kanji.
-                        inSingleByteMode := false
-                    ].
-                    out nextPut:(Character value:(b1 bitShift:-8)).
-                    out nextPut:(Character value:(b1 bitAnd:16rFF)).
-                ].
-            ] ifFalse:[
-                b1 <= 255 ifTrue:[
-                    "/ mhmh - unrepresentable roman (national chars)
-"/                    b1 >= 160 ifTrue:[
-"/                        ('no rep for ' , b1 printString) printNL.
-"/                    ].
-                    EncodingFailedSignal
-                        raiseWith:aJISString
-                        errorString:'JIS7 encoding failed (contains 8-bit characters ?)'.
-
-                    inSingleByteMode ifFalse:[
-                        out nextPutAll:roman.
-                        inSingleByteMode := true
-                    ].
-                    out nextPut:c
-                ] ifFalse:[
-                    inSingleByteMode ifTrue:[
-                        out nextPutAll:kanji.
-                        inSingleByteMode := false
-                    ].
-                    out nextPut:(Character value:(b1 bitShift:-8)).
-                    out nextPut:(Character value:(b1 bitAnd:16rFF)).
-                ]
-            ]
-        ].
-    ].
-    inSingleByteMode ifFalse:[
-        out nextPutAll:roman.
-    ].
-    ^ out contents
-
-    "simple:
-
-         'hello' decodeFromJIS7 encodeIntoJIS7 
-    "
-
-    "Created: 17.4.1996 / 16:17:40 / cg"
-    "Modified: 16.7.1997 / 11:30:07 / cg"
+    ^ CharacterEncoder::JIS0208_to_JIS7 encodeString:aJISString 
 !
 
 encodeIntoSJIS:aJISString
@@ -4374,6 +4099,7 @@
     "/ to convert singleByte strings ...
 
     (encodingSymbol startsWith:'jis') ifTrue:[
+        (encodingSymbol startsWith:'jisx0201') ifTrue:[^ self].
         newString := JISEncodedString encodeRomans:self.
 
         "/ jisx0208.1983 has no half-width katakana
@@ -6643,7 +6369,7 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.270 2004-02-10 17:09:07 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.271 2004-02-11 21:54:21 cg Exp $'
 ! !
 
 CharacterArray initialize!