CharacterArray.st
changeset 2720 98c21d9a07c0
parent 2692 281e46d29478
child 2721 77fedec2357e
--- a/CharacterArray.st	Thu Jun 26 11:47:32 1997 +0200
+++ b/CharacterArray.st	Sat Jun 28 00:13:47 1997 +0200
@@ -589,6 +589,7 @@
           'NEXT         (8 bit)' 
           'EUC          (extended unix code japanese)' 
           'JIS7         (jis 7bit escape codes japanese)'
+          'SJIS         (shift jis 8bit codes japanese)'
           'GB           (mainland china)'
           'BIG5         (taiwan)'
           'KSC          (korean)'
@@ -609,6 +610,7 @@
           #'next'   
           #'euc'   
           #'jis7'   
+          #'shiftJis'   
           #'gb'   
           #'big5'
           #'ksc'           "/ korean
@@ -616,7 +618,7 @@
        )
 
     "Created: 22.4.1996 / 14:39:39 / cg"
-    "Modified: 17.3.1997 / 12:27:44 / cg"
+    "Modified: 27.6.1997 / 23:30:21 / cg"
 ! !
 
 !CharacterArray class methodsFor:'encoding / decoding'!
@@ -783,9 +785,9 @@
      b1         "{ Class: SmallInteger }"
      b2         "{ Class: SmallInteger }"
      val        "{ Class: SmallInteger }"
-     singleBytes romans|
-
-    romans := JISEncodedString romanJISDecoderTable.
+     singleBytes "romans"|
+
+"/    romans := JISEncodedString romanJISDecoderTable.
 
     sz := aString size.
     newString := JISEncodedString new:sz.
@@ -959,7 +961,167 @@
     "
 
     "Created: 17.4.1996 / 16:11:57 / cg"
-    "Modified: 17.6.1997 / 18:04:58 / cg"
+    "Modified: 28.6.1997 / 00:10:25 / cg"
+!
+
+decodeFromShiftJIS:aString
+    "return a new JISEncodedString decoded from aString, whose characters are the bytes of
+     Shift-JIS text. Lead bytes 129..159 / 224..239 plus a valid trail byte give one JIS code
+     (row in the high, cell in the low byte); everything else is copied unchanged."
+
+    |newString char char2
+     sz         "{ Class: SmallInteger }"
+     dstIdx     "{ Class: SmallInteger }"
+     srcIdx     "{ Class: SmallInteger }"
+     b1         "{ Class: SmallInteger }"
+     b2         "{ Class: SmallInteger }"
+     val        "{ Class: SmallInteger }"
+    |
+
+    sz := aString size.
+    newString := JISEncodedString new:sz.
+    sz ~~ 0 ifTrue:[
+        dstIdx := 1.
+        srcIdx := 1.
+
+        [srcIdx <= sz] whileTrue:[
+            "/
+            "/ fetch the next byte; 129..159 or 224..239 starts a 2-byte character (if a trail byte follows)
+            "/
+            char := aString at:srcIdx.
+            srcIdx := srcIdx + 1.
+            b1 := char asciiValue.
+            (b1 >= 129 and:[srcIdx <= sz]) ifTrue:[
+                (b1 <= 159 
+                or:[b1 >= 224 and:[b1 <= 239]]) ifTrue:[
+                    char2 := aString at:srcIdx.
+                    srcIdx := srcIdx + 1.
+                    b2 := char2 asciiValue.
+                    (b2 >= 64 
+                    and:[b2 <= 252
+                    and:[b2 ~~ 127]]) ifTrue:[
+                        |adjust rowOffs cellOffs|
+
+                        adjust := (b2 < 159) ifTrue:[1] ifFalse:[0].    "/ 1 selects the lower of the two rows sharing b1
+                        rowOffs := b1 < 160 ifTrue:[112] ifFalse:[176].
+                        adjust == 1 ifTrue:[
+                            cellOffs := 31 + (b2 > 127 ifTrue:[1] ifFalse:[0]).
+                        ] ifFalse:[
+                            cellOffs := 126.
+                        ].
+                        val := (((b1 - rowOffs) bitShift:1) - adjust) bitShift:8.    "/ row goes into the high byte
+                        val := val + (b2 - cellOffs).                                "/ cell goes into the low byte
+                        newString at:dstIdx put:(Character value:val).
+                    ] ifFalse:[
+                        "/ invalid trail byte - append both bytes untranslated
+
+                        newString at:dstIdx put:char.
+                        dstIdx := dstIdx + 1.
+                        newString at:dstIdx put:char2.
+                    ]
+                ] ifFalse:[
+                    newString at:dstIdx put:char
+                ]
+            ] ifFalse:[
+                newString at:dstIdx put:char
+            ].
+            dstIdx := dstIdx + 1.
+        ].
+        newString := newString copyTo:dstIdx - 1.    "/ drop the unused tail
+    ].
+
+    ^ newString
+
+    "simple:
+         'hello' decodeFrom:#shiftJis
+         '../../doc/online/japanese/TOP.html' asFilename contents asString decodeFrom:#shiftJis
+
+     a two-byte character (hiragana A: bytes 16r82 16rA0, expected JIS code 16r2422):
+         (String with:(Character value:16r82) with:(Character value:16rA0)) decodeFrom:#shiftJis
+
+     JIS7 examples (copied from decodeFromJIS7, kept for comparison) - ending with a crippled escape:
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s decodeFromJIS7
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$A.
+         s decodeFromJIS7
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s decodeFromJIS7
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$A.
+         s decodeFromJIS7 
+
+     ending with a KANJI-in,  but no more chars:
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$B.
+         s decodeFromJIS7
+
+     ending with a KANJI-in, followed by $3 (KO):
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$B.
+         s := s , '$3'.
+         s decodeFromJIS7
+
+     ending with a KANJI-in, followed by $3$l$OF| (KO RE HA NI):
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$B.
+         s := s , '$3$l$OF|'.
+         s decodeFromJIS7
+
+     a KO in between:
+
+         |s|
+         s := 'hello' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$B.
+         s := s , '$3'.
+         s := s copyWith:Character esc.
+         s := s copyWith:$(.
+         s := s copyWith:$B.
+         s := s , 'hello'.
+         s decodeFromJIS7
+
+     I dont know what that means ;-):
+
+         |s t l|
+         s := 'kterm ' copyWith:Character esc.
+         s := s copyWith:$$.
+         s := s copyWith:$B.
+         s := s , '$N4A;zC<Kv%(%_%e%l!!<%?'.
+         s := s copyWith:Character esc.
+         s := s copyWith:$(.
+         s := s copyWith:$B.
+         s := s , ' kterm'.
+         t := s decodeFromJIS7.
+         l := Label new.
+         l label:t.
+         l font:(Font family:'k14' face:nil style:nil size:nil).
+         l font:(Font family:'gothic' size:17).
+         l font:(Font family:'mincho' size:23).
+         l realize
+    "
+
+    "Created: 17.4.1996 / 16:11:57 / cg"
+    "Modified: 27.6.1997 / 23:38:20 / cg"
 !
 
 encodeIntoBIG5withRomans:aBIG5String
@@ -2501,27 +2663,39 @@
 !CharacterArray methodsFor:'copying'!
 
 , aStringOrCharacter
-    "redefined to allow characters to be appended.
+    "redefined to allow characters and strings of a different character width to be appended.
      This is nonStandard, but convenient"
 
+    |myWidth otherWidth|
+
     aStringOrCharacter isCharacter ifTrue:[
         ^ self , aStringOrCharacter asString
     ].
     aStringOrCharacter isText ifTrue:[
         ^ aStringOrCharacter concatenateFromString:self
     ].
+    aStringOrCharacter isString ifTrue:[    "/ strings: make both sides the same width before concatenating
+        (otherWidth := aStringOrCharacter bitsPerCharacter) ~~ (myWidth := self bitsPerCharacter) ifTrue:[
+            otherWidth > myWidth ifTrue:[
+                ^ (aStringOrCharacter species fromString:self) , aStringOrCharacter    "/ argument is wider: convert the receiver
+            ].
+            ^ self , (self species fromString:aStringOrCharacter)    "/ receiver is wider: convert the argument
+        ].
+    ].
     ^ super , aStringOrCharacter
 
     "
      'hello' , $1    
      'hello' , '1'   
      'hello' , (' world' asText allBold) 
+     'hello' , (JISEncodedString fromString:' world') 
+     (JISEncodedString fromString:'hello') , ' world'
 
      Transcript showCR:
          (Text string:'hello' emphasis:#italic) , (Text string:' world' emphasis:#bold) 
     "
 
-    "Modified: 18.5.1996 / 12:29:30 / cg"
+    "Modified: 28.6.1997 / 00:13:17 / cg"
 !
 
 concatenate:string1 and:string2
@@ -2605,6 +2779,7 @@
      (and those are untested/incomplete):
         #euc
         #jis7
+        #shiftJis
         #mac
         #msdos
     "
@@ -2614,6 +2789,9 @@
     encodingSymbol == #jis7 ifTrue:[
         ^ self class decodeFromJIS7:self
     ].
+    encodingSymbol == #shiftJis ifTrue:[
+        ^ self class decodeFromShiftJIS:self
+    ].
     encodingSymbol == #euc ifTrue:[
         ^ self class decodeFromEUC:self
     ].
@@ -2630,7 +2808,7 @@
     ^ newString
 
     "Created: 22.2.1996 / 15:06:49 / cg"
-    "Modified: 17.4.1996 / 18:25:17 / cg"
+    "Modified: 27.6.1997 / 22:44:59 / cg"
 !
 
 encodeInto:encodingSymbol
@@ -4273,5 +4451,5 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.111 1997-06-17 16:06:04 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.112 1997-06-27 22:13:47 cg Exp $'
 ! !