--- a/CharacterArray.st Thu Jun 26 11:47:32 1997 +0200
+++ b/CharacterArray.st Sat Jun 28 00:13:47 1997 +0200
@@ -589,6 +589,7 @@
'NEXT (8 bit)'
'EUC (extended unix code japanese)'
'JIS7 (jis 7bit escape codes japanese)'
+ 'SJIS (shift jis 8bit codes japanese)'
'GB (mainland china)'
'BIG5 (taiwan)'
'KSC (korean)'
@@ -609,6 +610,7 @@
#'next'
#'euc'
#'jis7'
+ #'shiftJis'
#'gb'
#'big5'
#'ksc' "/ korean
@@ -616,7 +618,7 @@
)
"Created: 22.4.1996 / 14:39:39 / cg"
- "Modified: 17.3.1997 / 12:27:44 / cg"
+ "Modified: 27.6.1997 / 23:30:21 / cg"
! !
!CharacterArray class methodsFor:'encoding / decoding'!
@@ -783,9 +785,9 @@
b1 "{ Class: SmallInteger }"
b2 "{ Class: SmallInteger }"
val "{ Class: SmallInteger }"
- singleBytes romans|
-
- romans := JISEncodedString romanJISDecoderTable.
+ singleBytes "romans"|
+
+"/ romans := JISEncodedString romanJISDecoderTable.
sz := aString size.
newString := JISEncodedString new:sz.
@@ -959,7 +961,167 @@
"
"Created: 17.4.1996 / 16:11:57 / cg"
- "Modified: 17.6.1997 / 18:04:58 / cg"
+ "Modified: 28.6.1997 / 00:10:25 / cg"
+!
+
+decodeFromShiftJIS:aString
+    "return a new string containing aString's characters,
+     which are interpreted as a Shift-JIS encoded singleByte string.
+     The result is a JISEncodedString (you need a JIS font to display that ...).
+
+     A lead byte in 129..159 or 224..239 which is followed by a trail byte
+     in 64..126 or 128..252 is combined into a single 16-bit character,
+     with the converted row byte in the high and the converted cell byte
+     in the low 8 bits.
+     A lead byte followed by any other byte is copied untranslated together
+     with that byte; every other byte (including a lead byte which is the
+     last byte of aString) is copied as is."
+
+    |newString char char2
+     sz     "{ Class: SmallInteger }"
+     dstIdx "{ Class: SmallInteger }"
+     srcIdx "{ Class: SmallInteger }"
+     b1     "{ Class: SmallInteger }"
+     b2     "{ Class: SmallInteger }"
+     val    "{ Class: SmallInteger }"
+    |
+
+    sz := aString size.
+    "/ the result can never be longer than the input; it is cut to its real size at the end
+    newString := JISEncodedString new:sz.
+    sz ~~ 0 ifTrue:[
+        dstIdx := 1.
+        srcIdx := 1.
+
+        [srcIdx <= sz] whileTrue:[
+            "/
+            "/ scan for next character in 129..159 or 224..239
+            "/
+            char := aString at:srcIdx.
+            srcIdx := srcIdx + 1.
+            b1 := char asciiValue.
+            "/ srcIdx already points behind char here;
+            "/ a lead byte is only treated as such, if a trail byte follows
+            (b1 >= 129 and:[srcIdx <= sz]) ifTrue:[
+                (b1 <= 159
+                 or:[b1 >= 224 and:[b1 <= 239]]) ifTrue:[
+                    char2 := aString at:srcIdx.
+                    srcIdx := srcIdx + 1.
+                    b2 := char2 asciiValue.
+                    (b2 >= 64
+                     and:[b2 <= 252
+                     and:[b2 ~~ 127]]) ifTrue:[
+                        |adjust rowOffs cellOffs|
+
+                        "/ each lead byte covers two consecutive rows;
+                        "/ trail bytes below 159 select the first of them
+                        adjust := (b2 < 159) ifTrue:[1] ifFalse:[0].
+                        rowOffs := b1 < 160 ifTrue:[112] ifFalse:[176].
+                        adjust == 1 ifTrue:[
+                            "/ 127 is not a valid trail byte - one more to subtract above it
+                            cellOffs := 31 + (b2 > 127 ifTrue:[1] ifFalse:[0]).
+                        ] ifFalse:[
+                            cellOffs := 126.
+                        ].
+                        "/ the row byte goes into the high, the cell byte into the low 8 bits
+                        val := (((b1 - rowOffs) bitShift:1) - adjust) bitShift:8.
+                        val := val + (b2 - cellOffs).
+                        newString at:dstIdx put:(Character value:val).
+                    ] ifFalse:[
+                        "/ mhmh - append untranslated
+
+                        newString at:dstIdx put:char.
+                        dstIdx := dstIdx + 1.
+                        newString at:dstIdx put:char2.
+                    ]
+                ] ifFalse:[
+                    newString at:dstIdx put:char
+                ]
+            ] ifFalse:[
+                newString at:dstIdx put:char
+            ].
+            dstIdx := dstIdx + 1.
+        ].
+        newString := newString copyTo:dstIdx - 1.
+    ].
+
+    ^ newString
+
+    "
+     plain ascii is passed through unchanged:
+
+       'hello' decodeFrom:#shiftJis
+
+     hiragana A; shift-JIS 16r82 16rA0 is JIS 16r2422:
+
+       |s|
+       s := String with:(Character value:16r82) with:(Character value:16rA0).
+       ((s decodeFrom:#shiftJis) at:1) asciiValue printStringRadix:16
+
+     ending with a lead byte, but no trail byte; it is copied as is:
+
+       |s|
+       s := 'hello' copyWith:(Character value:16r82).
+       s decodeFrom:#shiftJis
+
+     a lead byte followed by an invalid trail byte; both are copied as is:
+
+       |s|
+       s := 'hello' copyWith:(Character value:16r82).
+       s := s copyWith:$1.
+       s decodeFrom:#shiftJis
+
+     a whole file:
+
+       '../../doc/online/japanese/TOP.html' asFilename contents asString
+           decodeFrom:#shiftJis
+    "
+
+    "Created: 17.4.1996 / 16:11:57 / cg"
+    "Modified: 27.6.1997 / 23:38:20 / cg"
!
encodeIntoBIG5withRomans:aBIG5String
@@ -2501,27 +2663,39 @@
!CharacterArray methodsFor:'copying'!
, aStringOrCharacter
- "redefined to allow characters to be appended.
+ "redefined to allow characters, Text and strings with a different
+ character width (bitsPerCharacter) to be appended.
+ For strings of different width, the narrower operand is first converted
+ with the wider operand's species (fromString:); then the concatenation is retried.
This is nonStandard, but convenient"
+ |myWidth otherWidth|
+
+ "/ a single character: convert it to a string and retry
aStringOrCharacter isCharacter ifTrue:[
^ self , aStringOrCharacter asString
].
+ "/ a Text argument does the concatenation itself (concatenateFromString:)
aStringOrCharacter isText ifTrue:[
^ aStringOrCharacter concatenateFromString:self
].
+ "/ mixed widths: bring both operands to the wider representation before concatenating
+ aStringOrCharacter isString ifTrue:[
+ (otherWidth := aStringOrCharacter bitsPerCharacter) ~~ (myWidth := self bitsPerCharacter) ifTrue:[
+ otherWidth > myWidth ifTrue:[
+ ^ (aStringOrCharacter species fromString:self) , aStringOrCharacter
+ ].
+ ^ self , (self species fromString:aStringOrCharacter)
+ ].
+ ].
+ "/ same width (or not a string at all): use the inherited concatenation
^ super , aStringOrCharacter
"
'hello' , $1
'hello' , '1'
'hello' , (' world' asText allBold)
+ 'hello' , (JISEncodedString fromString:' world')
+ (JISEncodedString fromString:'hello') , ' world'
Transcript showCR:
(Text string:'hello' emphasis:#italic) , (Text string:' world' emphasis:#bold)
"
- "Modified: 18.5.1996 / 12:29:30 / cg"
+ "Modified: 28.6.1997 / 00:13:17 / cg"
!
concatenate:string1 and:string2
@@ -2605,6 +2779,7 @@
(and those are untested/incomplete):
#euc
#jis7
+ #shiftJis
#mac
#msdos
"
@@ -2614,6 +2789,9 @@
encodingSymbol == #jis7 ifTrue:[
^ self class decodeFromJIS7:self
].
+ encodingSymbol == #shiftJis ifTrue:[
+ ^ self class decodeFromShiftJIS:self
+ ].
encodingSymbol == #euc ifTrue:[
^ self class decodeFromEUC:self
].
@@ -2630,7 +2808,7 @@
^ newString
"Created: 22.2.1996 / 15:06:49 / cg"
- "Modified: 17.4.1996 / 18:25:17 / cg"
+ "Modified: 27.6.1997 / 22:44:59 / cg"
!
encodeInto:encodingSymbol
@@ -4273,5 +4451,5 @@
!CharacterArray class methodsFor:'documentation'!
version
+ "return the CVS Header keyword string (repository path, revision, checkin date and author) of this class's source file"
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.111 1997-06-17 16:06:04 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.112 1997-06-27 22:13:47 cg Exp $'
! !