--- a/CharArray.st Mon Apr 22 13:45:52 1996 +0200
+++ b/CharArray.st Mon Apr 22 15:48:18 1996 +0200
@@ -140,7 +140,7 @@
the internal (iso8859, ansi) format.
This is an experimental interface - unfinished"
- |table|
+ |table unrepresentableCharacterCode x80Table|
DecoderTables notNil ifTrue:[
table := DecoderTables at:encoding ifAbsent:nil.
@@ -150,17 +150,163 @@
].
+ unrepresentableCharacterCode := 16rBF.
+
+ "/
+ "/ setup as identityTranslation
+ "/
table := (0 to:255) asByteArray.
+
"/
"/ mac -> iso8859
"/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ A0 (cross)
+ "/ A5 (dot-dot)
+ "/ AA (tm - trademark)
+ "/ AD (not equal)
+ "/ B0 (infinity)
+ "/ B3 (less-double-equal <=)
+ "/ B4 (greater-equal >-)
+ "/ B6 (math lowercase delta)
+ "/ B7 (math sum)
+ "/ B8 (math uppercase pi)
+ "/ B9 (math lowercase pi)
+ "/ BA (math integral)
+ "/ BD (math omega)
+ "/ C3 (math union)
+ "/ C4 (ext latin f)
+ "/ C5 (math almost equal)
+ "/ C6 (math uppercase delta)
+ "/ C9 (dot-dot-dot)
+ "/ CE (OE ligature)
+ "/ CF (oe ligature)
+ "/ D1 (hyphen)
+ "/ D2 (opening top dquote)
+ "/ D3 (closing top dquote)
+ "/ D4 (opening top quote)
+ "/ D5 (closing top quote)
+ "/ D7 (geometric: )
+ "/ D9 (uppercase Y diacrit )
+ "/ DA (slash2 )
+ "/ DC (single oldStyle opening quote (<) )
+ "/ DD (single oldStyle closing quote (>) )
+ "/ DE (? )
+ "/ DF (? )
+ "/ E0 (double cross )
+ "/ E2 (opening bottom quote )
+ "/ E3 (opening bottom dquote )
+ "/ F0 (? )
+ "/ F5 (latin l )
+ "/ F6 (diacrit circumflex)
+ "/ F7 (diacrit tilde)
+ "/ F8 (diacrit top line)
+ "/ F9 (diacrit inverse circumflex)
+ "/ FA (diacrit dot)
+ "/ FB (diacrit ring)
+ "/ FC (diacrit cedille left)
+ "/ FD (diacrit dquote)
+ "/ FE (diacrit cedille right)
+ "/ FF (diacrit circumflex2 ?)
+ "/
encoding == #mac ifTrue:[
- table at:8r232+1 put:246. "/ german umlaut o (mac: 154; 8859: 246)
- table at:8r212+1 put:228. "/ german umlaut a (mac: 138; 8859: 228)
- table at:8r237+1 put:252. "/ german umlaut u (mac: 159; 8859: 252)
- table at:8r206+1 put:220. "/ german umlaut U (mac: 134; 8859: 220)
- "/ more needed here - need info to do it ....
+ x80Table := "/ mac codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r00c4 16r00c5 16r00c7 16r00c9 16r00d1 16r00d6 16r00dc 16r00e1 "/ 80
+ 16r00e0 16r00e2 16r00e4 16r00e3 16r00e5 16r00e7 16r00e9 16r00e8 "/ 88
+
+ 16r00ea 16r00eb 16r00ed 16r00ec 16r00ee 16r00ef 16r00f1 16r00f3 "/ 90
+ 16r00f2 16r00f4 16r00f6 16r00f5 16r00fa 16r00f9 16r00fb 16r00fc "/ 98
+
+ 16rFFFF 16r00b0 16r00a2 16r00a3 16r00a7 16rFFFF 16r00b6 16r00df "/ a0
+ 16r00ae 16r00a9 16rFFFF 16r00b4 16r00a8 16rFFFF 16r00c6 16r00d8 "/ a8
+
+ 16rFFFF 16r00b1 16rFFFF 16rFFFF 16r00a5 16r00b5 16rFFFF 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16r00aa 16r00ba 16rFFFF 16r00e6 16r00f8 "/ b8
+
+ 16r00bf 16r00a1 16r00ac 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00ab "/ c0
+ 16r00bb 16rFFFF 16r00a0 16r00c0 16r00c3 16r00d5 16rFFFF 16rFFFF "/ c8 (cb is A-grave: iso c0; c1 is A-acute, see e7)
+
+ 16r00ad 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF "/ d0
+ 16r00ff 16rFFFF 16rFFFF 16r00a4 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d8 (d8 is y-diaeresis: iso ff)
+
+ 16rFFFF 16r00b7 16rFFFF 16rFFFF 16rFFFF 16r00c2 16r00ca 16r00c1 "/ e0
+ 16r00cb 16r00c8 16r00cd 16r00ce 16r00cf 16r00cc 16r00d3 16r00d4 "/ e8
+
+ 16rFFFF 16r00d2 16r00da 16r00db 16r00d9 16rFFFF 16rFFFF 16rFFFF "/ f0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ f8
+ ).
].
+
+ "/
+ "/ next -> iso8859
+ "/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ A4 (diacrit /)
+ "/ A9 (diacrit quote)
+ "/ AA (top opening dquote)
+ "/ AC (single oldStyle opening quote (<))
+ "/ AD (single oldStyle closing quote (>))
+ "/ AE (?)
+ "/ AF (?)
+ "/ B2 (cross)
+ "/ B3 (double cross)
+ "/ B7 (enter dot)
+ "/ B8 (opening bottom quote)
+ "/ B9 (opening bottom dquote)
+ "/ BA (closing top dquote)
+ "/ BC (dot-dot-dot)
+ "/ BD (per-mille)
+ "/ C0 (subscript 1)
+ "/ C1 (accent grave)
+ "/ C2 (accent degu)
+ "/ C3 (accent circonflex)
+ "/ C4 (accent tilde)
+ "/ C5 (accent line)
+ "/ C6 (accent circonflex reverse)
+ "/ C7 (accent dot)
+ "/ C8 (accent dot-dot)
+ "/ D0 (hline)
+ "/ E8 (engl. pound upper case L)
+ "/ EA (OE ligature)
+ "/ F5 (latin l)
+ "/ F8 (engl. pound lower case l)
+ "/ FE (?)
+ "/ FF (?)
+ "/
+ encoding == #next ifTrue:[
+ unrepresentableCharacterCode := 16rBF.
+
+ x80Table := "/ next codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r0080 16r00c0 16r00c1 16r00c2 16r00c3 16r00c4 16r00c5 16r00c7 "/ 80 NOTE(review): 80 maps to itself; presumably NeXT 80 is a no-break space (iso a0) - confirm
+ 16r00c8 16r00c9 16r00ca 16r00cb 16r00cc 16r00cd 16r00ce 16r00cf "/ 88
+
+ 16r00d0 16r00d1 16r00d2 16r00d3 16r00d4 16r00d5 16r00d6 16r00d9 "/ 90
+ 16r00da 16r00db 16r00dc 16r00dd 16r00de 16r00b5 16r00d7 16r00f7 "/ 98
+
+ 16r00a9 16r00a1 16r00a2 16r00a3 16rFFFF 16r00a5 16rFFFF 16r00a7 "/ a0
+ 16r00a4 16rFFFF 16rFFFF 16r00ab 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ a8
+
+ 16r00ae 16r00ad 16rFFFF 16rFFFF 16r00b7 16r00a6 16r00b6 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16r00bb 16rFFFF 16rFFFF 16r00ac 16r00bf "/ b8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c0
+ 16rFFFF 16r00b2 16r00b0 16r00b8 16r00b3 16rFFFF 16rFFFF 16rFFFF "/ c8
+
+ 16rFFFF 16r00b1 16r00bc 16r00bd 16r00be 16r00e0 16r00e1 16r00e2 "/ d0
+ 16r00e3 16r00e4 16r00e5 16r00e7 16r00e8 16r00e9 16r00ea 16r00eb "/ d8
+
+ 16r00ec 16r00c6 16r00ed 16r00aa 16r00ee 16r00ef 16r00f0 16r00f1 "/ e0
+ 16rFFFF 16r00d8 16rFFFF 16r00ba 16r00f2 16r00f3 16r00f4 16r00f5 "/ e8
+
+ 16r00f6 16r00e6 16r00f9 16r00fa 16r00fb 16rFFFF 16r00fc 16r00fd "/ f0
+ 16rFFFF 16r00f8 16rFFFF 16r00df 16r00fe 16r00ff 16rFFFF 16rFFFF "/ f8
+ ).
+ ].
+
"/
"/ postscript -> iso8859
"/
@@ -173,27 +319,81 @@
table at:8r232+1 put:220. "/ german umlaut U (ps: 154; 8859: 220)
"/ more needed here - need info to do it ....
].
+
"/
- "/ msdos -> iso8859
+ "/ msdos (codePage 437) -> iso8859
+ "/
+ "/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ 9E (pesetas)
+ "/ 9F (latin f)
+ "/ A9 (technical not)
+ "/ B0 - BF (block graphic)
+ "/ C0 - CF (block graphic)
+ "/ D0 - DF (block graphic)
+ "/ E0 (greek alpha)
+ "/ E2 - E5 (greek)
+ "/ E7 - EC (greek & math)
+ "/ EE - EF (greek & math)
+ "/ F0 (math)
+ "/ F2 - F5 (math & technical)
+ "/ F7 (math)
+ "/ F9 (center dot)
+ "/ FB (math sqrt)
+ "/ FC (super n)
+ "/ FE (block)
"/
encoding == #msdos ifTrue:[
- table at:16r82+1 put:16re9. "/ french e degu (dos: 82; 8859: e9)
- table at:16r8a+1 put:16re8. "/ french e graph (dos: 8a; 8859: e8)
- table at:16r88+1 put:16rea. "/ french e circ. (dos: 88; 8859: ea)
- table at:16r8c+1 put:16ree. "/ french u circ. (dos: 8c; 8859: ee)
- table at:16r87+1 put:16re7. "/ french c cedille (dos: 87; 8859: e7)
- table at:16r85+1 put:16re0. "/ french a degu (dos: 85; 8859: e0)
-
-"/ table at:8r224+1 put:246. "/ german umlaut o (ps: 148; 8859: 246)
-"/ table at:8r204+1 put:228. "/ german umlaut a (ps: 132; 8859: 228)
-"/ table at:8r201+1 put:252. "/ german umlaut u (ps: 129; 8859: 252)
-"/ table at:8r231+1 put:214. "/ german umlaut O (ps: 153; 8859: 214)
-"/ table at:8r216+1 put:196. "/ german umlaut A (ps: 142; 8859: 196)
-"/ table at:8r232+1 put:220. "/ german umlaut U (ps: 154; 8859: 220)
- "/ more needed here - need info to do it ....
+ unrepresentableCharacterCode := 16rBF.
+
+ x80Table := "/ msdos (cp437) codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r00c7 16r00fc 16r00e9 16r00e2 16r00e4 16r00e0 16r00e5 16r00e7 "/ 80
+ 16r00ea 16r00eb 16r00e8 16r00ef 16r00ee 16r00ec 16r00c4 16r00c5 "/ 88
+
+ 16r00c9 16r00e6 16r00c6 16r00f4 16r00f6 16r00f2 16r00fb 16r00f9 "/ 90
+ 16r00ff 16r00d6 16r00dc 16r00a2 16r00a3 16r00a5 16rFFFF 16rFFFF "/ 98
+
+ 16r00e1 16r00ed 16r00f3 16r00fa 16r00f1 16r00d1 16r00aa 16r00ba "/ a0 (a7 is the masculine ordinal: iso ba)
+ 16r00bf 16rFFFF 16r00ac 16r00bd 16r00bc 16r00a1 16r00ab 16r00bb "/ a8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ b8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d8
+
+ 16rFFFF 16r00df 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00b5 16rFFFF "/ e0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f8 16rFFFF 16rFFFF "/ e8
+
+ 16rFFFF 16r00b1 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF "/ f0
+ 16r00b0 16rFFFF 16r00b7 16rFFFF 16rFFFF 16r00b2 16rFFFF 16r00a0 "/ f8
+ ).
].
"/ more encodings needed here ....
+
+ "/
+ "/ x80Table should contain the translation for
+ "/ 0x80..0xFF characters
+ "/
+ x80Table notNil ifTrue:[ "/ only encodings which defined an upper-half table above are patched
+ x80Table keysAndValuesDo:[:idx :repl | "/ idx is the 1-based position in x80Table
+ |ch|
+
+ repl == 16rFFFF ifTrue:[ "/ 16rFFFF is the no-equivalent marker
+ ch := unrepresentableCharacterCode
+ ] ifFalse:[
+ ch := repl
+ ].
+ table at:(16r80 + idx) put:ch "/ slot for code c is c+1, so this addresses code 16r7F + idx
+ ].
+ ].
+
DecoderTables at:encoding put:table.
^ table
@@ -202,7 +402,7 @@
"
"Created: 20.10.1995 / 23:04:43 / cg"
- "Modified: 22.2.1996 / 17:19:38 / cg"
+ "Modified: 22.4.1996 / 15:30:10 / cg"
!
encoderTableFor:encoding
@@ -210,7 +410,8 @@
into an alien encoding.
This is an experimental interface - unfinished"
- |table|
+ |table decoderTable unrepresentableCharacterCode
+ unrepresentableISOCharacterCode x80Table|
EncoderTables notNil ifTrue:[
table := EncoderTables at:encoding ifAbsent:nil.
@@ -219,61 +420,110 @@
EncoderTables := IdentityDictionary new
].
- table := (0 to:255) asByteArray.
"/
"/ iso8859 -> mac
"/
encoding == #mac ifTrue:[
- table at:246+1 put:8r232. "/ german umlaut o (mac: 154; 8859: 246)
- table at:228+1 put:8r212. "/ german umlaut a (mac: 138; 8859: 228)
- table at:252+1 put:8r237. "/ german umlaut u (mac: 159; 8859: 252)
- table at:220+1 put:8r206. "/ german umlaut U (mac: 134; 8859: 220)
- "/ more needed here - need info to do it ....
+ decoderTable := self decoderTableFor:encoding.
+ unrepresentableCharacterCode := 16rFF.
+ unrepresentableISOCharacterCode := 16rBF.
].
"/
"/ iso8859 -> msdos
"/
encoding == #msdos ifTrue:[
- table at:233+1 put:8r202. "/ french e accent degu (dos: 130; 8859: 233)
- table at:232+1 put:8r212. "/ french e accent graph (dos: 138; 8859: 232)
- table at:234+1 put:8r210. "/ french e accent circ. (dos: 136; 8859: 234)
- table at:238+1 put:8r214. "/ french u accent circ. (dos: 140; 8859: 238)
- table at:231+1 put:8r207. "/ french c cedille (dos: 135; 8859: 231)
-
- table at:16re9+1 put:16r82. "/ french e degu (dos: 82; 8859: e9)
- table at:16re8+1 put:16r8a. "/ french e graph (dos: 8a; 8859: e8)
- table at:16rea+1 put:16r88. "/ french e circ. (dos: 88; 8859: ea)
- table at:16ree+1 put:16r8c. "/ french u circ. (dos: 8c; 8859: ee)
- table at:16re7+1 put:16r87. "/ french c cedille (dos: 87; 8859: e7)
- table at:16re0+1 put:16r85. "/ french a degu (dos: 85; 8859: e0)
- "/ more needed here - need info to do it ....
+ decoderTable := self decoderTableFor:encoding.
+ unrepresentableCharacterCode := 16rFF.
+ unrepresentableISOCharacterCode := 16rBF.
].
+
"/ more encodings needed here ....
+ decoderTable notNil ifTrue:[ "/ build the encoder by inverting the decoder table
+ table := Array new:256 withAll:unrepresentableCharacterCode. "/ iso codes never hit below stay unrepresentable
+
+ 0 to:16rFF do:[:code |
+ |isoCode|
+
+ isoCode := decoderTable at:(code+1). "/ slot for code c is c+1
+ table at:isoCode+1 put:code. "/ NOTE(review): codes sharing one isoCode collide here, the highest code wins - confirm intended
+ ]
+ ].
+
+ table isNil ifTrue:[
+ "/
+ "/ setup as identityTranslation
+ "/
+ table := (0 to:255) asByteArray.
+ ].
EncoderTables at:encoding put:table.
^ table
"
EncoderTables := nil
+ DecoderTables := nil
+ CharacterArray decoderTableFor:#mac
+ CharacterArray encoderTableFor:#mac
"
"Created: 22.2.1996 / 16:17:58 / cg"
- "Modified: 22.2.1996 / 17:21:13 / cg"
+ "Modified: 22.4.1996 / 15:42:22 / cg"
!
supportedEncodings
- "/ the first row gives the external (names);
- "/ the second the internally used symbol.
- "/ you may map more than one external name onto the same internal
+ "return an array holding one array with the symbolic names of the
+ supported encodings. These are the internally visible ones only"
^ #(
- ('DOS' 'MAC' 'EUC' 'JIS' 'JIS7' 'GB' 'BIG5')
- (msdos mac euc jis jis7 gb big5)
+ (
+ iso8859
+ ascii
+ msdos
+ mac
+ next
+ euc
+ jis
+ jis7
+ gb
+ big5)
)
"Created: 22.2.1996 / 16:19:20 / cg"
- "Modified: 17.4.1996 / 16:18:28 / cg"
+ "Modified: 22.4.1996 / 14:43:50 / cg"
+!
+
+supportedExternalEncodings
+ "return an array of two arrays naming the encodings which are
+ supported for external resources (i.e. files).
+ The first array contains user-readable strings (descriptions),
+ the second contains the internally used symbolic names.
+ More than one external name may be mapped onto the same symbolic name."
+
+ ^ #(
+ (
+ 'iso8859 (ansi)'
+ 'msdos (codepage 437)'
+ 'macintosh'
+ 'NEXT'
+ 'EUC (extended unix code)'
+ 'JIS7 (jis 7bit escape codes)'
+ 'GB (mainland china hanzi)'
+ 'BIG5 (taiwan hanzi)'
+ )
+ (
+ iso8859
+ msdos
+ mac
+ next
+ euc
+ jis7
+ gb
+ big5)
+ )
+
+ "Created: 22.4.1996 / 14:39:39 / cg"
+ "Modified: 22.4.1996 / 14:49:34 / cg"
! !
!CharacterArray class methodsFor:'encoding / decoding'!
@@ -3404,5 +3654,5 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.73 1996-04-20 21:16:54 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.74 1996-04-22 13:48:18 cg Exp $'
! !
--- a/CharacterArray.st Mon Apr 22 13:45:52 1996 +0200
+++ b/CharacterArray.st Mon Apr 22 15:48:18 1996 +0200
@@ -140,7 +140,7 @@
the internal (iso8859, ansi) format.
This is an experimental interface - unfinished"
- |table|
+ |table unrepresentableCharacterCode x80Table|
DecoderTables notNil ifTrue:[
table := DecoderTables at:encoding ifAbsent:nil.
@@ -150,17 +150,163 @@
].
+ unrepresentableCharacterCode := 16rBF.
+
+ "/
+ "/ setup as identityTranslation
+ "/
table := (0 to:255) asByteArray.
+
"/
"/ mac -> iso8859
"/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ A0 (cross)
+ "/ A5 (dot-dot)
+ "/ AA (tm - trademark)
+ "/ AD (not equal)
+ "/ B0 (infinity)
+ "/ B3 (less-double-equal <=)
+ "/ B4 (greater-equal >-)
+ "/ B6 (math lowercase delta)
+ "/ B7 (math sum)
+ "/ B8 (math uppercase pi)
+ "/ B9 (math lowercase pi)
+ "/ BA (math integral)
+ "/ BD (math omega)
+ "/ C3 (math union)
+ "/ C4 (ext latin f)
+ "/ C5 (math almost equal)
+ "/ C6 (math uppercase delta)
+ "/ C9 (dot-dot-dot)
+ "/ CE (OE ligature)
+ "/ CF (oe ligature)
+ "/ D1 (hyphen)
+ "/ D2 (opening top dquote)
+ "/ D3 (closing top dquote)
+ "/ D4 (opening top quote)
+ "/ D5 (closing top quote)
+ "/ D7 (geometric: )
+ "/ D9 (uppercase Y diacrit )
+ "/ DA (slash2 )
+ "/ DC (single oldStyle opening quote (<) )
+ "/ DD (single oldStyle closing quote (>) )
+ "/ DE (? )
+ "/ DF (? )
+ "/ E0 (double cross )
+ "/ E2 (opening bottom quote )
+ "/ E3 (opening bottom dquote )
+ "/ F0 (? )
+ "/ F5 (latin l )
+ "/ F6 (diacrit circumflex)
+ "/ F7 (diacrit tilde)
+ "/ F8 (diacrit top line)
+ "/ F9 (diacrit inverse circumflex)
+ "/ FA (diacrit dot)
+ "/ FB (diacrit ring)
+ "/ FC (diacrit cedille left)
+ "/ FD (diacrit dquote)
+ "/ FE (diacrit cedille right)
+ "/ FF (diacrit circumflex2 ?)
+ "/
encoding == #mac ifTrue:[
- table at:8r232+1 put:246. "/ german umlaut o (mac: 154; 8859: 246)
- table at:8r212+1 put:228. "/ german umlaut a (mac: 138; 8859: 228)
- table at:8r237+1 put:252. "/ german umlaut u (mac: 159; 8859: 252)
- table at:8r206+1 put:220. "/ german umlaut U (mac: 134; 8859: 220)
- "/ more needed here - need info to do it ....
+ x80Table := "/ mac codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r00c4 16r00c5 16r00c7 16r00c9 16r00d1 16r00d6 16r00dc 16r00e1 "/ 80
+ 16r00e0 16r00e2 16r00e4 16r00e3 16r00e5 16r00e7 16r00e9 16r00e8 "/ 88
+
+ 16r00ea 16r00eb 16r00ed 16r00ec 16r00ee 16r00ef 16r00f1 16r00f3 "/ 90
+ 16r00f2 16r00f4 16r00f6 16r00f5 16r00fa 16r00f9 16r00fb 16r00fc "/ 98
+
+ 16rFFFF 16r00b0 16r00a2 16r00a3 16r00a7 16rFFFF 16r00b6 16r00df "/ a0
+ 16r00ae 16r00a9 16rFFFF 16r00b4 16r00a8 16rFFFF 16r00c6 16r00d8 "/ a8
+
+ 16rFFFF 16r00b1 16rFFFF 16rFFFF 16r00a5 16r00b5 16rFFFF 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16r00aa 16r00ba 16rFFFF 16r00e6 16r00f8 "/ b8
+
+ 16r00bf 16r00a1 16r00ac 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00ab "/ c0
+ 16r00bb 16rFFFF 16r00a0 16r00c0 16r00c3 16r00d5 16rFFFF 16rFFFF "/ c8 (cb is A-grave: iso c0; c1 is A-acute, see e7)
+
+ 16r00ad 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF "/ d0
+ 16r00ff 16rFFFF 16rFFFF 16r00a4 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d8 (d8 is y-diaeresis: iso ff)
+
+ 16rFFFF 16r00b7 16rFFFF 16rFFFF 16rFFFF 16r00c2 16r00ca 16r00c1 "/ e0
+ 16r00cb 16r00c8 16r00cd 16r00ce 16r00cf 16r00cc 16r00d3 16r00d4 "/ e8
+
+ 16rFFFF 16r00d2 16r00da 16r00db 16r00d9 16rFFFF 16rFFFF 16rFFFF "/ f0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ f8
+ ).
].
+
+ "/
+ "/ next -> iso8859
+ "/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ A4 (diacrit /)
+ "/ A9 (diacrit quote)
+ "/ AA (top opening dquote)
+ "/ AC (single oldStyle opening quote (<))
+ "/ AD (single oldStyle closing quote (>))
+ "/ AE (?)
+ "/ AF (?)
+ "/ B2 (cross)
+ "/ B3 (double cross)
+ "/ B7 (enter dot)
+ "/ B8 (opening bottom quote)
+ "/ B9 (opening bottom dquote)
+ "/ BA (closing top dquote)
+ "/ BC (dot-dot-dot)
+ "/ BD (per-mille)
+ "/ C0 (subscript 1)
+ "/ C1 (accent grave)
+ "/ C2 (accent degu)
+ "/ C3 (accent circonflex)
+ "/ C4 (accent tilde)
+ "/ C5 (accent line)
+ "/ C6 (accent circonflex reverse)
+ "/ C7 (accent dot)
+ "/ C8 (accent dot-dot)
+ "/ D0 (hline)
+ "/ E8 (engl. pound upper case L)
+ "/ EA (OE ligature)
+ "/ F5 (latin l)
+ "/ F8 (engl. pound lower case l)
+ "/ FE (?)
+ "/ FF (?)
+ "/
+ encoding == #next ifTrue:[
+ unrepresentableCharacterCode := 16rBF.
+
+ x80Table := "/ next codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r0080 16r00c0 16r00c1 16r00c2 16r00c3 16r00c4 16r00c5 16r00c7 "/ 80 NOTE(review): 80 maps to itself; presumably NeXT 80 is a no-break space (iso a0) - confirm
+ 16r00c8 16r00c9 16r00ca 16r00cb 16r00cc 16r00cd 16r00ce 16r00cf "/ 88
+
+ 16r00d0 16r00d1 16r00d2 16r00d3 16r00d4 16r00d5 16r00d6 16r00d9 "/ 90
+ 16r00da 16r00db 16r00dc 16r00dd 16r00de 16r00b5 16r00d7 16r00f7 "/ 98
+
+ 16r00a9 16r00a1 16r00a2 16r00a3 16rFFFF 16r00a5 16rFFFF 16r00a7 "/ a0
+ 16r00a4 16rFFFF 16rFFFF 16r00ab 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ a8
+
+ 16r00ae 16r00ad 16rFFFF 16rFFFF 16r00b7 16r00a6 16r00b6 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16r00bb 16rFFFF 16rFFFF 16r00ac 16r00bf "/ b8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c0
+ 16rFFFF 16r00b2 16r00b0 16r00b8 16r00b3 16rFFFF 16rFFFF 16rFFFF "/ c8
+
+ 16rFFFF 16r00b1 16r00bc 16r00bd 16r00be 16r00e0 16r00e1 16r00e2 "/ d0
+ 16r00e3 16r00e4 16r00e5 16r00e7 16r00e8 16r00e9 16r00ea 16r00eb "/ d8
+
+ 16r00ec 16r00c6 16r00ed 16r00aa 16r00ee 16r00ef 16r00f0 16r00f1 "/ e0
+ 16rFFFF 16r00d8 16rFFFF 16r00ba 16r00f2 16r00f3 16r00f4 16r00f5 "/ e8
+
+ 16r00f6 16r00e6 16r00f9 16r00fa 16r00fb 16rFFFF 16r00fc 16r00fd "/ f0
+ 16rFFFF 16r00f8 16rFFFF 16r00df 16r00fe 16r00ff 16rFFFF 16rFFFF "/ f8
+ ).
+ ].
+
"/
"/ postscript -> iso8859
"/
@@ -173,27 +319,81 @@
table at:8r232+1 put:220. "/ german umlaut U (ps: 154; 8859: 220)
"/ more needed here - need info to do it ....
].
+
"/
- "/ msdos -> iso8859
+ "/ msdos (codePage 437) -> iso8859
+ "/
+ "/
+ "/ You will lose the following characters:
+ "/ - all of them will be replaced by BF (¿)
+ "/
+ "/ 9E (pesetas)
+ "/ 9F (latin f)
+ "/ A9 (technical not)
+ "/ B0 - BF (block graphic)
+ "/ C0 - CF (block graphic)
+ "/ D0 - DF (block graphic)
+ "/ E0 (greek alpha)
+ "/ E2 - E5 (greek)
+ "/ E7 - EC (greek & math)
+ "/ EE - EF (greek & math)
+ "/ F0 (math)
+ "/ F2 - F5 (math & technical)
+ "/ F7 (math)
+ "/ F9 (center dot)
+ "/ FB (math sqrt)
+ "/ FC (super n)
+ "/ FE (block)
"/
encoding == #msdos ifTrue:[
- table at:16r82+1 put:16re9. "/ french e degu (dos: 82; 8859: e9)
- table at:16r8a+1 put:16re8. "/ french e graph (dos: 8a; 8859: e8)
- table at:16r88+1 put:16rea. "/ french e circ. (dos: 88; 8859: ea)
- table at:16r8c+1 put:16ree. "/ french u circ. (dos: 8c; 8859: ee)
- table at:16r87+1 put:16re7. "/ french c cedille (dos: 87; 8859: e7)
- table at:16r85+1 put:16re0. "/ french a degu (dos: 85; 8859: e0)
-
-"/ table at:8r224+1 put:246. "/ german umlaut o (ps: 148; 8859: 246)
-"/ table at:8r204+1 put:228. "/ german umlaut a (ps: 132; 8859: 228)
-"/ table at:8r201+1 put:252. "/ german umlaut u (ps: 129; 8859: 252)
-"/ table at:8r231+1 put:214. "/ german umlaut O (ps: 153; 8859: 214)
-"/ table at:8r216+1 put:196. "/ german umlaut A (ps: 142; 8859: 196)
-"/ table at:8r232+1 put:220. "/ german umlaut U (ps: 154; 8859: 220)
- "/ more needed here - need info to do it ....
+ unrepresentableCharacterCode := 16rBF.
+
+ x80Table := "/ msdos (cp437) codes 80..FF -> iso8859 codes; 16rFFFF marks a code without iso equivalent
+ #( 16r00c7 16r00fc 16r00e9 16r00e2 16r00e4 16r00e0 16r00e5 16r00e7 "/ 80
+ 16r00ea 16r00eb 16r00e8 16r00ef 16r00ee 16r00ec 16r00c4 16r00c5 "/ 88
+
+ 16r00c9 16r00e6 16r00c6 16r00f4 16r00f6 16r00f2 16r00fb 16r00f9 "/ 90
+ 16r00ff 16r00d6 16r00dc 16r00a2 16r00a3 16r00a5 16rFFFF 16rFFFF "/ 98
+
+ 16r00e1 16r00ed 16r00f3 16r00fa 16r00f1 16r00d1 16r00aa 16r00ba "/ a0 (a7 is the masculine ordinal: iso ba)
+ 16r00bf 16rFFFF 16r00ac 16r00bd 16r00bc 16r00a1 16r00ab 16r00bb "/ a8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ b0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ b8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ c8
+
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF "/ d8
+
+ 16rFFFF 16r00df 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00b5 16rFFFF "/ e0
+ 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f8 16rFFFF 16rFFFF "/ e8
+
+ 16rFFFF 16r00b1 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF "/ f0
+ 16r00b0 16rFFFF 16r00b7 16rFFFF 16rFFFF 16r00b2 16rFFFF 16r00a0 "/ f8
+ ).
].
"/ more encodings needed here ....
+
+ "/
+ "/ x80Table should contain the translation for
+ "/ 0x80..0xFF characters
+ "/
+ x80Table notNil ifTrue:[ "/ only encodings which defined an upper-half table above are patched
+ x80Table keysAndValuesDo:[:idx :repl | "/ idx is the 1-based position in x80Table
+ |ch|
+
+ repl == 16rFFFF ifTrue:[ "/ 16rFFFF is the no-equivalent marker
+ ch := unrepresentableCharacterCode
+ ] ifFalse:[
+ ch := repl
+ ].
+ table at:(16r80 + idx) put:ch "/ slot for code c is c+1, so this addresses code 16r7F + idx
+ ].
+ ].
+
DecoderTables at:encoding put:table.
^ table
@@ -202,7 +402,7 @@
"
"Created: 20.10.1995 / 23:04:43 / cg"
- "Modified: 22.2.1996 / 17:19:38 / cg"
+ "Modified: 22.4.1996 / 15:30:10 / cg"
!
encoderTableFor:encoding
@@ -210,7 +410,8 @@
into an alien encoding.
This is an experimental interface - unfinished"
- |table|
+ |table decoderTable unrepresentableCharacterCode
+ unrepresentableISOCharacterCode x80Table|
EncoderTables notNil ifTrue:[
table := EncoderTables at:encoding ifAbsent:nil.
@@ -219,61 +420,110 @@
EncoderTables := IdentityDictionary new
].
- table := (0 to:255) asByteArray.
"/
"/ iso8859 -> mac
"/
encoding == #mac ifTrue:[
- table at:246+1 put:8r232. "/ german umlaut o (mac: 154; 8859: 246)
- table at:228+1 put:8r212. "/ german umlaut a (mac: 138; 8859: 228)
- table at:252+1 put:8r237. "/ german umlaut u (mac: 159; 8859: 252)
- table at:220+1 put:8r206. "/ german umlaut U (mac: 134; 8859: 220)
- "/ more needed here - need info to do it ....
+ decoderTable := self decoderTableFor:encoding.
+ unrepresentableCharacterCode := 16rFF.
+ unrepresentableISOCharacterCode := 16rBF.
].
"/
"/ iso8859 -> msdos
"/
encoding == #msdos ifTrue:[
- table at:233+1 put:8r202. "/ french e accent degu (dos: 130; 8859: 233)
- table at:232+1 put:8r212. "/ french e accent graph (dos: 138; 8859: 232)
- table at:234+1 put:8r210. "/ french e accent circ. (dos: 136; 8859: 234)
- table at:238+1 put:8r214. "/ french u accent circ. (dos: 140; 8859: 238)
- table at:231+1 put:8r207. "/ french c cedille (dos: 135; 8859: 231)
-
- table at:16re9+1 put:16r82. "/ french e degu (dos: 82; 8859: e9)
- table at:16re8+1 put:16r8a. "/ french e graph (dos: 8a; 8859: e8)
- table at:16rea+1 put:16r88. "/ french e circ. (dos: 88; 8859: ea)
- table at:16ree+1 put:16r8c. "/ french u circ. (dos: 8c; 8859: ee)
- table at:16re7+1 put:16r87. "/ french c cedille (dos: 87; 8859: e7)
- table at:16re0+1 put:16r85. "/ french a degu (dos: 85; 8859: e0)
- "/ more needed here - need info to do it ....
+ decoderTable := self decoderTableFor:encoding.
+ unrepresentableCharacterCode := 16rFF.
+ unrepresentableISOCharacterCode := 16rBF.
].
+
"/ more encodings needed here ....
+ decoderTable notNil ifTrue:[ "/ build the encoder by inverting the decoder table
+ table := Array new:256 withAll:unrepresentableCharacterCode. "/ iso codes never hit below stay unrepresentable
+
+ 0 to:16rFF do:[:code |
+ |isoCode|
+
+ isoCode := decoderTable at:(code+1). "/ slot for code c is c+1
+ table at:isoCode+1 put:code. "/ NOTE(review): codes sharing one isoCode collide here, the highest code wins - confirm intended
+ ]
+ ].
+
+ table isNil ifTrue:[
+ "/
+ "/ setup as identityTranslation
+ "/
+ table := (0 to:255) asByteArray.
+ ].
EncoderTables at:encoding put:table.
^ table
"
EncoderTables := nil
+ DecoderTables := nil
+ CharacterArray decoderTableFor:#mac
+ CharacterArray encoderTableFor:#mac
"
"Created: 22.2.1996 / 16:17:58 / cg"
- "Modified: 22.2.1996 / 17:21:13 / cg"
+ "Modified: 22.4.1996 / 15:42:22 / cg"
!
supportedEncodings
- "/ the first row gives the external (names);
- "/ the second the internally used symbol.
- "/ you may map more than one external name onto the same internal
+ "return an array holding one array with the symbolic names of the
+ supported encodings. These are the internally visible ones only"
^ #(
- ('DOS' 'MAC' 'EUC' 'JIS' 'JIS7' 'GB' 'BIG5')
- (msdos mac euc jis jis7 gb big5)
+ (
+ iso8859
+ ascii
+ msdos
+ mac
+ next
+ euc
+ jis
+ jis7
+ gb
+ big5)
)
"Created: 22.2.1996 / 16:19:20 / cg"
- "Modified: 17.4.1996 / 16:18:28 / cg"
+ "Modified: 22.4.1996 / 14:43:50 / cg"
+!
+
+supportedExternalEncodings
+ "return an array of two arrays naming the encodings which are
+ supported for external resources (i.e. files).
+ The first array contains user-readable strings (descriptions),
+ the second contains the internally used symbolic names.
+ More than one external name may be mapped onto the same symbolic name."
+
+ ^ #(
+ (
+ 'iso8859 (ansi)'
+ 'msdos (codepage 437)'
+ 'macintosh'
+ 'NEXT'
+ 'EUC (extended unix code)'
+ 'JIS7 (jis 7bit escape codes)'
+ 'GB (mainland china hanzi)'
+ 'BIG5 (taiwan hanzi)'
+ )
+ (
+ iso8859
+ msdos
+ mac
+ next
+ euc
+ jis7
+ gb
+ big5)
+ )
+
+ "Created: 22.4.1996 / 14:39:39 / cg"
+ "Modified: 22.4.1996 / 14:49:34 / cg"
! !
!CharacterArray class methodsFor:'encoding / decoding'!
@@ -3404,5 +3654,5 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.73 1996-04-20 21:16:54 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.74 1996-04-22 13:48:18 cg Exp $'
! !