CharacterEncoder.st
author Stefan Vogel <sv@exept.de>
Tue, 28 Apr 2020 16:21:34 +0200
changeset 25373 f030619565e1
parent 25340 9230ffff3935
permissions -rw-r--r--
#REFACTORING by stefan class: ArrayedCollection class changed: #with:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
23981
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
     1
"{ Encoding: utf8 }"
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
     2
8048
293c8178c6eb utf8 errors
Claus Gittinger <cg@exept.de>
parents: 8033
diff changeset
     3
"
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
     4
 COPYRIGHT (c) 2004 by eXept Software AG
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
     5
              All Rights Reserved
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
     6
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
     7
 This software is furnished under a license and may be used
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
     8
 only in accordance with the terms of that license and with the
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
     9
 inclusion of the above copyright notice.   This software may not
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    10
 be provided or otherwise made available to, or used by, any
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    11
 other person.  No title to or ownership of the software is
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    12
 hereby transferred.
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    13
"
8114
05274a80fcc4 separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents: 8105
diff changeset
    14
"{ Package: 'stx:libbasic' }"
05274a80fcc4 separated implementation into dynamically (lazy) loaded classes
Claus Gittinger <cg@exept.de>
parents: 8105
diff changeset
    15
17491
6462f81e6623 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 17118
diff changeset
    16
"{ NameSpace: Smalltalk }"
6462f81e6623 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 17118
diff changeset
    17
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
    18
Object subclass:#CharacterEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    19
	instanceVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    20
	classVariableNames:'AccessLock CachedEncoders EncoderClassesByName EncodersByName
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    21
		EncodingDetectors Jis7KanjiEscapeSequence
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    22
		Jis7KanjiOldEscapeSequence Jis7RomanEscapeSequence
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    23
		JisISO2022EscapeSequence NullEncoderInstance'
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    24
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    25
	category:'Collections-Text-Encodings'
7969
1c252e9cf79c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7967
diff changeset
    26
!
1c252e9cf79c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7967
diff changeset
    27
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
    28
CharacterEncoder subclass:#CompoundEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    29
	instanceVariableNames:'decoder encoder'
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    30
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    31
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    32
	privateIn:CharacterEncoder
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    33
!
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    34
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
    35
CharacterEncoder subclass:#NullEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    36
	instanceVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    37
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    38
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    39
	privateIn:CharacterEncoder
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
    40
!
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
    41
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
    42
CharacterEncoder subclass:#InverseEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    43
	instanceVariableNames:'decoder readAhead'
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    44
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    45
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    46
	privateIn:CharacterEncoder
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    47
!
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    48
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
    49
CharacterEncoder::NullEncoder subclass:#DefaultEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    50
	instanceVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    51
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    52
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    53
	privateIn:CharacterEncoder
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    54
!
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    55
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    56
CharacterEncoder subclass:#OtherEncoding
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    57
	instanceVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    58
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    59
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    60
	privateIn:CharacterEncoder
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    61
!
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
    62
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
    63
CharacterEncoder subclass:#TwoStepEncoder
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    64
	instanceVariableNames:'encoder1 encoder2'
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    65
	classVariableNames:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    66
	poolDictionaries:''
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
    67
	privateIn:CharacterEncoder
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
    68
!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
    69
7893
80df105ac17c checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7892
diff changeset
    70
!CharacterEncoder class methodsFor:'documentation'!
80df105ac17c checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7892
diff changeset
    71
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    72
copyright
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    73
"
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    74
 COPYRIGHT (c) 2004 by eXept Software AG
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
    75
              All Rights Reserved
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    76
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    77
 This software is furnished under a license and may be used
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    78
 only in accordance with the terms of that license and with the
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    79
 inclusion of the above copyright notice.   This software may not
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    80
 be provided or otherwise made available to, or used by, any
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    81
 other person.  No title to or ownership of the software is
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    82
 hereby transferred.
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    83
"
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    84
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    85
7893
80df105ac17c checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7892
diff changeset
    86
documentation
80df105ac17c checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7892
diff changeset
    87
"
22397
38c1f93a3dd1 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22348
diff changeset
    88
    please read howToAddMoreCoders.
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    89
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    90
    Character mappings are based on information in character maps found at either:
8226
81d95cffe5be *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8214
diff changeset
    91
        http://std.dkuug.dk/i18n/charmaps
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    92
    or:
8226
81d95cffe5be *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8214
diff changeset
    93
        http://www.unicode.org/Public/MAPPINGS
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    94
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
    95
    No Warranty.
8226
81d95cffe5be *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8214
diff changeset
    96
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
    97
    All the ISO 8859 codesets include ASCII as a proper subset within them:
8226
81d95cffe5be *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8214
diff changeset
    98
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
    99
    ISO-8859-1: Latin 1 - Western European Languages.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   100
    ISO-8859-2: Latin 2 - Eastern European Languages.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   101
    ISO-8859-3: Latin 3 - Afrikaans, Catalan, Dutch, English, Esperanto, German,
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   102
                          Italian, Maltese, Spanish and Turkish.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   103
    ISO-8859-4: Latin 4 - Danish, English, Estonian, Finnish, German, Greenlandic, Lappish and Latvian.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   104
    ISO-8859-5: Latin/Cyrillic - Bulgarian, Byelorussian, English, Macedonian, Russian, Serbo-Croat and Ukrainian.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   105
    ISO-8859-6: Latin/Arabic - Arabic.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   106
    ISO-8859-7: Latin/Greek - Greek.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   107
    ISO-8859-8: Latin/Hebrew - Hebrew.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   108
    ISO-8859-9: Latin 5 - Danish, Dutch, English, Finnish, French, German, Irish, Italian,
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   109
                          Norwegian, Portuguese, Spanish, Swedish and Turkish.
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   110
    ISO-8859-10: Latin 6 - Danish, English, Estonian, Finnish, German, Greenlandic, Icelandic,
8226
81d95cffe5be *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8214
diff changeset
   111
                          Sami (Lappish), Latvian, Lithuanian, Norwegian, Faroese and Swedish.
8810
8f509238ef9f +showCharacterSet
Claus Gittinger <cg@exept.de>
parents: 8722
diff changeset
   112
    [author:]
8f509238ef9f +showCharacterSet
Claus Gittinger <cg@exept.de>
parents: 8722
diff changeset
   113
        Claus Gittinger
22397
38c1f93a3dd1 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22348
diff changeset
   114
38c1f93a3dd1 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22348
diff changeset
   115
    [see also:]
38c1f93a3dd1 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22348
diff changeset
   116
        EncodedStream
38c1f93a3dd1 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22348
diff changeset
   117
        Base64Coder
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   118
"
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   119
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   120
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   121
examples
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   122
"
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   123
                                                                        [exBegin]
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   124
    |s1 s2|
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   125
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   126
    s1 := 'hello'.
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   127
    s2 := CharacterEncoder encodeString:s1 from:#'iso8859-1' into:#'unicode'.
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   128
    s2
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   129
                                                                        [exEnd]
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   130
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   131
                                                                        [exBegin]
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   132
    |s1 s2|
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   133
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   134
    s1 := 'hello'.
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   135
    s2 := CharacterEncoder encodeString:s1 from:#'iso8859-1' into:#'iso8859-7'.
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   136
    s2
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   137
                                                                        [exEnd]
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   138
"
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   139
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   140
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   141
howToAddMoreCoders
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   142
"
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   143
    Coders can be hand-written or automagically generated via a mapping table.
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   144
    Examples of hand-written coders are UTF8_to_ISO10646 or JIS0208_to_JIS7.
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   145
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   146
    The table driven encode/decode methods can be generated from a character mapping document
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   147
    as found on the unicode consortium host
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   148
        (for example: 'http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT')
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   149
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   150
    or from the i18n character maps:
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   151
        (for example: 'http://std.dkuug.dk/i18n/charmaps/ISO-8859-1')
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   152
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   153
    In order to add another coder (for example: for EBCDIC or ms-codePage 278),
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   154
    perform the following steps:
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   155
        - create a public subclass of CharacterEncoderImplementations::CharacterEncoderImplementation named (for example) CharacterEncoderImplementations::CP267.
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   156
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   157
        - define the mappingURL1_relativeName (if the table is found on 'www.unicode.org')
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   158
          or the mappingURL2_relativeName (if it is found on 'std.dkuug.dk') method, which
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   159
          should return the name of the table file, relative to the top directory there
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   160
          (which is '.../Public/MAPPINGS' on 'www.unicode.org' and '.../i18n/charmaps' on 'std.dkuug.dk').
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   161
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   162
          In this example, the table from 'std.dkuug.dk' is used, and named 'EBCDIC-CP-FI' there.
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   163
16054
171c7f8b4547 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 15966
diff changeset
   164
        - generate code by evaluating (make sure that CharacterEncoderGenerator is loaded from stx:goodies):
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   165
            CharacterEncoder::CP267 generateCode
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   166
20227
4090cde0c345 #DOCUMENTATION by mawalch
mawalch
parents: 20004
diff changeset
   167
    That's all!!
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   168
7909
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   169
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   170
    The existing code was generated by:
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   171
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   172
        CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | Transcript showCR:cls name. cls flushCode; generateCode ]
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   173
        CharacterEncoder::SingleByteEncoder subclassesDo:[:cls | cls allSubclassesDo:[:sub | Transcript showCR:sub name. sub flushCode; generateSubclassCode]]
7909
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   174
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   175
    or individually:
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   176
        CharacterEncoder::ASCII flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   177
        CharacterEncoder::ISO8859_1 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   178
        CharacterEncoder::ISO8859_2 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   179
        CharacterEncoder::ISO8859_3 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   180
        CharacterEncoder::ISO8859_4 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   181
        CharacterEncoder::ISO8859_5 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   182
        CharacterEncoder::ISO8859_6 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   183
        CharacterEncoder::ISO8859_7 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   184
        CharacterEncoder::ISO8859_8 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   185
        CharacterEncoder::ISO8859_9 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   186
        CharacterEncoder::ISO8859_10 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   187
        CharacterEncoder::ISO8859_11 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   188
        CharacterEncoder::ISO8859_13 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   189
        CharacterEncoder::ISO8859_14 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   190
        CharacterEncoder::ISO8859_15 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   191
        CharacterEncoder::ISO8859_16 flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   192
        CharacterEncoder::KOI8_R flushCode; generateCode.
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   193
        CharacterEncoder::GSM0338 flushCode; generateCode.
7909
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   194
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   195
        CharacterEncoder::KOI8_U flushCode; generateSubclassCode.
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   196
9143
28eeea2f0112 comments
Claus Gittinger <cg@exept.de>
parents: 9064
diff changeset
   197
        CharacterEncoder::JIS0208 flushCode; generateCode.
13072
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   198
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   199
    Please check if your encoder tables are complete; for example, with:
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   200
        0 to:255 do:[:ebc |
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   201
            |asc ebc2|
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   202
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   203
            asc := CharacterEncoderImplementations::EBCDIC new decode:ebc.
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   204
            asc notNil ifTrue:[
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   205
               ebc2 := CharacterEncoderImplementations::EBCDIC new encode:asc.
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   206
               self assert:(ebc2 = ebc)
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   207
            ].
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   208
        ].
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   209
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   210
        0 to:255 do:[:asc |
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   211
            |ebc asc2|
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   212
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   213
            ebc := CharacterEncoderImplementations::EBCDIC new encode:asc.
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   214
            ebc notNil ifTrue:[
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   215
               asc2 := CharacterEncoderImplementations::EBCDIC new decode:ebc.
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   216
               self assert:(asc2 = asc)
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   217
            ].
e189e07c16aa changed: #howToAddMoreCoders
Claus Gittinger <cg@exept.de>
parents: 13063
diff changeset
   218
        ].
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   219
"
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   220
! !
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   221
7971
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   222
!CharacterEncoder class methodsFor:'instance creation'!
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   223
22584
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
   224
decoderForUTF8
    "return an instance which maps between utf8 and unicode in the
     direction opposite to the utf8 encoder.
     It is built by handing the encoder answered by #encoderForUTF8
     to a new InverseEncoder via #decoder:"

    |utf8Encoder|

    utf8Encoder := self encoderForUTF8.
    ^ InverseEncoder new decoder:utf8Encoder

    "
     self encoderForUTF8
     self decoderForUTF8
    "
!
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
   234
7971
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   235
encoderFor:encodingNameSymbol
    "given the name of an encoding, answer an encoder-instance which can map
     between that encoding and unicode.
     This delegates to #encoderFor:ifAbsent:. If no encoder is found there,
     no error is raised; instead, an info message is printed and
     NullEncoderInstance is answered."

    ^ self
        encoderFor:encodingNameSymbol
        ifAbsent:[
            |warning|

            "/ disabled: registering the null encoder for this name,
            "/ which would suppress the message on subsequent lookups
            "/     (EncodersByName at:#unicode) at:encodingNameSymbol put:NullEncoderInstance.
            "/     (EncoderClassesByName at:#unicode) at:encodingNameSymbol put:NullEncoder.

            "/ disabled: raising a proceedable error
            "/     self error:'no encoder for ' , encodingNameSymbol mayProceed:true.

            warning := 'CharacterEncoder [warning]: no encoder for "' , encodingNameSymbol , '"'.
            warning infoPrintCR.

            NullEncoderInstance
        ]

    "
     CharacterEncoder encoderFor:#'blabla2'
     CharacterEncoder encoderFor:#'latin1'
     self encoderFor:#'arabic'
     self encoderFor:#'ms-arabic'
     self encoderFor:#'cp1250'
     self encoderFor:#'cp1251'
     self encoderFor:#'cp1252'
     self encoderFor:#'cp1253'
     self encoderFor:#'iso8859-5'
     self encoderFor:#'koi8-r'
     self encoderFor:#'koi8-u'
     self encoderFor:#'jis0208'
     self encoderFor:#'jis7'
     self encoderFor:#'utf8'
     (self encoderFor:#'utf16le') encodeString:'hello'
     (self encoderFor:#'utf16le') encode:5
     (self encoderFor:#'utf16be') encodeString:'hello'
     (self encoderFor:#'utf16be') encode:5
     (self encoderFor:#'utf32le') encodeString:'hello'
     (self encoderFor:#'utf32be') encodeString:'hello'
     self encoderFor:#'sgml'
     self encoderFor:#'java'
     self encoderFor:#'cp850'
     self encoderFor:#'CP850'
    "

    "Modified: / 12-07-2012 / 19:35:43 / cg"
!
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   281
8168
8f8da8bb046d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8156
diff changeset
   282
encoderFor:encodingNameSymbolArg ifAbsent:exceptionValue
    "given the name of an encoding, return an encoder-instance which can map these from/into unicode.

     - a nil name, #unicode and #'iso10646-1' answer NullEncoderInstance.
     - the name is converted to lowercase before any lookup.
     - if the name contains match characters, it is compared (via #matches:)
       against the aliases of the already created unicode encoders and then
       against the aliases of the registered unicode encoder classes;
       the first hit is answered, otherwise the value of exceptionValue.
     - otherwise, the cached encoder instance for that name is answered,
       or one is created from the registered class and cached.
     - if there is no direct encoder, a TwoStepEncoder going through an
       intermediate encoding is searched for.
     - if all of this fails, the value of exceptionValue is answered.

     Encoder classes are resolved and instantiated outside of the
     AccessLock critical sections."

    |encodingNameSymbol enc clsName cls unicodeEncoders unicodeEncoderClasses classFor|

    encodingNameSymbolArg isNil ifTrue:[
        ^ NullEncoderInstance
    ].

    "/ a registry entry is either an encoder class,
    "/ or a key to be looked up in CharacterEncoderImplementations
    classFor := [:classOrName |
        classOrName isBehavior
            ifTrue:[classOrName]
            ifFalse:[CharacterEncoderImplementations at:classOrName]
    ].

    encodingNameSymbol := encodingNameSymbolArg asLowercase asSymbolIfInternedOrSelf.
    (encodingNameSymbol == #'iso10646-1' or:[encodingNameSymbol == #unicode]) ifTrue:[
        "encode unicode from/into unicode"
        ^ NullEncoderInstance
    ].

    encodingNameSymbol includesMatchCharacters ifTrue:[
        "/ pattern lookup: first among the cached instances ...
        AccessLock critical:[
            unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil.
        ].
        unicodeEncoders notNil ifTrue:[
            unicodeEncoders keysAndValuesDo:[:eachEncodingAlias :eachEncoderInstance |
                (encodingNameSymbol matches:eachEncodingAlias) ifTrue:[
                    ^ eachEncoderInstance.
                ].
            ].
        ].

        "/ ... then among the registered classes.
        "/ ifAbsent:nil, so that a missing table leads to exceptionValue below
        "/ instead of a key-not-found error (the notNil check depends on it)
        AccessLock critical:[
            unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsent:nil.
        ].
        unicodeEncoderClasses notNil ifTrue:[
            unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClassOrName |
                (encodingNameSymbol matches:eachEncodingAlias) ifTrue:[
                    cls := classFor value:eachEncoderClassOrName.
                    cls notNil ifTrue:[
                        ^ cls new.
                    ]
                ].
            ].
        ].
        ^ exceptionValue value
    ].

    "/ exact lookup: cached instance first
    AccessLock critical:[
        unicodeEncoders := EncodersByName at:#unicode ifAbsentPut:[Dictionary new].
        enc := unicodeEncoders at:encodingNameSymbol ifAbsent:nil.
    ].
    enc isNil ifTrue:[
        "/ not yet instantiated - look for a registered class
        AccessLock critical:[
            unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsentPut:[Dictionary new].
            clsName := unicodeEncoderClasses at:encodingNameSymbol ifAbsent:nil.
        ].
        clsName notNil ifTrue:[
            cls := classFor value:clsName.
            cls notNil ifTrue:[
                enc := cls new.
                "/ remember the instance for the next lookup
                AccessLock critical:[
                    unicodeEncoders at:encodingNameSymbol put:enc.
                ]
            ].
        ].
    ].

    enc notNil ifTrue:[
        ^ enc
    ].

    "/ no direct encoder from unicode->encodingNameSymbol
    "/ search for unicode->any and: any->encodingNameSymbol
    "/ (the #unicode table exists here; it was created by the ifAbsentPut: above)
    AccessLock critical:[
        unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsent:nil.
    ].
    unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClass |
        |dict2 enc1 enc2|

        AccessLock critical:[
            dict2 := self encoderClassesByName at:eachEncodingAlias ifAbsent:nil.
        ].
        dict2 notNil ifTrue:[
            clsName := dict2 at:encodingNameSymbol ifAbsent:nil.
            clsName notNil ifTrue:[
                cls := classFor value:clsName.
                cls notNil ifTrue:[
                    enc2 := cls new.
                    enc1 := self encoderFor:eachEncodingAlias.
                    (enc1 notNil and:[enc2 notNil]) ifTrue:[
                        enc := TwoStepEncoder new encoder1:enc1 encoder2:enc2.
                        AccessLock critical:[
                            unicodeEncoders at:encodingNameSymbol put:enc.
                        ].
                        ^ enc.
                    ]
                ]
            ]
        ].
    ].

    "/ last attempt: scan all tables for one which has an entry for encodingNameSymbol
    "/ and whose own key is also present in the #unicode table.
    "/ NOTE(review): here, encoder1 is the (encoding1 -> encodingNameSymbol) table entry
    "/ and encoder2 the #unicode table entry, which looks like the reverse of the
    "/ argument order used in the search above; the result is also not cached - to be confirmed.
    self encoderClassesByName keysAndValuesDo:[:encoding1 :dict1 |
        dict1 keysAndValuesDo:[:encoding2 :clsName1|
            |clsName2 cls1 cls2 dict2 enc1 enc2|

            encoding2 = encodingNameSymbol ifTrue:[
                AccessLock critical:[
                    dict2 := self encoderClassesByName at:#unicode.
                ].
                clsName2 := dict2 at:encoding1 ifAbsent:nil.
                clsName2 notNil ifTrue:[
                    cls1 := classFor value:clsName1.
                    cls2 := classFor value:clsName2.
                    (cls1 notNil and:[cls2 notNil]) ifTrue:[
                        enc1 := cls1 new.
                        enc2 := cls2 new.
                        enc := TwoStepEncoder new encoder1:enc1 encoder2:enc2.
                        ^ enc.
                    ].
                ]
            ]
        ]
    ].

    ^ exceptionValue value

    "
     CharacterEncoder encoderFor:#'latin1'
     self encoderFor:#'iso10646-1'
     self encoderFor:#'arabic'
     self encoderFor:#'ms-arabic'
     self encoderFor:#'iso8859-5'
     self encoderFor:#'koi8-r'
     self encoderFor:#'koi8-u'
     self encoderFor:#'jis0208'
     self encoderFor:#'jis7'
     self encoderFor:#'unicode'
     self encoderFor:#'UTF-8'
     self encoderFor:'UTF-8'
    "

    "Modified: / 12-07-2012 / 19:45:58 / cg"
    "Modified (comment): / 05-03-2018 / 16:04:52 / stefan"
!
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   442
8210
cac1802b8603 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8194
diff changeset
   443
encoderForUTF8
    "answer the encoder-instance registered under the name #utf8
     (as delivered by #encoderFor:), which maps unicode into/from utf8"

    ^ self encoderFor:#'utf8'

    "
     self encoderForUTF8
    "

    "Modified (comment): / 17-01-2018 / 13:07:31 / stefan"
!
cac1802b8603 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8194
diff changeset
   454
7971
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   455
encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
    "answer an encoder which translates text encoded as oldEncodingArg
     into text encoded as newEncodingArg.

     A nil argument is replaced by #unicode (via ?), and #'iso10646-1'
     is treated as another name for #unicode.
     Answers the shared NullEncoderInstance if both encodings are equal,
     or if oldEncoding (used as a match pattern) matches newEncoding.
     If the source is unicode, this simply delegates to #encoderFor:.
     Otherwise, a directly registered encoder class is instantiated if there
     is one; if not, the conversion is composed via unicode
     (an InverseEncoder or a CompoundEncoder).
     The resulting encoder is remembered in EncodersByName, keyed by
     the old and the new encoding symbol."

    |oldEncoding newEncoding encoders encoderClasses encoder decoder clsName cls|

    oldEncoding := oldEncodingArg ? #unicode.
    oldEncoding == #'iso10646-1' ifTrue:[ oldEncoding := #unicode].
    newEncoding := newEncodingArg ? #unicode.
    newEncoding == #'iso10646-1' ifTrue:[ newEncoding := #unicode].

    oldEncoding = newEncoding ifTrue:[^ NullEncoderInstance].
    (oldEncoding match:newEncoding) ifTrue:[^ NullEncoderInstance].

    (oldEncoding = #unicode) ifTrue:[
        "/ unicode -> something
        ^ self encoderFor:newEncoding.
    ].

    "/ from here on, both names are symbols; they are used as dictionary keys
    oldEncoding := oldEncoding asSymbol.
    newEncoding := newEncoding asSymbol.

    AccessLock critical:[
        encoders := EncodersByName at:oldEncoding ifAbsentPut:[Dictionary new].
        "/ the lookup must use the normalized symbol (not the raw argument),
        "/ because that is the key under which the encoder is stored below.
        encoder := encoders at:newEncoding ifAbsent:nil.
        encoder isNil ifTrue:[
            encoderClasses := self encoderClassesByName at:oldEncoding ifAbsentPut:[Dictionary new].
            clsName := encoderClasses at:newEncoding ifAbsent:nil.
            clsName notNil ifTrue:[
                clsName isBehavior ifTrue:[
                    cls := clsName
                ] ifFalse:[
                    cls := CharacterEncoderImplementations at:clsName.
                ]
            ].
        ].
    ].

    "/ the instance is created outside of the critical region
    "/ (the change history says: do not autoload while in accesslock - deadlock)
    cls notNil ifTrue:[
        encoder := cls new.
    ].

    encoder isNil ifTrue:[
        "/ something -> unicode
        decoder := self encoderFor:oldEncoding.
        (newEncoding == #unicode) ifTrue:[
            encoder := InverseEncoder new decoder:decoder.
        ] ifFalse:[
            "/ do it as: oldEncoding -> unicode -> newEncoding
            "/ unicode -> something
            encoder := self encoderFor:newEncoding.
            encoder := CompoundEncoder new encoder:encoder decoder:decoder.
        ].
    ].

    AccessLock critical:[
        (EncodersByName at:oldEncoding) at:newEncoding put:encoder
    ].
    ^ encoder

    "
     CharacterEncoder initialize
     CharacterEncoder encoderToEncodeFrom:#'latin1' into:#'jis7'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'mac-cyrillic'
     CharacterEncoder encoderToEncodeFrom:#'ms-arabic' into:#'mac-arabic'
     CharacterEncoder encoderToEncodeFrom:#'iso8859-5' into:#'koi8-r'
     CharacterEncoder encoderToEncodeFrom:#'iso8859-5' into:#'unicode'
     CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'koi8-u'
     CharacterEncoder encoderToEncodeFrom:#'utf-8' into:#unicode
    "

    "Modified: / 12-07-2012 / 19:45:15 / cg"
    "Modified: / 16-01-2018 / 17:11:17 / stefan"
    "Modified (comment): / 17-01-2018 / 12:58:32 / stefan"
7971
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   525
! !
357e53496acc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7969
diff changeset
   526
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   527
!CharacterEncoder class methodsFor:'Compatibility-ST80'!
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   528
25233
6c9bab93b7fd #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 25185
diff changeset
   529
encoderNamed:encoderName
    "q & d hack:
     given a name (such as cp850), return an encoder instance.
     The name #default answers a new DefaultEncoder.
     Any other name is passed (as an already interned symbol) to #encoderFor:.
     If that answers nil, a halt is raised and a plain instance
     of the receiver is answered as a fallback."

    |encoder|

    encoderName == #default ifTrue:[^ DefaultEncoder new].

    encoder := self encoderFor:(encoderName asSymbolIfInterned).
    encoder isNil ifTrue:[
        self halt:'should not be reached'.
        ^ self new
    ].
    ^ encoder

    "
     self encoderNamed:'foo'
     self encoderNamed:'utf8'
     self encoderNamed:'cp850'
    "
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   550
!
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   551
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   552
platformName
    "answer the platform name, as reported by OperatingSystem"

    ^ OperatingSystem platformName

    "Created: 20.6.1997 / 17:34:03 / cg"
    "Modified: 20.6.1997 / 17:38:40 / cg"
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   557
! !
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
   558
11316
0b2757774461 access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents: 11300
diff changeset
   559
!CharacterEncoder class methodsFor:'accessing'!
0b2757774461 access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents: 11300
diff changeset
   560
0b2757774461 access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents: 11300
diff changeset
   561
nullEncoderInstance
    "answer the shared encoder instance held in NullEncoderInstance
     (it is set up as a NullEncoder by the class-side #initialize)"

    ^ NullEncoderInstance
0b2757774461 access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents: 11300
diff changeset
   563
! !
0b2757774461 access method #nullEncoderInstance
Stefan Vogel <sv@exept.de>
parents: 11300
diff changeset
   564
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   565
!CharacterEncoder class methodsFor:'class initialization'!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   566
18305
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   567
encoderClassesByName
    "answer the dictionary held in EncoderClassesByName;
     it is filled on demand by #initializeEncoderClassesByName, if it is still nil"

    EncoderClassesByName notNil ifTrue:[^ EncoderClassesByName].

    self initializeEncoderClassesByName.
    ^ EncoderClassesByName
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   572
!
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   573
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   574
initialize
    "set up the class-level state: the access lock, the shared null encoder
     and the (initially empty) encoder caches; then fill the table of
     encoder classes by name.
     Nothing is done if AccessLock is already set (i.e. already initialized)."

    AccessLock isNil ifTrue:[
        AccessLock := RecursionLock name:'CharacterEncoder'.
        NullEncoderInstance := NullEncoder new.

        EncodersByName := Dictionary new.
        CachedEncoders := Dictionary new.

        self initializeEncoderClassesByName.
    ].

    "
     self initialize
    "

    "Modified: / 01-04-2011 / 14:30:06 / cg"
    "Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 27-02-2017 / 15:43:56 / stefan"
18305
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   592
!
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   593
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   594
initializeEncoderClassesByName
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   595
    "initialize the dictionary which maps commonly used names
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   596
     to encoder classes. 
22343
b7ec81400cfa #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22187
diff changeset
   597
     This is done, because some encodings come along with different names"
18305
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   598
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   599
    |ud|
6f48f7030f47 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 18159
diff changeset
   600
8126
33f9c4850e84 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8122
diff changeset
   601
    EncoderClassesByName := Dictionary new.
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   602
21602
c63ec4a97409 Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents: 21471
diff changeset
   603
    EncoderClassesByName at:#'unicode' put:(ud := Dictionary new:237).
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   604
    ud at:#'fontspecific'       put:NullEncoder.    
8154
87ec7c3be46a *** empty log message ***
ca
parents: 8153
diff changeset
   605
    ud at:#'adobe-fontspecific' put:NullEncoder.    
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   606
    ud at:#'ms-oem'             put:NullEncoder.    
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   607
    ud at:#'ms-default'         put:NullEncoder.    
8152
e07693c46cf5 *** empty log message ***
ca
parents: 8151
diff changeset
   608
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   609
    "/ className            decoded-name    array-of-encodingNames
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   610
    #(
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   611
        (ASCII                  unicode     ( ascii 'us-ascii' 'iso-ir-6' 'ibm-367' 'ms-cp367' 'cp367'  'iso646-us' 'ibm-cp367' 'ansi_x3.4-1968' ))
22348
a70138435f93 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22343
diff changeset
   612
        (#'ASCII::ASCII7'       unicode     ( ascii7))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   613
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   614
        (BIG5                   unicode     ( big5 ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   615
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   616
        (CNS11643               unicode     ( 'cns11643' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   617
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   618
        (CP437                  unicode     ( 'cp437'  'cp-437' 'ibm-437' 'ms-cp437' 'microsoft-cp437' 'ibm-cp437' ))
25124
861a0fd6a262 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 24942
diff changeset
   619
        (CP850                  unicode     ( 'cp850'  'cp-850'   'ms-cp850'  'microsoft-cp850' 
861a0fd6a262 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 24942
diff changeset
   620
                                              'oem850' 'oem-850'  'ms-oem850' 'microsoft-oem850'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   621
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   622
        (EBCDIC                 unicode     ( 'ebcdic' ))
22348
a70138435f93 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22343
diff changeset
   623
        (#'EBCDIC::EBCDIC_037'  unicode     ( 'ebcdic-037' 'cp-037' 'cp-37' ))
13063
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   624
16054
171c7f8b4547 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 15966
diff changeset
   625
"/        (GB2313_1980        unicode     ( 'gb2313' 'gb2313-1980' ))
171c7f8b4547 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 15966
diff changeset
   626
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   627
        (GB2312_1980_0          unicode     ( 'gb2312' 'gb2312.1980' 'gb2312.1980-0'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   628
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   629
        (HANGUL                 unicode     ( 'hangul' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   630
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   631
        (ISO10646_1             unicode     ( unicode 'iso10646_1' 'iso10646-1' 'iso-10646-1' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   632
23355
b3da554904fa #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22782
diff changeset
   633
        (ISO10646_to_UTF8       unicode   ( utf8 'utf-8' 'utf_8' ))
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   634
        (ISO10646_to_UTF16BE    unicode   ( utf16b utf16be 'utf-16b' 'utf-16be' ))
24474
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
   635
        (ISO10646_to_UTF16LE    unicode   ( utf16l utf16le 'utf-16e' 'utf-16le' 'utf-16'))
17491
6462f81e6623 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 17118
diff changeset
   636
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   637
        (ISO10646_to_UTF8_MAC   unicode   ( 'utf8-mac' 'utf-8-mac' ))
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   638
        (ISO10646_to_XMLUTF8    unicode   ( 'utf8-XML' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   639
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   640
        (ISO8859_1              unicode     ( 'iso8859_1' 'iso8859-1' 'iso-8859-1' 'latin-1' 'latin1' 'iso-ir-100' 'ibm-819' 'ms-cp819' 'ibm-cp819' 'iso8859'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   641
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   642
        (ISO8859_2              unicode     ( 'iso8859_2' 'iso8859-2' 'iso-8859-2' 'latin2' 'latin-2' 'iso-ir-101'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   643
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   644
        (ISO8859_3              unicode     ( 'iso8859_3' 'iso8859-3' 'iso-8859-3' 'latin3' 'latin-3' 'iso-ir-109'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   645
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   646
        (ISO8859_4              unicode     ( 'iso8859_4' 'iso8859-4' 'iso-8859-4' 'latin4' 'latin-4' 'iso-ir-110'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   647
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   648
        (ISO8859_5              unicode     ( 'iso8859_5' 'iso8859-5' 'iso-8859-5' 'cyrillic' 'iso-ir-144' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   649
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   650
        (ISO8859_6              unicode     ( 'iso8859_6' 'iso8859-6' 'iso-8859-6' 'arabic' 'asmo-708' 'ecma-114' 'iso-ir-127' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   651
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   652
        (ISO8859_7              unicode     ( 'iso8859_7' 'iso8859-7' 'iso-8859-7' 'greek' 'iso-ir-126' 'ecma-118'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   653
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   654
        (ISO8859_8              unicode     ( 'iso8859_8' 'iso8859-8' 'iso-8859-8' 'hebrew' 'iso-ir-138' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   655
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   656
        (ISO8859_9              unicode     ( 'iso8859_9' 'iso8859-9' 'iso-8859-9' 'latin5' 'latin-5' 'iso-ir-148'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   657
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   658
        (ISO8859_10             unicode     ( 'iso8859_10' 'iso8859-10' 'iso-8859-10' 'latin6' 'latin-6' 'iso-ir-157'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   659
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   660
        (ISO8859_11             unicode     ( 'iso8859_11' 'iso8859-11' 'iso-8859-11' 'thai' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   661
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   662
        (ISO8859_13             unicode     ( 'iso8859_13' 'iso8859-13' 'iso-8859-13' 'latin7' 'latin-7' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   663
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   664
        (ISO8859_14             unicode     ( 'iso8859_14' 'iso8859-14' 'iso-8859-14' 'latin8' 'latin-8' 'latin-celtic' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   665
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   666
        (ISO8859_15             unicode     ( 'iso8859_15' 'iso8859-15' 'iso-8859-15' 'latin9' 'latin-9' 'iso-ir-203'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   667
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   668
        (ISO8859_16             unicode     ( 'iso8859_16' 'iso8859-16' 'iso-8859-16' 'latin10' 'latin-10' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   669
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   670
        (JIS0201                unicode     ( 'jis0201' #'jisx0201.1976-0'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   671
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   672
        (JIS0208                unicode     ( jis0208 'jisx0208' 'jisx0208.1983-0' 'jisx0208.1990-0'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   673
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   674
        (JIS0208_to_JIS7        jis0208     ( jis7 'jis-7' 'x-jis7' 'x-iso2022-jp' 'iso2022-jp'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   675
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   676
        (JIS0208_to_EUC         jis0208     ( euc #'x-euc-jp' ))
8122
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   677
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   678
        (JIS0208_to_SJIS        jis0208     ( 'sjis' 'shiftjis' 'x-sjis' #'x-shift-jis' #'shift-jis'))
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
   679
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   680
        (JIS0212                unicode     ( 'jis0212' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   681
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   682
        (JOHAB                  unicode     ( 'johab' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   683
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   684
        (KOI7                   unicode     ( 'koi7' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   685
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   686
        (KOI8_R                 unicode     ( #'koi8-r' 'cp878' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   687
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   688
        (KOI8_U                 unicode     ( #'koi8-u' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   689
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   690
        (KSC5601                unicode     ( #'ksc5601' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   691
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   692
        (MAC_Arabic             unicode     ( #'mac-arabic' 'macarabic' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   693
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   694
        (MAC_CentralEuropean    unicode    ( #'mac-centraleuropean' #'mac-centraleurope' 'maccentraleurope' 'maccentraleuropean' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   695
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   696
        (MAC_Croatian           unicode     ( #'mac-croatian' 'maccroatian'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   697
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   698
        (MAC_Cyrillic           unicode     ( #'mac-cyrillic' 'maccyrillic' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   699
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   700
        (MAC_Dingbats           unicode     ( #'mac-dingbats'  'macdingbats'  'macdingbat'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   701
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   702
        (MAC_Farsi              unicode     ( #'mac-farsi' 'macfarsi' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   703
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   704
        (MAC_Greek              unicode     ( #'mac-greek' #'macgreek' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   705
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   706
        (MAC_Hebrew             unicode     ( #'mac-hebrew' #'machebrew'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   707
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   708
        (MAC_Iceland            unicode     ( #'mac-iceland' #'maciceland'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   709
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   710
        (MAC_Japanese           unicode     ( #'mac-japanese' #'macjapanese'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   711
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   712
        (MAC_Korean             unicode     ( #'mac-korean' #'mackorean'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   713
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   714
        (MAC_Roman              unicode     ( #'mac-roman' #'macroman' 'macintosh' 'cp10000' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   715
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   716
        (MAC_Romanian           unicode     ( #'mac-romanian' #'macromanian'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   717
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   718
        (MAC_Symbol             unicode     ( #'mac-symbol' #'macsymbol'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   719
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   720
        (MAC_Thai               unicode     ( #'mac-thai' #'macthai'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   721
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   722
        (MAC_Turkish            unicode     ( #'mac-turkish' #'macturkish'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   723
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   724
        (MS_Ansi                unicode     ( #'ms-ansi' 'microsoft-ansi'))
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   725
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   726
        (MS_CP1252              unicode     ( 'cp1252' 'cp-1252' 'ms-cp1252' 'microsoft-cp1252' 'windows-1252' 'windows-latin1'))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   727
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   728
        (MS_Arabic              unicode     ( 'cp1256' 'cp-1256' 'ms-arabic' 'ms-cp1256' 'microsoft-cp1256'  'microsoft-arabic' 'windows-1256'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   729
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   730
        (MS_Baltic              unicode     ( 'cp1257' 'cp-1257' 'ms-baltic' 'ms-cp1257' 'microsoft-cp1257' 'microsoft-baltic' 'windows-1257'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   731
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   732
        (MS_Cyrillic            unicode     ( 'cp1251' 'cp-1251' 'ms-cyrillic' 'ms-cp1251' 'microsoft-cp1251' 'microsoft-cyrillic' 'windows-1251'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   733
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   734
        (MS_EastEuropean        unicode     ( 'cp1250' 'cp-1250' 'ms-easteuropean' 'ms-ee' 'ms-cp1250' 'microsoft-cp1250' 'microsoft-easteuropean' 'windows-1250'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   735
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   736
        (MS_Greek               unicode     ( 'cp1253' 'cp-1253' 'ms-greek' 'ms-cp1253' 'microsoft-cp1253' 'microsoft-greek' 'windows-1253' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   737
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   738
        (MS_Hebrew              unicode     ( 'cp1255' 'cp-1255' 'ms-hebrew' 'ms-cp1255' 'microsoft-cp1255'  'microsoft-hebrew' 'windows-1255' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   739
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   740
"/        (MS_Symbol           unicode     ( 'ms-symbol' 'microsoft-symbol'  ))
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   741
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
   742
        (MS_Turkish             unicode     ( 'cp1254' 'cp-1254' 'ms-turkish' 'ms-cp1254' 'microsoft-cp1254' 'microsoft-turkish' 'windows-1254'  ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   743
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   744
        (NEXT                   unicode     ( 'next' 'nextstep'  ))
8186
ae97115c26f5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8176
diff changeset
   745
10111
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
   746
        (ISO10646_to_SGML       unicode     ( 'sgml' ))
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
   747
        (ISO10646_to_JavaText   unicode     ( 'java' 'javaText' ))
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   748
21723
1ea8471bc9eb #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 21714
diff changeset
   749
"/        (AdobeStandard          unicode     ( 'Adobe Standard' 'AdobeStandard' 'Adobe' 'adobe-standard' ))
1ea8471bc9eb #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 21714
diff changeset
   750
"/        (AdobeSymbol            unicode     ( 'Adobe Symbol' 'AdobeSymbol' 'Symbol' 'adobe-symbol' ))
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   751
    ) triplesDo:[:className :decodesTo :encodesTo |
21602
c63ec4a97409 Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents: 21471
diff changeset
   752
        |decodesToDict|
8134
0296806cb4bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8132
diff changeset
   753
8151
1f0fc1d4516b *** empty log message ***
ca
parents: 8150
diff changeset
   754
        "/ notice that the encoders are not yet installed as autoloaded.
1f0fc1d4516b *** empty log message ***
ca
parents: 8150
diff changeset
   755
        "/ Therefore, we remember their names here.
21602
c63ec4a97409 Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents: 21471
diff changeset
   756
        decodesToDict := EncoderClassesByName at:decodesTo ifAbsentPut:[Dictionary new].
8151
1f0fc1d4516b *** empty log message ***
ca
parents: 8150
diff changeset
   757
        encodesTo do:[:eachEncodingAlias |
23410
850fa3b6150d #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 23355
diff changeset
   758
            decodesToDict at:eachEncodingAlias put:className ifPresent:[:classAlready | self halt:'conflicting alias'].    
8151
1f0fc1d4516b *** empty log message ***
ca
parents: 8150
diff changeset
   759
        ].
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   760
    ].
22597
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
   761
    "/ flush
1b5087a97102 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 22587
diff changeset
   762
    "/ EncodersByName := Dictionary new.
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   763
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   764
    "
21602
c63ec4a97409 Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents: 21471
diff changeset
   765
     self initializeEncoderClassesByName
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   766
    "
10111
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
   767
15966
72f3e3a9ba29 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 15609
diff changeset
   768
    "Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
21602
c63ec4a97409 Remove dependeny of UnixOperatingSystem
Stefan Vogel <sv@exept.de>
parents: 21471
diff changeset
   769
    "Modified: / 27-02-2017 / 16:17:43 / stefan"
22348
a70138435f93 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22343
diff changeset
   770
    "Modified: / 12-11-2017 / 13:05:38 / cg"
23410
850fa3b6150d #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 23355
diff changeset
   771
    "Modified: / 08-10-2018 / 08:59:01 / Claus Gittinger"
24474
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
   772
    "Modified: / 26-07-2019 / 16:35:46 / Stefan Vogel"
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   773
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   774
8122
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   775
!CharacterEncoder class methodsFor:'constants'!
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   776
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   777
jis7KanjiEscapeSequence
    "answer the escape sequence which switches to kanji in jis7 encoded strings,
     i.e. the ESC character followed by the two characters $ and B.
     As noted by the original author, ISO2022-JP uses the very same sequence.
     The string is built on the first call and remembered in
     Jis7KanjiEscapeSequence; later calls answer that same object."

    Jis7KanjiEscapeSequence notNil ifTrue:[
        ^ Jis7KanjiEscapeSequence
    ].
    ^ Jis7KanjiEscapeSequence := Character esc asString , '$B'

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   789
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   790
jis7KanjiOldEscapeSequence
    "answer the escape sequence which switches to kanji in some old jis7
     encoded strings, i.e. the ESC character followed by the two characters $ and @.
     The string is built on the first call and remembered in
     Jis7KanjiOldEscapeSequence; later calls answer that same object."

    Jis7KanjiOldEscapeSequence notNil ifTrue:[
        ^ Jis7KanjiOldEscapeSequence
    ].
    ^ Jis7KanjiOldEscapeSequence := Character esc asString , '$@'
!
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   798
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   799
jis7RomanEscapeSequence
    "answer the escape sequence which switches to roman in jis7 encoded strings,
     i.e. the ESC character followed by the two characters ( and J.
     The string is built on the first call and remembered in
     Jis7RomanEscapeSequence; later calls answer that same object."

    Jis7RomanEscapeSequence notNil ifTrue:[
        ^ Jis7RomanEscapeSequence
    ].
    ^ Jis7RomanEscapeSequence := Character esc asString , '(J'

    "Created: 26.2.1996 / 17:38:08 / cg"
    "Modified: 30.6.1997 / 16:03:16 / cg"
!
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   810
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   811
jisISO2022EscapeSequence
    "answer the escape sequence which switches to kanji in iso2022 encoded strings.
     It consists of two escape sequences in a row:
     ESC & @ immediately followed by ESC $ B.
     The string is built on the first call and remembered in
     JisISO2022EscapeSequence; later calls answer that same object."

    |esc|

    JisISO2022EscapeSequence notNil ifTrue:[
        ^ JisISO2022EscapeSequence
    ].
    esc := Character esc asString.
    ^ JisISO2022EscapeSequence := esc , '&@' , esc , '$B'
! !
29670db31014 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8120
diff changeset
   819
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   820
!CharacterEncoder class methodsFor:'encoding & decoding'!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   821
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   822
decodeString:anEncodedStringOrByteCollection
    "convenience entry on the class side:
     create a new instance of the receiver and answer whatever that instance
     answers to decodeString: for the given argument.
     The examples below pass both a string and a byte array."

    |decoder|

    decoder := self new.
    ^ decoder decodeString:anEncodedStringOrByteCollection

    "
     CharacterEncoderImplementations::ISO8859_1 decodeString:'hello'
     CharacterEncoderImplementations::ISO8859_1 decodeString:'hello' asByteArray
    "

    "Modified (comment): / 17-01-2018 / 13:44:41 / stefan"
!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   832
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   833
decodeString:aString from:oldEncoding
    "answer aString, which is given in oldEncoding, converted to unicode.
     This simply forwards to encodeString:from:into: with #unicode as the
     target encoding."

    |unicodeString|

    unicodeString := self encodeString:aString from:oldEncoding into:#unicode.
    ^ unicodeString

    "
     self encodeString:'hello' into:#ebcdic

     self decodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic
    "

    "Modified (format): / 17-01-2018 / 15:47:00 / stefan"
!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   844
7994
42b5face56fb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7986
diff changeset
   845
encode:codePoint from:oldEncodingArg into:newEncodingArg
    "answer codePoint, which is given in the encoding named oldEncodingArg,
     translated into the encoding named newEncodingArg.

     Encoding names are compared by identity (==).
     A nil name, #'iso10646-1' and #'ms-default' are all treated as #unicode.
     The codePoint is answered unchanged if
        - both names are identical (before or after that aliasing), or
        - the conversion is between #unicode and #'iso8859-1' (either direction)
          and codePoint is not above 16rFF.
     In every other case, the encoder answered by encoderToEncodeFrom:into:
     is asked to encode the codePoint."

    |canonicalNameOf sourceEncoding targetEncoding|

    oldEncodingArg == newEncodingArg ifTrue:[
        ^ codePoint
    ].

    "/ hard coded aliases for unicode
    canonicalNameOf :=
        [:encodingName |
            (encodingName isNil
                or:[encodingName == #'iso10646-1'
                or:[encodingName == #'ms-default']])
                    ifTrue:[#unicode]
                    ifFalse:[encodingName]
        ].
    sourceEncoding := canonicalNameOf value:oldEncodingArg.
    targetEncoding := canonicalNameOf value:newEncodingArg.

    sourceEncoding == targetEncoding ifTrue:[
        ^ codePoint
    ].

    "/ between unicode and iso8859-1, codePoints up to 16rFF are passed through as is
    ((sourceEncoding == #unicode and:[targetEncoding == #'iso8859-1'])
        or:[targetEncoding == #unicode and:[sourceEncoding == #'iso8859-1']]) ifTrue:[
            codePoint <= 16rFF ifTrue:[
                ^ codePoint
            ].
    ].

    ^ (self encoderToEncodeFrom:sourceEncoding into:targetEncoding) encode:codePoint

    "Modified: / 17-01-2018 / 14:33:08 / stefan"
!
42b5face56fb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7986
diff changeset
   878
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   879
encodeString:aUnicodeString
    "given a string in unicode, answer a string in my encoding for it.
     The work is done by a new instance of the receiver, which is sent
     encodeString: with the same argument."

    |encoder|

    encoder := self new.
    ^ encoder encodeString:aUnicodeString

    "
     CharacterEncoderImplementations::ISO8859_1 encodeString:'hello'
    "

    "Modified (comment): / 16-01-2018 / 21:57:35 / stefan"
!
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   890
7967
f9baf81d6991 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7964
diff changeset
   891
encodeString:aString from:oldEncodingArg into:newEncodingArg
8015
e85b0c11e871 caching encoders
Claus Gittinger <cg@exept.de>
parents: 7994
diff changeset
   892
    |oldEncoding newEncoding encoder|
7967
f9baf81d6991 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7964
diff changeset
   893
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   894
    oldEncodingArg == newEncodingArg ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   895
        ^ aString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   896
    ].
14916
d81790d8f204 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 14777
diff changeset
   897
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   898
    oldEncoding := oldEncodingArg.
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   899
    newEncoding := newEncodingArg.
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   900
    "/ some hard coded aliases
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   901
    (oldEncoding isNil or:[oldEncoding == #'iso10646-1' or:[oldEncoding == #'ms-default']]) ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   902
        oldEncoding :=  #'unicode'
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   903
    ].
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   904
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   905
    (newEncoding isNil or:[newEncoding == #'iso10646-1' or:[newEncoding == #'ms-default']]) ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   906
        newEncoding :=  #'unicode'
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   907
    ].
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   908
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   909
    oldEncoding == newEncoding ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   910
        ^ aString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   911
    ].
7967
f9baf81d6991 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7964
diff changeset
   912
14916
d81790d8f204 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 14777
diff changeset
   913
    "/ for single-byte strings, iso8859-1 and unicode (up to FF) have the same encoding
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   914
    (oldEncoding == #unicode and:[newEncoding == #'iso8859-1' and:[aString isWideString not]]) ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   915
        ^ aString
8016
6344e4e47261 characterEncoding stuff
Claus Gittinger <cg@exept.de>
parents: 8015
diff changeset
   916
    ].
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   917
    (newEncoding == #unicode and:[oldEncoding == #'iso8859-1' and:[aString isWideString not]]) ifTrue:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   918
        ^ aString
8016
6344e4e47261 characterEncoding stuff
Claus Gittinger <cg@exept.de>
parents: 8015
diff changeset
   919
    ].
6344e4e47261 characterEncoding stuff
Claus Gittinger <cg@exept.de>
parents: 8015
diff changeset
   920
8118
efc99c0f68bc *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8114
diff changeset
   921
    encoder := self encoderToEncodeFrom:oldEncoding into:newEncoding.
8015
e85b0c11e871 caching encoders
Claus Gittinger <cg@exept.de>
parents: 7994
diff changeset
   922
    ^ encoder encodeString:aString.
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   923
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   924
    "
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   925
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#ascii    
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   926
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#unicode    
23981
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
   927
     self encodeString:(self encodeString:'Äh ... hello' into:#ebcdic) from:#ebcdic into:#utf8    
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   928
    "
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   929
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   930
    "Modified (comment): / 17-01-2018 / 15:49:40 / stefan"
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   931
!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   932
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
   933
encodeString:aString into:newEncoding
    "encode aString into newEncoding, taking #unicode as the source encoding.
     This is a shorthand for encodeString:from:into: and returns whatever that returns."
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   934
    ^ self encodeString:aString from:#unicode into:newEncoding
13063
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   935
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   936
    "
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   937
     self encodeString:'hello' into:#ebcdic
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   938
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   939
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#ascii    
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   940
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#unicode    
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   941
     self encodeString:(self encodeString:'hello' into:#ebcdic) from:#ebcdic into:#utf8    
13063
a17ba204b911 comment/format in: #encodeString:into:
Claus Gittinger <cg@exept.de>
parents: 12608
diff changeset
   942
    "
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   943
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
   944
    "Modified (comment): / 17-01-2018 / 15:48:07 / stefan"
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   945
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   946
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   947
!CharacterEncoder class methodsFor:'private'!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   948
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   949
flushCode
    "re-initialize the receiver (via #initialize). In addition, for non-abstract classes
     which answer a non-nil path for at least one of the two map files, remove the
     #mapping method from the receiver's metaclass.
     NOTE(review): presumably the removed method is regenerated later from the map file - confirm."
8127
7531ed2cdf35 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8126
diff changeset
   950
    self initialize.
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   951
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   952
    self isAbstract ifFalse:[
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
   953
        (self mapFileURL1_relativePathName notNil
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
   954
        or:[ self mapFileURL2_relativePathName notNil]) ifTrue:[
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
   955
            self class removeSelector:#mapping.
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
   956
        ].
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   957
    ].
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   958
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   959
    "
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   960
     self flushCode
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   961
    "
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   962
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   963
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   964
!CharacterEncoder class methodsFor:'private-mapping setup'!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   965
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   966
generateCode
    "delegate to a new CharacterEncoderCodeGenerator which has the receiver as its target class.
     What exactly gets generated is defined by the generator's #generateCode (not visible here)."
7909
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   967
    (CharacterEncoderCodeGenerator new targetClass:self) generateCode.
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   968
!
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   969
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   970
generateSubclassCode
    "delegate to a new CharacterEncoderCodeGenerator which has the receiver as its target class.
     What exactly gets generated is defined by the generator's #generateSubclassCode (not visible here)."
a045c719fca2 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7904
diff changeset
   971
    (CharacterEncoderCodeGenerator new targetClass:self) generateSubclassCode.
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   972
!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   973
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   974
mapFileURL1_codeColumn
    "answers 1 here.
     NOTE(review): presumably the index of the column which holds the code value
     in the map file named by mappingURL1 - confirm against the code generator."
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   975
    ^ 1
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   976
!
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
   977
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   978
mapFileURL1_relativePathName
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   979
    "must be redefined in concrete subclass(es)"
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   980
    
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   981
    ^ nil
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   982
!
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   983
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   984
mapFileURL2_relativePathName
21711
2020534180c5 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 21602
diff changeset
   985
    "must be redefined in concrete subclass(es)"
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   986
    
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   987
    ^ nil
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   988
!
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   989
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   990
mappingURL1
    "return the URL (a string) of the first map file: the path answered by
     mapFileURL1_relativePathName, appended to the unicode.org MAPPINGS base URL.
     Returns nil, if no relative path is defined for the receiver."
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   991
    |rel|
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   992
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   993
    rel := self mapFileURL1_relativePathName.
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   994
    rel isNil ifTrue:[
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
   995
        ^ nil
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
   996
    ].
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   997
    ^ 'http://www.unicode.org/Public/MAPPINGS/' , rel
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   998
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
   999
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1000
mappingURL2
    "return the URL (a string) of the second map file: the path answered by
     mapFileURL2_relativePathName, appended to the std.dkuug.dk charmaps base URL.
     Returns nil, if no relative path is defined for the receiver."
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1001
    |rel|
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1002
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
  1003
    rel := self mapFileURL2_relativePathName.
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
  1004
    rel isNil ifTrue:[
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1005
        ^ nil
7912
fbbb59645576 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7909
diff changeset
  1006
    ].
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1007
    ^ 'http://std.dkuug.dk/i18n/charmaps/' , rel
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1008
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1009
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1010
!CharacterEncoder class methodsFor:'queries'!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1011
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1012
isAbstract
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1013
    "Return true if this class is an abstract class.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1014
     True is returned for CharacterEncoder here; false for subclasses.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1015
     Abstract subclasses must redefine this again."
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1016
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1017
    ^ self == CharacterEncoder
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1018
!
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1019
7938
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1020
isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg
7994
42b5face56fb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7986
diff changeset
  1021
    "return true, if the superSetEncoding includes all characters of the subSetEncoding.
42b5face56fb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7986
diff changeset
  1022
     (this means: characters are included - not that they have the same encoding)"
7938
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1023
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1024
    |subSetEncoding superSetEncoding|
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1025
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1026
    subSetEncodingArg = superSetEncodingArg ifTrue:[^ true].
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1027
    subSetEncoding := subSetEncodingArg asLowercase.
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1028
    superSetEncoding := superSetEncodingArg asLowercase.
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1029
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1030
    (subSetEncoding match:superSetEncoding) ifTrue:[^ true].
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1031
8214
406c7fc10e12 assume ms-ansi is same as unicode
Claus Gittinger <cg@exept.de>
parents: 8211
diff changeset
  1032
    (('iso10646*' match:superSetEncoding) 
406c7fc10e12 assume ms-ansi is same as unicode
Claus Gittinger <cg@exept.de>
parents: 8211
diff changeset
  1033
    or:[superSetEncoding = 'unicode'
24002
ac83f90e549c #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 23982
diff changeset
  1034
    or:[superSetEncoding = 'ms-ansi'
ac83f90e549c #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 23982
diff changeset
  1035
    or:[superSetEncoding = 'ms-default']]]) ifTrue:[
8214
406c7fc10e12 assume ms-ansi is same as unicode
Claus Gittinger <cg@exept.de>
parents: 8211
diff changeset
  1036
        "/ assume that any character is in unicode
406c7fc10e12 assume ms-ansi is same as unicode
Claus Gittinger <cg@exept.de>
parents: 8211
diff changeset
  1037
        ^ true.
7938
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1038
    ].
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1039
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1040
    "/ if the subSet is iso8859-*, that means ascii (i.e. the lower 7 bits of iso8859 only).
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1041
    ((subSetEncoding = 'iso8859*') or:[subSetEncoding = 'iso8859-*']) ifTrue:[
8168
8f8da8bb046d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8156
diff changeset
  1042
        ('ascii*' match:superSetEncoding) ifTrue:[^ true].
8f8da8bb046d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8156
diff changeset
  1043
        ('ms-ansi*' match:superSetEncoding) ifTrue:[^ true].
24002
ac83f90e549c #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 23982
diff changeset
  1044
        ('ms-default*' match:superSetEncoding) ifTrue:[^ true].
7938
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1045
    ].
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1046
    (subSetEncoding = 'ascii') ifTrue:[
8168
8f8da8bb046d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8156
diff changeset
  1047
        ('iso8859*' match:superSetEncoding) ifTrue:[^ true].
8f8da8bb046d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8156
diff changeset
  1048
        ('ms-ansi*' match:superSetEncoding) ifTrue:[^ true].
24002
ac83f90e549c #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 23982
diff changeset
  1049
        ('ms-default*' match:superSetEncoding) ifTrue:[^ true].
7938
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1050
    ].
a53aae4a05bb *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7932
diff changeset
  1051
7923
e8286ccdf20c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7922
diff changeset
  1052
    "/ TODO: check the charSets mappingTables...
e8286ccdf20c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7922
diff changeset
  1053
    "/ self halt.
e8286ccdf20c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7922
diff changeset
  1054
    ^ false.
e8286ccdf20c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7922
diff changeset
  1055
!
e8286ccdf20c *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7922
diff changeset
  1056
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1057
nameOfDecodedCode
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1058
    "Most coders decode from their code into unicode / encode from unicode into their code.
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1059
     There are a few exceptions to this, though - these must redefine this."
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1060
    
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1061
    ^ #'unicode'
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1062
!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1063
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1064
nameOfEncoding
    "return the symbolic name of the receiver's encoding (a symbol).
     It is derived from the receiver's nameWithoutPrefix, converted to lowercase
     and with every underscore replaced by a dash."
7974
9905043988ee *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7973
diff changeset
  1065
    ^ (self nameWithoutPrefix asLowercase copyReplaceAll:$_ with:$-) asSymbol
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1066
!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1067
7959
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1068
supportedExternalEncodings
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1069
    "return an array of arrays containing the names of supported
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1070
     encodings which can be used for external resources (i.e. files).
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1071
     The first element contains the internally used symbolic name,
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1072
     the second contains a user-readable string (description).
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1073
     More than one external name may be mapped onto the same symbolic name."
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1074
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1075
    ^ #( 
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1076
         ('utf8'        'Unicode as 8Bit characters'    )  
8904
d358f0a17f07 utf16 support
Claus Gittinger <cg@exept.de>
parents: 8856
diff changeset
  1077
         ('utf16BE'     'Unicode as 16Bit big-endian'    )  
d358f0a17f07 utf16 support
Claus Gittinger <cg@exept.de>
parents: 8856
diff changeset
  1078
         ('utf16LE'     'Unicode as 16Bit little-endian' )  
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1079
"/         ('utf7'        'Unicode as 7Bit characters'    ) 
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1080
"/       nil
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1081
         ('ascii'       'Common 7bit subset of iso8859' )
14188
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1082
         ('iso8859-1'   'Western'                       )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1083
         ('iso8859-2'   'Central European'              )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1084
         ('iso8859-3'   'South European'                )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1085
         ('iso8859-4'   'Baltic'                        )
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1086
         ('iso8859-5'   'Cyrillic'                      )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1087
         ('iso8859-6'   'Arabic'                        )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1088
         ('iso8859-7'   'Greek'                         )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1089
         ('iso8859-8'   'Hebrew'                        )
14188
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1090
         ('iso8859-15'  'Western with Euro'             )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1091
         ('iso8859-16'  'South European with Euro'      )
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1092
"/       nil
16522
36f64d703785 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 16054
diff changeset
  1093
         ('macintosh'   'MAC Western'      )
36f64d703785 class: CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 16054
diff changeset
  1094
"/       nil
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1095
         ('koi7'        'Cyrillic (Old)'                )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1096
         ('koi8-r'      'Cyrillic'                      )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1097
         ('koi8-u'      'Cyrillic (Ukraine)'            )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1098
"/       nil
14188
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1099
         ('cp437'       'Windows US / codepage 437'       )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1100
         ('cp850'       'Windows Latin1 / codepage 850'   )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1101
         ('cp1250'      'Windows Latin2 / codepage 1250'  )
9ff8607b11a4 #userFriendlyName
Stefan Vogel <sv@exept.de>
parents: 14174
diff changeset
  1102
         ('cp1251'      'Windows Cyrillic / codepage 1251')
22584
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
  1103
         ('cp1252'      'Windows ANSI / codepage 1252'    )
8176
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1104
"/         ('mac'         'macintosh 8 bit'               )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1105
         ('next'        'NeXT 8 bit'                    )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1106
"/         ('hp'          'hpux 8 bit'                    )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1107
"/       nil
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1108
         ('euc'         'EUC - extended unix code (japanese)'     )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1109
         ('jis7'        'JIS7 - jis 7bit escape codes (japanese)' )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1110
         ('iso-2022-jp' 'Same as jis 7bit'                        )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1111
         ('sjis'        'SJIS - shift jis 8bit codes (japanese)'  )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1112
"/       nil
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1113
         ('gb'          'GB - mainland china'                   )
66d1004f1806 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8168
diff changeset
  1114
         ('big5'        'BIG5 - taiwan'                         )
7959
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1115
"/         ('ksc'         'korean'                        )
8186
ae97115c26f5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 8176
diff changeset
  1116
         ('sgml'        'SGML (XML/HTML) character escapes'     )
10111
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
  1117
         ('java'        'JavaText (\uXXXX) character escapes'   )
7959
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1118
       )
10111
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
  1119
7485e9da838c +javaText encoder
Claus Gittinger <cg@exept.de>
parents: 9143
diff changeset
  1120
    "Modified: / 23-10-2006 / 13:27:48 / cg"
7959
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1121
!
0276f0a46dd1 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7956
diff changeset
  1122
7947
16b2306f9bc9 utf8 - full 30 bit range
Claus Gittinger <cg@exept.de>
parents: 7942
diff changeset
  1123
userFriendlyNameOfEncoding
    "return a name for presentation to the user: the result of nameOfEncoding,
     converted with asUppercaseFirst (i.e. with its first character in uppercase)."
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1124
    ^ self nameOfEncoding asUppercaseFirst
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1125
! !
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1126
8711
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1127
!CharacterEncoder class methodsFor:'utilities'!
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1128
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1129
detectAndSkipBOMInStream:stream
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1130
    "skips over the BOM and returns one of 
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1131
        #utf8
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1132
        #utf32be
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1133
        #utf32le
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1134
        #utf16le
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1135
        #utf16be
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1136
     if no BOM is detected, the stream is repositioned to where it was before."
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1137
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1138
    |pos byte1|
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1139
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1140
    pos := stream position.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1141
    stream atEnd ifTrue:[^ nil].
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1142
    byte1 := stream peek asInteger.
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1143
    "/ EF-BB-BF -> utf8
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1144
    byte1 == 16rEF ifTrue:[
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1145
        stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1146
        stream peek asInteger == 16rBB ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1147
            stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1148
            stream next asInteger == 16rBF ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1149
                ^ #utf8
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1150
            ]
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1151
        ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1152
        stream position:pos. ^nil
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1153
    ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1154
    "00-00-FE-FF big endian utf32"
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1155
    byte1 == 16r00 ifTrue:[
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1156
        stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1157
        stream peek asInteger == 16r00 ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1158
            stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1159
            stream peek asInteger == 16rFE ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1160
                stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1161
                stream next asInteger == 16rFF ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1162
                    ^ #utf32be
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1163
                ]
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1164
            ]
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1165
        ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1166
        stream position:pos. ^nil
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1167
    ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1168
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1169
    "FF-FE little endian utf16 or utf32"
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1170
    byte1 == 16rFF ifTrue:[
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1171
        stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1172
        stream peek asInteger == 16rFE ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1173
            stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1174
            stream peek asInteger == 0 ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1175
                stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1176
                stream next asInteger == 0 ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1177
                    "FF-FE-00-00 little endian utf32"
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1178
                    ^ #utf32le.   
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1179
                ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1180
                stream skip:-2
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1181
            ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1182
            ^ #utf16le
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1183
        ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1184
        stream position:pos. ^nil
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1185
    ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1186
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1187
    "FE-FF big endian utf16"
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1188
    byte1 == 16rFE ifTrue:[
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1189
        stream next.
24012
16660cbe8bf2 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 24002
diff changeset
  1190
        stream next asInteger == 16rFF ifTrue:[
23982
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1191
            ^ #utf16be
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1192
        ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1193
    ].
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1194
    stream position:pos.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1195
    ^ nil
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1196
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1197
    "
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1198
     |s enc|
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1199
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1200
     s := #[1 2 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1201
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1202
     self assert:(enc == nil).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1203
     self assert:(s position == 0).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1204
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1205
     s := #[16rFF 2 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1206
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1207
     self assert:(enc == nil).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1208
     self assert:(s position == 0).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1209
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1210
     s := #[16rFF 16rFE 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1211
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1212
     self assert:(enc == #utf16le).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1213
     self assert:(s position == 2).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1214
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1215
     s := #[16rFE 16rFF 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1216
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1217
     self assert:(enc == #utf16be).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1218
     self assert:(s position == 2).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1219
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1220
     s := #[16rFF 16rFE 0 0 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1221
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1222
     self assert:(enc == #utf32le).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1223
     self assert:(s position == 4).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1224
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1225
     s := #[0 0 16rFE 16rFF 0 0 3 4] readStream.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1226
     enc := self detectAndSkipBOMInStream:s.
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1227
     self assert:(enc == #utf32be).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1228
     self assert:(s position == 4).
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1229
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1230
    "
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1231
!
18c16665c868 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23981
diff changeset
  1232
23981
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
  1233
detectBOMInBuffer:buffer
    "check if the buffer starts with a Unicode byte order mark (BOM).
     Answers whatever #detectAndSkipBOMInStream: answers
     for a read stream on the buffer, i.e. one of:
        #utf8
        #utf32be
        #utf32le
        #utf16le
        #utf16be
     or nil, if no BOM is found"

    |bufferStream|

    bufferStream := buffer readStream.
    ^ self detectAndSkipBOMInStream:bufferStream
23981
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
  1243
!
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
  1244
8711
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1245
guessEncodingOfBuffer:buffer
    "try to guess the encoding of a string-buffer's contents.
     The buffer is handed to each of the EncodingDetectors in turn
     (the list is set up on first use by #initializeEncodingDetectors),
     and the first non-nil answer is returned.
     Basically, those look for a BOM (byte order mark)
     or a special string of the form
            encoding #name
     or:
            encoding: name
     within the given buffer
     (which is usually found within the first few bytes of a textFile).
     Many editors and tools write such comments (eg. emacs, st/x, etc.).
     Answers nil, if the buffer holds less than 4 elements
     or if none of the detectors recognizes anything."

    buffer size >= 4 ifFalse:[
        "too short to tell anything about the contents"
        ^ nil.
    ].
    EncodingDetectors isNil ifTrue:[
        self initializeEncodingDetectors.
    ].

    EncodingDetectors do:[:eachDetector |
        |encodingOrNil|

        encodingOrNil := eachDetector value:buffer.
        encodingOrNil notNil ifTrue:[
            ^ encodingOrNil
        ].
    ].
    ^ nil
14169
eab487f07a2b comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents: 14094
diff changeset
  1271
!
eab487f07a2b comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents: 14094
diff changeset
  1272
eab487f07a2b comment/format in: #encoderFor:
Stefan Vogel <sv@exept.de>
parents: 14094
diff changeset
  1273
guessEncodingOfFile:aFilename
    "look for a BOM (byte order mark) or a special string of the form:
        encoding #name
     or:
        encoding: name
     within the first 512 bytes of the given file.
     The bytes are read into a buffer, and the answer of
     #guessEncodingOfBuffer: for that buffer is returned
     (a symbol like #utf8, or nil if nothing was recognized).
     No additional heuristics are applied in this method.
     Also answers nil, if no read stream can be obtained for the file."

    |s buffer|

    s := aFilename asFilename readStreamOrNil.
    s isNil ifTrue:[^ nil].

    buffer := String new:512.
    "/ make sure that the file is closed again, even if reading fails
    [
        "/ NOTE(review): the number of bytes actually read is ignored here,
        "/ so for shorter files the rest of the buffer keeps its initial contents
        s nextBytes:buffer size into:buffer.
    ] ensure:[
        s close.
    ].

    ^ self guessEncodingOfBuffer:buffer.

    "
     self guessEncodingOfFile:'../../libview/resources/de.rs' asFilename
     self guessEncodingOfFile:'../../libview/resources/ru.rs' asFilename
     self guessEncodingOfFile:'../../libview/resources/th.rs' asFilename
    "

    "Modified: / 31-05-2011 / 15:45:19 / cg"
    "Modified: / 16-01-2018 / 17:12:41 / stefan"
8711
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1302
!
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1303
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1304
guessEncodingOfStream:aStream
    "look for a BOM (byte order mark) or a special string of the form:
            encoding #name
     or:
            encoding: name
     in the next (up to 512) bytes of aStream.
     The stream is positioned back to where it was afterwards.
     Answers what #guessEncodingOfBuffer: answers for the bytes read
     (a symbol like #utf8 or nil);
     answers nil without reading anything, if the stream is not positionable."

    |savedPosition probe|

    "/ the bytes are read ahead and the stream is rewound afterwards;
    "/ this is only possible with a positionable stream
    aStream isPositionable ifFalse:[^ nil].

    savedPosition := aStream position.
    probe := String new:512.
    aStream nextBytes:probe size into:probe.
    aStream position:savedPosition.

    ^ self guessEncodingOfBuffer:probe

    "Modified: / 31-05-2011 / 15:45:23 / cg"
    "Modified: / 16-01-2018 / 17:12:57 / stefan"
    "Modified (format): / 17-01-2018 / 15:51:09 / stefan"
8810
8f509238ef9f +showCharacterSet
Claus Gittinger <cg@exept.de>
parents: 8722
diff changeset
  1330
!
8f509238ef9f +showCharacterSet
Claus Gittinger <cg@exept.de>
parents: 8722
diff changeset
  1331
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1332
initializeEncodingDetectors
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1333
    "setup the list of encoding detectors.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1334
     This is a list of blocks, which get a buffer as argument,
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1335
     and return an encoding symbol or nil.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1336
     Can be customized for more detectors 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1337
     (used to be hard-coded in guessEncodingOfBuffer:)"
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1338
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1339
    EncodingDetectors := OrderedCollection new.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1340
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1341
    "check for Unicode Byte Order Marks (BOM)"
23981
f7ae2f7c1554 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23662
diff changeset
  1342
    EncodingDetectors add:[:buffer | self detectBOMInBuffer:buffer].
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1343
        
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1344
    "check for an inline encoding markup (charset= / encoding=) substring"
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1345
    EncodingDetectors
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1346
        add:[:buffer |
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1347
            |guess lcBuffer quote|
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1348
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1349
            lcBuffer := buffer asLowercase.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1350
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1351
            guess :=
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1352
                #(charset encoding) doWithExit:[:keyWord :exit |
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1353
                    |encoderOrNil idx s w enc|
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1354
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1355
                    guess isNil ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1356
                    (idx := lcBuffer findString:keyWord) ~~ 0 ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1357
                        s := ReadStream on:buffer.
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1358
                        s position:idx-1 + keyWord size.
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1359
                        s skipSeparators. 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1360
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1361
                        "do not include '=' here, otherwise
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1362
                         files containing xml code (<?xml charset='utf8'> will be parsed as UTF-8"
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1363
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1364
                        [':#=' includes:s peek] whileTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1365
                            s next.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1366
                            s skipSeparators. 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1367
                        ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1368
                        s skipSeparators.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1369
                        ('"''' includes:s peek) ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1370
                            quote := s next.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1371
                            w := s upTo:quote.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1372
                        ] ifFalse:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1373
                            w := s upToElementForWhich:[:ch | ch isSeparator or:[ch == $" or:[ch == $' or:[ch == $> ]]]].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1374
                        ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1375
                        w notNil ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1376
                            enc := w withoutQuotes.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1377
                            (enc startsWith:'x-') ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1378
                                enc := enc copyFrom:3.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1379
                            ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1380
                            encoderOrNil := self encoderFor:enc ifAbsent:nil.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1381
                            encoderOrNil notNil ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1382
                                exit value:(encoderOrNil nameOfEncoding)
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1383
                            ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1384
                        ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1385
                    ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1386
                ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1387
                nil
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1388
            ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1389
            guess
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1390
        ].
22587
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1391
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1392
    "/ check for a string like /*@!!Encoding:1252*/
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1393
    EncodingDetectors
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1394
        add:[:buffer |
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1395
            |guess idx s keyWord codePageNr enc encoderOrNil|
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1396
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1397
            keyWord := '@!!Encoding:'.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1398
            (idx := buffer findString:keyWord) ~~ 0 ifTrue:[
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1399
                s := ReadStream on:buffer.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1400
                s position:idx-1 + keyWord size.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1401
                s skipSeparators. 
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1402
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1403
                s peek isDigit ifTrue:[
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1404
                    codePageNr := Integer readFrom:s.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1405
                    enc := 'cp%1' bindWith:codePageNr.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1406
                    encoderOrNil := self encoderFor:enc ifAbsent:nil.
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1407
                    encoderOrNil notNil ifTrue:[
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1408
                        guess := (encoderOrNil nameOfEncoding)
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1409
                    ].
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1410
                ].
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1411
            ].
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1412
            guess
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1413
        ].
3d2c9f1a70bd #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22584
diff changeset
  1414
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1415
    "/ check for JIS7 encoding
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1416
    EncodingDetectors
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1417
        add:[:buffer |
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1418
            (buffer includesString:self jisISO2022EscapeSequence) ifTrue:[
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1419
                #'iso2020-jp'
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1420
            ] ifFalse:[
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1421
                (buffer includesString:self jis7KanjiEscapeSequence) ifTrue:[
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1422
                    #jis7
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1423
                ] ifFalse:[
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1424
                    (buffer includesString:self jis7KanjiOldEscapeSequence) ifTrue:[
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1425
                        #jis7
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1426
                    ] ifFalse:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1427
                        nil
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1428
                    ]
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1429
                ]
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1430
            ]    
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1431
        ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1432
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1433
    "/ TODO: look for EUC, SJIS etc.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1434
    "/ Disabled, due to too many false positives.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1435
    "/ if required, think about it, fix it and uncomment it
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1436
"/    EncodingDetectors
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1437
"/        add:[:buffer |
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1438
"/            |guess idx|
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1439
"/
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1440
"/            idx := buffer 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1441
"/                        findFirst:[:char | 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1442
"/                            |code|
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1443
"/                            code := char codePoint.
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1444
"/                            code between:16rA1 and: 16rFE
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1445
"/                        ].
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1446
"/            ((idx ~~ 0) 
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1447
"/                and:[ (buffer at:(idx + 1)) codePoint between:16rA1 and: 16rFE ])
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1448
"/            ifTrue:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1449
"/                guess := #euc
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1450
"/            ] ifFalse:[
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1451
"/                "/ look for SJIS ...
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1452
"/            ]
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1453
"/        ].
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1454
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1455
    "Modified: / 17-01-2018 / 15:55:36 / stefan"
23662
93011efedaa3 #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 23410
diff changeset
  1456
    "Modified: / 05-02-2019 / 09:23:37 / Claus Gittinger"
19465
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1457
!
83cd3327e4c4 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 18762
diff changeset
  1458
8810
8f509238ef9f +showCharacterSet
Claus Gittinger <cg@exept.de>
parents: 8722
diff changeset
  1459
showCharacterSet
    "open a CharacterSetView on the default view font, with the receiver as encoder.
     The window label is the string 'Characters of ' followed by my nameWithoutPrefix."

    |displayFont windowLabel|

    displayFont := View defaultFont.
    "/ alternative font, kept for reference:
    "/ displayFont := (Font family:'courier' face:'medium' style:'roman' size:12 encoding:'iso10646-1').
    windowLabel := 'Characters of ' , self nameWithoutPrefix.

    CharacterSetView
        openOn:displayFont
        label:windowLabel
        clickLabel:nil
        asInputFor:nil
        encoder:self

    "
     CharacterEncoderImplementations::MS_Ansi showCharacterSet
     CharacterEncoderImplementations::ISO8859_1 showCharacterSet
     CharacterEncoderImplementations::ISO8859_2 showCharacterSet
     CharacterEncoderImplementations::ISO8859_3 showCharacterSet
     CharacterEncoderImplementations::ISO8859_4 showCharacterSet
     CharacterEncoderImplementations::ISO8859_5 showCharacterSet
     CharacterEncoderImplementations::ISO8859_6 showCharacterSet
     CharacterEncoderImplementations::ISO8859_7 showCharacterSet
     CharacterEncoderImplementations::ISO8859_8 showCharacterSet
     CharacterEncoderImplementations::ISO8859_9 showCharacterSet
    "
8711
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1484
! !
c5f28b4c719d guessEncoding now implemented in CharacterEncoder
Claus Gittinger <cg@exept.de>
parents: 8388
diff changeset
  1485
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1486
!CharacterEncoder methodsFor:'encoding & decoding'!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1487
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1488
decodeString:anEncodedStringOrByteCollection
    "answer a unicode string for the argument, which is a string
     (or byte collection) in my encoding.
     Not implemented here - reported as subclassResponsibility."

    ^ self subclassResponsibility

    "Modified: / 16-01-2018 / 19:54:51 / stefan"
    "Modified (format): / 17-01-2018 / 13:45:06 / stefan"
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1495
!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1496
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1497
encodeCharacter:aUnicodeCharacterOrCodePoint
    "encode aUnicodeCharacterOrCodePoint to a (8-bit) String or ByteArray.
     The argument may be a Character or an Integer code point.
     An Integer is converted to the corresponding Character first;
     sending asString directly to an Integer would presumably answer its
     printed digits, and those digits would be encoded instead of the character.
     The actual encoding is done by encodeString:."

    |char|

    char := aUnicodeCharacterOrCodePoint isInteger
                ifTrue:[Character codePoint:aUnicodeCharacterOrCodePoint]
                ifFalse:[aUnicodeCharacterOrCodePoint].
    ^ self encodeString:char asString.

    "Created: / 17-01-2018 / 13:59:44 / stefan"
21471
bbf99c77f552 #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 21138
diff changeset
  1503
!
bbf99c77f552 #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 21138
diff changeset
  1504
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1505
encodeString:aUnicodeString
    "answer a string or ByteArray in my encoding for the given unicode string.
     Not implemented here - reported as subclassResponsibility."

    ^ self subclassResponsibility

    "Modified: / 16-01-2018 / 19:54:44 / stefan"
    "Modified (comment): / 17-01-2018 / 13:54:44 / stefan"
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1512
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1513
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1514
!CharacterEncoder methodsFor:'error handling'!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1515
24942
7ecac95aa6b6 #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24474
diff changeset
  1516
decodesToUnicode
    "answer true, if this encoder decodes data to unicode,
     i.e. if my class answers #unicode as its nameOfDecodedCode"

    |decodedCodeName|

    decodedCodeName := self class nameOfDecodedCode.
    ^ decodedCodeName == #unicode

    "Created: / 21-11-2019 / 18:42:51 / Stefan Vogel"
7ecac95aa6b6 #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24474
diff changeset
  1522
!
7ecac95aa6b6 #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24474
diff changeset
  1523
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1524
decodingError
    "report an error that there is no unicode-codePoint for a given codePoint in this encoding
     (which is unlikely), or that the encoding is undefined for that value
     (for example, holes in the ISO-8859-3 encoding).
     A DecodingError is raised as a request, with the calling context as suspended context
     and defaultDecoderValue as its default value. Its parameter is the offending code,
     if that is known (nil otherwise). Answers the value returned by raiseRequest."

    |badCodePoint sender|

    sender := thisContext sender.
    "/ only when called from encode: or decode: is the first argument of the
    "/ calling method taken as the offending code (same test as in encodingError).
    "/ For any other caller, the first argument has no known meaning
    "/ (and may not even exist), so the parameter is left nil.
    ((sender selector == #encode:) or:[sender selector == #decode:]) ifTrue:[
        badCodePoint := sender methodHome argAt:1
    ].
    ^ (DecodingError new)
        defaultValue:(self defaultDecoderValue);
        parameter:badCodePoint;
        messageText:'invalid code';
        suspendedContext:sender;
        raiseRequest.
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1541
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1542
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1543
defaultDecoderValue
    "the replacement value to be placed into a decoded string,
     if there is no unicode codePoint for some encoded codePoint.
     Here, 16rFFFF is answered."

    ^ 16rFFFF
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1549
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1550
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1551
defaultEncoderValue
    "the replacement value to be placed into an encoded string,
     if there is no codePoint for some unicode codePoint.
     Here, the codePoint of the question mark character is answered."

    ^ $? codePoint
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1557
!
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1558
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1559
encodingError
    "report that some unicode-codePoint cannot be represented by this encoder.
     An EncodingError is raised as a request, with the calling context as suspended context
     and defaultEncoderValue as its default value. Its parameter is the offending code,
     if that is known (nil otherwise). Answers the value returned by raiseRequest."

    |caller callerSelector unrepresentableCode ex|

    caller := thisContext sender.
    callerSelector := caller selector.
    "/ the first argument of the calling method is taken as the offending code
    "/ only if we were called from encode: or decode:
    (callerSelector == #encode: or:[callerSelector == #decode:]) ifTrue:[
        unrepresentableCode := caller methodHome argAt:1
    ].

    ex := EncodingError new.
    ex defaultValue:self defaultEncoderValue.
    ex parameter:unrepresentableCode.
    ex messageText:'unrepresentable code (some character cannot be represented)'.
    ex suspendedContext:caller.
    ^ ex raiseRequest

    "Modified: / 12-07-2012 / 20:36:37 / cg"
    "Modified: / 10-01-2020 / 15:31:46 / Stefan Vogel"
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1577
! !
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  1578
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1579
!CharacterEncoder methodsFor:'printing'!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1580
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1581
printOn:aStream
    "append a printed representation of the receiver to aStream,
     in the form: nameOfDecodedCode, followed by '->', followed by nameOfEncoding"

    aStream nextPutAll:self nameOfDecodedCode.
    aStream nextPutAll:'->'.
    aStream nextPutAll:self nameOfEncoding
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1586
! !
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1587
7917
3649394bf5c0 checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7915
diff changeset
  1588
!CharacterEncoder methodsFor:'queries'!
3649394bf5c0 checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7915
diff changeset
  1589
22426
a68868d5efbd #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22425
diff changeset
  1590
characterSize:charOrCodePoint
    "answer the number of bytes required to encode charOrCodePoint.
     Not implemented here - reported as subclassResponsibility."

    ^ self subclassResponsibility

    "Created: / 15-06-2005 / 15:11:04 / janfrog"
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1596
!
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1597
17118
fcf86d824eeb class: CharacterEncoder
Stefan Vogel <sv@exept.de>
parents: 16628
diff changeset
  1598
isEncoderFor:encoding
    "answer true, if I encode to the given encoding.
     The argument is compared in lowercase against my nameOfEncoding;
     the name 'iso10646-1' is treated as #unicode."

    |normalizedName|

    normalizedName := encoding asLowercase.
    normalizedName = #'iso10646-1' ifTrue:[
        ^ #unicode = self nameOfEncoding
    ].
    ^ normalizedName = self nameOfEncoding
fcf86d824eeb class: CharacterEncoder
Stefan Vogel <sv@exept.de>
parents: 16628
diff changeset
  1607
!
fcf86d824eeb class: CharacterEncoder
Stefan Vogel <sv@exept.de>
parents: 16628
diff changeset
  1608
7917
3649394bf5c0 checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7915
diff changeset
  1609
isNullEncoder
    "answer false here"

    ^ false
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1611
!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1612
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1613
nameOfDecodedCode
    "answer the name of the code I decode into (and encode from).
     Most coders decode from their code into unicode and encode from unicode into their code;
     the few exceptions must redefine this.
     Here, the answer is delegated to my class."

    ^ self class nameOfDecodedCode
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1618
!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1619
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1620
nameOfEncoding
    "answer the name of my encoding, as provided by my class"

    ^ self class nameOfEncoding
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1622
!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1623
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1624
userFriendlyNameOfEncoding
    "answer the user friendly name of my encoding, as provided by my class"

    ^ self class userFriendlyNameOfEncoding
7917
3649394bf5c0 checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7915
diff changeset
  1626
! !
3649394bf5c0 checkin from browser
Claus Gittinger <cg@exept.de>
parents: 7915
diff changeset
  1627
11975
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1628
!CharacterEncoder methodsFor:'stream support'!
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1629
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1630
encodeCharacter:aUnicodeCharacter on:aStream
    "encode the given unicode character and write the result onto aStream.
     This default implementation encodes via encodeCharacter: and writes
     the resulting collection with nextPutAll:.
     Subclasses can redefine this to avoid allocating many new string instances."

    |encoded|

    encoded := self encodeCharacter:aUnicodeCharacter.
    aStream nextPutAll:encoded.

    "Created: / 16-02-2017 / 16:18:33 / stefan"
    "Modified: / 17-01-2018 / 14:00:28 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1638
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1639
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1640
encodeString:aUnicodeString on:aStream
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1641
    "given a string in unicode, encode it onto aStream.
     This default implementation encodes via #encodeString: and
     appends the result to aStream with #nextPutAll:.
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1642
     Subclasses can redefine this to avoid allocating many new string instances
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1643
     (but must then also redefine #encodeString: to collect the characters)"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1644
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1645
    aStream nextPutAll:(self encodeString:aUnicodeString).
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1646
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1647
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1648
readNext:countArg charactersFrom:aStream
    "read countArg characters from aStream, one at a time via #readNextCharacterFrom:,
     and answer them as the contents of a CharacterWriteStream.
     NOTE(review): a nil answered by #readNextCharacterFrom: is passed to #nextPut:
     unchecked - confirm how the end of aStream is meant to be handled here."
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1649
    |writeStream count "{ Class:SmallInteger }"|
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1650
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1651
    count := countArg.
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1652
    writeStream := CharacterWriteStream on:(String new:count).
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1653
    count timesRepeat:[
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1654
        writeStream nextPut:(self readNextCharacterFrom:aStream).
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1655
    ].
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1656
    ^ writeStream contents.
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1657
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1658
    "Created: / 16-01-2018 / 20:08:10 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1659
    "Modified: / 17-01-2018 / 16:44:29 / stefan"
11975
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1660
!
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1661
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1662
readNextCharacterFrom:aStream
    "read and answer the next character from aStream.
     Not implemented here - this is the responsibility of the subclasses."
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1663
    ^ self subclassResponsibility
12608
Claus Gittinger <cg@exept.de>
parents: 12435
diff changeset
  1664
Claus Gittinger <cg@exept.de>
parents: 12435
diff changeset
  1665
    "Created: / 14-06-2005 / 17:03:21 / janfrog"
Claus Gittinger <cg@exept.de>
parents: 12435
diff changeset
  1666
    "Modified: / 15-06-2005 / 15:27:49 / janfrog"
Claus Gittinger <cg@exept.de>
parents: 12435
diff changeset
  1667
    "Modified: / 20-06-2005 / 13:13:52 / masca"
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1668
    "Modified: / 16-01-2018 / 20:12:07 / stefan"
11975
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1669
! !
7b37b4dbd66f *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 11374
diff changeset
  1670
24474
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1671
!CharacterEncoder methodsFor:'testing'!
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1672
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1673
isUnicodeSubsetEncoder
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1674
    "answer true, if this encodes a subset of Unicode, that is a 1-to-1
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1675
     mapping to unicode.
     The default answered here is false."
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1676
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1677
    ^ false
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1678
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1679
    "Created: / 27-07-2019 / 14:51:28 / Stefan Vogel"
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1680
!
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1681
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1682
isUtf16Encoder
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1683
    "answer true, if this encodes from/to UTF-16 (regardless of byte-order).
     The default answered here is false."
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1684
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1685
    ^ false
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1686
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1687
    "Created: / 27-07-2019 / 14:44:52 / Stefan Vogel"
25269
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1688
!
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1689
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1690
isUtfEncoder
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1691
    "answer true, if this encodes from/to any UTF (regardless of how many bytes and byte-order).
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1692
     In other words: does it make sense to prepend a BOM?
     The default answered here is false."
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1693
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1694
    ^ false
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1695
55eb9e01d1da #FEATURE by Stefan Reise
sr
parents: 25233
diff changeset
  1696
    "Created: / 19-02-2020 / 16:17:20 / Stefan Reise"
24474
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1697
! !
e08f13d39b3f #FEATURE by stefan
Stefan Vogel <sv@exept.de>
parents: 24012
diff changeset
  1698
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1699
!CharacterEncoder::CompoundEncoder class methodsFor:'documentation'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1700
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1701
documentation
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1702
"
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1703
    A CompoundEncoder chains two real encoders;
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1704
    to encode (see #encodeString:):
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1705
        string -> decoder(decode) -> encoder(encode) -> result
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1706
    to decode (see #decodeString:):
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1707
        string -> encoder(decode) -> decoder(encode) -> result
7956
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1708
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1709
    |e|
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1710
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1711
    e := CompoundEncoder new.
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1712
    e encoder:ISO8859_5 decoder:KOI8_R.
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1713
    e decode:16rB0.  'CYRILLIC CAPITAL LETTER A; 16rB0 in 8859-5; 16rE1 in KOI8-R'.
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1714
    e encode:16rE1.  
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1715
"
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1716
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1717
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1718
!CharacterEncoder::CompoundEncoder methodsFor:'accessing'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1719
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1720
encoder:encoderArg decoder:decoderArg  
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1721
    "set the two encoders which are chained by the receiver:
     the instance variables encoder and decoder"
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1722
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1723
    decoder := decoderArg.
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1724
    encoder := encoderArg.
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1725
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1726
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1727
!CharacterEncoder::CompoundEncoder methodsFor:'encoding & decoding'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1728
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1729
decodeString:anEncodedStringOrByteCollection
    "decode the argument with encoder, then encode the intermediate result with decoder"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1730
    ^ decoder encodeString:(encoder decodeString:anEncodedStringOrByteCollection)
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1731
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1732
    "Modified (format): / 17-01-2018 / 13:44:08 / stefan"
7956
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1733
!
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1734
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1735
encodeString:anEncodedStringOrByteCollection
    "decode the argument with decoder, then encode the intermediate result with encoder"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1736
    ^ encoder encodeString:(decoder decodeString:anEncodedStringOrByteCollection)
7956
c43ee9e00bab *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7948
diff changeset
  1737
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1738
    "Modified (format): / 17-01-2018 / 13:46:26 / stefan"
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1739
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1740
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1741
!CharacterEncoder::CompoundEncoder methodsFor:'printing'!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1742
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1743
printOn:aStream
    "append a printed representation of the receiver to aStream:
     the encoding name of decoder, followed by '->' and whatever encoder prints for itself"
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1744
    aStream 
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1745
        nextPutAll:(decoder nameOfEncoding);
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1746
        nextPutAll:'->'.
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1747
"/        nextPutAll:(decoder nameOfDecodedCode);
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1748
"/        nextPutAll:'->';
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1749
"/        nextPutAll:(encoder nameOfEncoding)
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1750
    encoder printOn:aStream
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1751
! !
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1752
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1753
!CharacterEncoder::CompoundEncoder methodsFor:'queries'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1754
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1755
characterSize:aCharacterOrCodepoint
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1756
    "return the number of bytes required to encode aCharacterOrCodepoint.
     The argument is first decoded by decoder; the size of that result
     is then asked from encoder."
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1757
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1758
    ^ encoder characterSize:(decoder decode:aCharacterOrCodepoint)
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1759
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1760
    "Created: / 16-01-2018 / 17:58:51 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1761
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1762
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1763
!CharacterEncoder::CompoundEncoder methodsFor:'stream support'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1764
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1765
readNext:count charactersFrom:aStream
    "let encoder read count characters from aStream, convert what it answers
     via asString and answer that string encoded by decoder"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1766
    ^ decoder encodeString:(encoder readNext:count charactersFrom:aStream) asString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1767
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1768
    "Created: / 16-01-2018 / 20:50:56 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1769
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1770
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1771
readNextCharacterFrom:aStream
    "let encoder read the next character from aStream and answer it
     encoded by decoder (the first element of the encoded string).
     Answer nil, if encoder answers nil (as NullEncoder does, when the
     stream answers nil), instead of sending asString to nil."
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1772
    |chOrNil|

    chOrNil := encoder readNextCharacterFrom:aStream.
    chOrNil isNil ifTrue:[
        "nothing was read - pass the nil on to the caller"
        ^ nil
    ].
    ^ (decoder encodeString:chOrNil asString) first
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1773
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1774
    "Created: / 16-01-2018 / 21:10:28 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1775
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1776
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1777
!CharacterEncoder::NullEncoder class methodsFor:'documentation'!
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1778
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1779
documentation
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1780
"
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1781
    A NullEncoder does nothing:
    encoding answers the argument unchanged and decoding only converts
    the argument via asString.
7932
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1782
"
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1783
! !
ee233bf44df5 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7924
diff changeset
  1784
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1785
!CharacterEncoder::NullEncoder methodsFor:'encoding & decoding'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1786
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1787
decodeString:anEncodedStringOrByteCollection
    "no decoding is done; answer the argument converted via asString"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1788
    ^ anEncodedStringOrByteCollection asString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1789
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1790
    "Modified: / 17-01-2018 / 13:43:42 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1791
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1792
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1793
encodeString:aString
    "no encoding is done; answer the argument itself"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1794
    ^ aString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1795
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1796
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1797
!CharacterEncoder::NullEncoder methodsFor:'queries'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1798
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1799
characterSize:charOrCodePoint
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1800
    "return the number of bytes required to encode charOrCodePoint;
     that is what the argument, converted via asCharacter, answers to bytesPerCharacter"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1801
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1802
    ^ charOrCodePoint asCharacter bytesPerCharacter
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1803
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1804
    "
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1805
     NullEncoder basicNew characterSize:$a codePoint 
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1806
     NullEncoder basicNew characterSize:16r3fe       
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1807
     NullEncoder basicNew characterSize:16r3ffe      
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1808
    "
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1809
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1810
    "Modified (comment): / 16-01-2018 / 21:15:01 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1811
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1812
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1813
isNullEncoder
    "answer true here - the receiver is a NullEncoder"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1814
    ^ true
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1815
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1816
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1817
!CharacterEncoder::NullEncoder methodsFor:'stream support'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1818
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1819
readNext:count charactersFrom:aStream
    "read the next count elements from aStream without any decoding
     and answer them converted via asString"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1820
    ^ (aStream next:count) asString
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1821
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1822
    "Created: / 16-01-2018 / 20:19:38 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1823
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1824
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1825
readNextCharacterFrom:aStream
    "read the next element from aStream without any decoding and answer it
     converted via asCharacter; answer nil, if the stream answered nil"
22782
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1826
    |chOrNil|
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1827
22782
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1828
    chOrNil := aStream next.
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1829
    chOrNil notNil ifTrue:[
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1830
        ^ chOrNil asCharacter
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1831
    ].
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1832
    ^ nil.
d76c7a49ae45 #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22699
diff changeset
  1833
    
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1834
    "Created: / 16-01-2018 / 20:04:01 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1835
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1836
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1837
!CharacterEncoder::InverseEncoder class methodsFor:'documentation'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1838
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1839
documentation
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1840
"
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1841
    An InverseEncoder does the inverse of the encoder it wraps (its decoder)
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1842
    - i.e. encode is really a decode and decode is really an encode.
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1843
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1844
    InverseEncoder is always used to encode to unicode and decode from unicode
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1845
    (see CharacterEncoder class >> #encoderToEncodeFrom:into:).
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1846
"
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1847
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1848
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1849
!CharacterEncoder::InverseEncoder methodsFor:'accessing'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1850
22584
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
  1851
decoder:anEncoder
    "set the wrapped encoder, to which the receiver forwards with encode and decode swapped"
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
  1852
    decoder := anEncoder.
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1853
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1854
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1855
!CharacterEncoder::InverseEncoder methodsFor:'encoding & decoding'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1856
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1857
decodeString:anEncodedStringOrByteCollection
    "inverse operation: decoding is done by asking decoder to encode the argument"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1858
    ^ decoder encodeString:anEncodedStringOrByteCollection
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1859
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1860
    "Modified (format): / 17-01-2018 / 13:43:57 / stefan"
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1861
!
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1862
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1863
encodeString:anEncodedStringOrByteCollection
    "inverse operation: encoding is done by asking decoder to decode the argument"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1864
    ^ decoder decodeString:anEncodedStringOrByteCollection
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1865
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1866
    "Modified (format): / 17-01-2018 / 13:46:47 / stefan"
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1867
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1868
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1869
!CharacterEncoder::InverseEncoder methodsFor:'printing'!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1870
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1871
printOn:aStream
    "append a printed representation of the receiver to aStream:
     the encoding name of decoder, followed by '->' and the name of its decoded code"
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1872
    aStream 
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1873
        nextPutAll:(decoder nameOfEncoding);
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1874
        nextPutAll:'->';
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1875
        nextPutAll:(decoder nameOfDecodedCode)
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1876
! !
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1877
12435
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1878
!CharacterEncoder::InverseEncoder methodsFor:'queries'!
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1879
22426
a68868d5efbd #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22425
diff changeset
  1880
characterSize:charOrCodePoint
21138
1cc7a2e7ee19 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 20645
diff changeset
  1881
    "return the number of bytes required to encode charOrCodePoint;
     the query is forwarded unchanged to decoder"
1cc7a2e7ee19 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 20645
diff changeset
  1882
22426
a68868d5efbd #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22425
diff changeset
  1883
    ^ decoder characterSize:charOrCodePoint
12435
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1884
! !
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1885
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1886
!CharacterEncoder::InverseEncoder methodsFor:'stream support'!
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1887
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1888
readNext:count charactersFrom:aStream
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1889
    "read the next count elements (bytes or characters) from aStream and
     answer the result of sending encodeString: to the wrapped decoder with them,
     i.e. convert from unicode to something else (the inverse direction of the decoder)"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1890
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1891
    ^ decoder encodeString:(aStream next:count).
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1892
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1893
    "Created: / 16-01-2018 / 20:53:42 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1894
    "Modified (comment): / 17-01-2018 / 13:28:41 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1895
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1896
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1897
readNextCharacterFrom:aStream
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1898
    "read the next element from aStream, wrap it into a one-element String and
     answer the result of sending encodeString: to the wrapped decoder with it
     (unicode to something else).
     NOTE(review): the answer is whatever encodeString: returns for that
     one-element String - presumably a collection, not a single character; confirm against callers"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1899
22584
661989139856 #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 22579
diff changeset
  1900
    ^ decoder encodeString:(String with:aStream next).
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1901
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1902
    "Created: / 16-01-2018 / 21:08:11 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1903
    "Modified: / 17-01-2018 / 13:29:59 / stefan"
12435
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1904
! !
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  1905
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1906
!CharacterEncoder::DefaultEncoder class methodsFor:'documentation'!
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1907
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1908
documentation
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1909
"
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1910
    This class is only a dummy, kept for ST80 compatibility.
7914
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1911
"
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1912
! !
86a3784b40dd *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7913
diff changeset
  1913
7915
0b92b16542f6 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7914
diff changeset
  1914
!CharacterEncoder::OtherEncoding class methodsFor:'private'!
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1915
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1916
flushCode
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1917
    "intentionally empty: nothing is done here"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1918
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1919
    "Modified (comment): / 16-01-2018 / 17:08:17 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1920
! !
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1921
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1922
!CharacterEncoder::OtherEncoding class methodsFor:'testing'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1923
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1924
isAbstract
    "answer true only if the receiver is CharacterEncoder::OtherEncoding itself;
     any other receiver of this method (identity comparison) answers false"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1925
    ^ self == CharacterEncoder::OtherEncoding
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1926
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1927
    "Created: / 17-01-2018 / 16:06:13 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1928
    "Modified: / 17-01-2018 / 17:50:37 / stefan"
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1929
! !
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  1930
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1931
!CharacterEncoder::TwoStepEncoder class methodsFor:'documentation'!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1932
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1933
documentation
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1934
"
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1935
    A TwoStepEncoder chains two real encoders, encoder1 and encoder2;
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1936
    to encode:
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1937
        string -> encoder1(encode) -> encoder2(encode) -> result
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1938
    to decode:
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1939
        string -> encoder2(decode) -> encoder1(decode) -> result
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1940
"
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1941
! !
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1942
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1943
!CharacterEncoder::TwoStepEncoder methodsFor:'accessing'!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1944
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1945
encoder1:encoder1Arg encoder2:encoder2Arg
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1946
    "set both stages of the receiver: encoder1Arg is stored in encoder1 and
     encoder2Arg in encoder2 (automatically generated setter)"
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1947
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1948
    encoder1 := encoder1Arg.
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1949
    encoder2 := encoder2Arg.
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1950
! !
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1951
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1952
!CharacterEncoder::TwoStepEncoder methodsFor:'encoding & decoding'!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1953
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1954
decodeString:anEncodedStringOrByteCollection
    "decode in the reverse order of encoding: the argument is first decoded by
     encoder2, and that intermediate result is then decoded by encoder1"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1955
    ^ encoder1 decodeString:(encoder2 decodeString:anEncodedStringOrByteCollection)
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1956
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  1957
    "Modified (format): / 17-01-2018 / 13:45:20 / stefan"
7919
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1958
!
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1959
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1960
encodeString:aString
    "encode in two steps: aString is first encoded by encoder1,
     and that intermediate result is then encoded by encoder2"
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1961
    ^ encoder2 encodeString:(encoder1 encodeString:aString)
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1962
! !
92b61bef1b1a *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7917
diff changeset
  1963
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1964
!CharacterEncoder::TwoStepEncoder methodsFor:'printing'!
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1965
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1966
printOn:aStream
    "append a printed representation of the receiver to aStream, showing the whole chain:
     <encoder1 nameOfDecodedCode>-><encoder1 nameOfEncoding>-><encoder2 nameOfEncoding>"
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1967
    aStream 
14209
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1968
        nextPutAll:(encoder1 nameOfDecodedCode);
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1969
        nextPutAll:'->';
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1970
        nextPutAll:(encoder1 nameOfEncoding);
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1971
        nextPutAll:'->';
912e4845d386 changed: #encodingError
Claus Gittinger <cg@exept.de>
parents: 14207
diff changeset
  1972
        nextPutAll:(encoder2 nameOfEncoding)
7972
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1973
! !
91aa73f89491 *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 7971
diff changeset
  1974
11300
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1975
!CharacterEncoder::TwoStepEncoder methodsFor:'queries'!
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1976
22426
a68868d5efbd #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22425
diff changeset
  1977
characterSize:charOrCodePoint
21138
1cc7a2e7ee19 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 20645
diff changeset
  1978
    "return the number of bytes required to encode charOrCodePoint,
     as reported by encoder2 alone (encoder1 is not consulted)"
1cc7a2e7ee19 #DOCUMENTATION by cg
Claus Gittinger <cg@exept.de>
parents: 20645
diff changeset
  1979
14523
91746a24d5ad characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents: 14209
diff changeset
  1980
    "/ naive: only encoder2 is asked; actually, we have to do a real encoding to get this info properly
22426
a68868d5efbd #BUGFIX by cg
Claus Gittinger <cg@exept.de>
parents: 22425
diff changeset
  1981
    ^ (encoder2 characterSize:charOrCodePoint)
14523
91746a24d5ad characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents: 14209
diff changeset
  1982
91746a24d5ad characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents: 14209
diff changeset
  1983
    "Created: / 22-11-2012 / 13:07:47 / cg"
91746a24d5ad characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents: 14209
diff changeset
  1984
!
91746a24d5ad characterSize: query was missing
Claus Gittinger <cg@exept.de>
parents: 14209
diff changeset
  1985
11300
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1986
nameOfEncoding
    "answer the name of the final encoding, i.e. the one of encoder2 only;
     the combined encoder1-encoder2 name is commented out in the expression below"
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1987
    ^ "encoder1 nameOfEncoding , '-' ," encoder2 nameOfEncoding
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1988
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1989
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1990
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1991
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1992
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1993
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1994
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1995
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1996
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1997
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1998
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  1999
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2000
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2001
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2002
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2003
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2004
! !
2e90a91ff766 nameOf two-step encoder
Claus Gittinger <cg@exept.de>
parents: 11295
diff changeset
  2005
22470
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2006
!CharacterEncoder::TwoStepEncoder methodsFor:'stream support'!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2007
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2008
readNext:count charactersFrom:aStream
    "let encoder2 read (and decode) the next count characters from aStream,
     then decode that intermediate result with encoder1 and answer it"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2009
    ^ encoder1 decodeString:(encoder2 readNext:count charactersFrom:aStream)
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2010
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2011
    "Created: / 16-01-2018 / 20:47:52 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2012
!
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2013
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2014
readNextCharacterFrom:aStream
    "let encoder2 read the next character from aStream, convert that result to a
     string, decode the string with encoder1 and answer the first element of the decoded result"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2015
    ^ (encoder1 decodeString:(encoder2 readNextCharacterFrom:aStream) asString) first
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2016
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2017
    "Created: / 16-01-2018 / 21:06:48 / stefan"
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2018
! !
c9dc532200c9 #REFACTORING by stefan
Stefan Vogel <sv@exept.de>
parents: 22426
diff changeset
  2019
7892
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2020
!CharacterEncoder class methodsFor:'documentation'!
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2021
149a145e871c initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
  2022
version
    "answer the version string of this source file; the literal is a
     $Header$ keyword placeholder - presumably expanded by the version control system"
18624
1f113cce940e class: CharacterEncoder
Stefan Vogel <sv@exept.de>
parents: 18305
diff changeset
  2023
    ^ '$Header$'
12435
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  2024
!
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  2025
539c24148e90 added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents: 11975
diff changeset
  2026
version_CVS
    "answer the CVS version string of this source file; the literal is a
     $Header$ keyword placeholder - presumably expanded by the version control system"
18624
1f113cce940e class: CharacterEncoder
Stefan Vogel <sv@exept.de>
parents: 18305
diff changeset
  2027
    ^ '$Header$'
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  2028
! !
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  2029
14777
a669080229da add user friendly name to semaphores
Stefan Vogel <sv@exept.de>
parents: 14559
diff changeset
  2030
7899
7577df77ba95 character encodings - first attempt
Claus Gittinger <cg@exept.de>
parents: 7893
diff changeset
  2031
CharacterEncoder initialize!