--- a/CharacterEncoder.st Thu Feb 19 15:13:00 2004 +0100
+++ b/CharacterEncoder.st Thu Feb 19 18:17:00 2004 +0100
@@ -558,7 +558,7 @@
howToAddMoreCoders
"
- Coders can be hand-written or generated via a mapping table.
+ Coders can be hand-written or automagically generated via a mapping table.
Examples for hand-written coders are UTF8_to_ISO10464 or JIS0208_to_JIS7.
The table driven encode/decode methods can be generated from a character mapping document
@@ -616,6 +616,176 @@
"
! !
+!CharacterEncoder class methodsFor:'instance creation'!
+
+encoderFor:encodingNameSymbol
+ "given the name of an encoding, return an encoder-instance which can map these from/into unicode. The lookup is done by encoderFor:ifAbsent:. If no encoder is found, a proceedable error is raised; if proceeded, a NullEncoder is returned instead."
+
+ ^ self
+ encoderFor:encodingNameSymbol
+ ifAbsent:[
+ self error:'no encoder for ' , encodingNameSymbol mayProceed:true.
+ NullEncoder new
+ ]
+
+ "
+ CharacterEncoder encoderFor:#'latin1'
+ self encoderFor:#'arabic'
+ self encoderFor:#'ms-arabic'
+ self encoderFor:#'iso8859-5'
+ self encoderFor:#'koi8-r'
+ self encoderFor:#'koi8-u'
+ self encoderFor:#'jis0208'
+ self encoderFor:#'jis7'
+ "
+!
+
+encoderFor:encodingNameSymbol ifAbsent:exceptionValue
+    "given the name of an encoding, return a new encoder-instance for it; if no encoder class matches, return the value of exceptionValue. Lookup order: the EncodersByName cache, primary names, alternative names (both restricted to classes which decode into unicode), finally any class (answered as a TwoStepEncoder)."
+
+    |cachedClass lcName name|
+
+    lcName := encodingNameSymbol asLowercase asSymbolIfInterned.
+    name := lcName ? encodingNameSymbol.        "/ fall back to the name as given, if there is no lowercase symbol (lcName is nil)
+
+    cachedClass := EncodersByName at:name ifAbsent:nil.
+    cachedClass notNil ifTrue:[^ cachedClass new ].
+
+    self allSubclassesDo:[:eachClass |          "/ pass 1: primary encoding name, decoding into unicode
+        eachClass nameOfDecodedCode == #unicode ifTrue:[
+            eachClass nameOfEncoding = name ifTrue:[
+                EncodersByName at:name put:eachClass.
+                ^ eachClass new.
+            ]
+        ].
+    ].
+    self allSubclassesDo:[:eachClass |          "/ pass 2: alternative encoding names, decoding into unicode
+        eachClass nameOfDecodedCode == #unicode ifTrue:[
+            (eachClass alternativeNamesOfEncoding includes:name) ifTrue:[
+                EncodersByName at:name put:eachClass.
+                ^ eachClass new.
+            ].
+        ].
+    ].
+
+    self allSubclassesDo:[:eachClass |          "/ pass 3: any class with that name (result is not cached)
+        ((eachClass nameOfEncoding = name)
+        or:[(eachClass alternativeNamesOfEncoding includes:name)]) ifTrue:[
+            "/ ok, found some other encoder - need a compound encoder then.
+            "/ the one found encodes into what we need, but needs something else as input.
+
+            ^ TwoStepEncoder new
+                encoder1:(self encoderFor:(eachClass nameOfDecodedCode))
+                encoder2:(eachClass new).
+        ].
+    ].
+    ^ exceptionValue value
+
+    "
+     CharacterEncoder encoderFor:#'latin1'
+     self encoderFor:#'arabic'
+     self encoderFor:#'ms-arabic'
+     self encoderFor:#'iso8859-5'
+     self encoderFor:#'koi8-r'
+     self encoderFor:#'koi8-u'
+     self encoderFor:#'jis0208'
+     self encoderFor:#'jis7'
+    "
+!
+
+encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
+ |oldEncoding newEncoding encoder decoder|
+ "return an encoder which converts from oldEncodingArg into newEncodingArg; a nil argument stands for unicode. Answers a NullEncoder if no conversion is needed, otherwise a direct, an inverse or a compound (via unicode) encoder."
+ oldEncoding := oldEncodingArg ? #'unicode'.
+ newEncoding := newEncodingArg ? #'unicode'.
+ oldEncoding == newEncoding ifTrue:[^ NullEncoder new].
+ (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new].
+ "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new].
+
+ ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[
+ ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new].
+
+ "/ unicode -> something
+ ^ self encoderFor:newEncoding.
+ ].
+ ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[
+ "/ something -> unicode
+ decoder := self encoderFor:oldEncoding.
+ ^ InverseEncoder new decoder:decoder.
+ ].
+
+ "/ look for a specialized encoder, i.e. a class which converts directly between the two encodings...
+ self allSubclassesDo:[:cls |
+ (cls nameOfEncoding = oldEncoding
+ or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[
+ |nameOfDecodedCode encoderForDecodedCode|
+ "/ cls is named after oldEncoding; if its decoded code is newEncoding, the inverse of cls does the job
+ nameOfDecodedCode := cls nameOfDecodedCode.
+ encoderForDecodedCode := self encoderFor:nameOfDecodedCode.
+ (nameOfDecodedCode = newEncoding
+ or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[
+ ^ InverseEncoder new decoder:cls new.
+ ]
+ ].
+ (cls nameOfEncoding = newEncoding
+ or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[
+ |nameOfDecodedCode encoderForDecodedCode|
+ "/ cls is named after newEncoding; if its decoded code is oldEncoding, cls can be used as-is
+ nameOfDecodedCode := cls nameOfDecodedCode.
+ encoderForDecodedCode := self encoderFor:nameOfDecodedCode.
+ (nameOfDecodedCode = oldEncoding
+ or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[
+ ^ cls new.
+ ]
+ ].
+ ].
+
+ "/ no direct converter found - do it as: oldEncoding -> unicode -> newEncoding
+
+ "/ something -> unicode
+ decoder := self encoderFor:oldEncoding.
+
+ "/ unicode -> something
+ encoder := self encoderFor:newEncoding.
+ ^ CompoundEncoder new encoder:encoder decoder:decoder
+!
+
+unicodeEncoderFor:encodingNameSymbol
+ "obsolete: emits an obsolete-method warning and forwards to encoderFor:, which returns an encoder-instance for the named encoding. Kept for callers which still use the old selector."
+
+ self obsoleteMethodWarning.
+ ^ self encoderFor:encodingNameSymbol
+
+ "
+ CharacterEncoder unicodeEncoderFor:#'latin1'
+ self unicodeEncoderFor:#'arabic'
+ self unicodeEncoderFor:#'ms-arabic'
+ self unicodeEncoderFor:#'iso8859-5'
+ self unicodeEncoderFor:#'koi8-r'
+ self unicodeEncoderFor:#'koi8-u'
+ self unicodeEncoderFor:#'jis0208'
+ self unicodeEncoderFor:#'jis7'
+ "
+!
+
+unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue
+ "obsolete: emits an obsolete-method warning and forwards to encoderFor:ifAbsent:, passing exceptionValue through unchanged. Kept for callers which still use the old selector."
+
+ self obsoleteMethodWarning.
+ ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue
+
+ "
+ CharacterEncoder unicodeEncoderFor:#'latin1'
+ self unicodeEncoderFor:#'arabic'
+ self unicodeEncoderFor:#'ms-arabic'
+ self unicodeEncoderFor:#'iso8859-5'
+ self unicodeEncoderFor:#'koi8-r'
+ self unicodeEncoderFor:#'koi8-u'
+ self unicodeEncoderFor:#'jis0208'
+ self unicodeEncoderFor:#'jis7'
+ "
+! !
+
!CharacterEncoder class methodsFor:'Compatibility-ST80'!
encoderNamed: encoderName
@@ -725,63 +895,6 @@
encodeString:aString into:newEncodingArg
^ self encodeString:aString from:'unicode' into:newEncodingArg
-!
-
-encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
- |oldEncoding newEncoding encoder decoder|
-
- oldEncoding := oldEncodingArg ? #'unicode'.
- newEncoding := newEncodingArg ? #'unicode'.
- oldEncoding == newEncoding ifTrue:[^ NullEncoder new].
- (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new].
- "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new].
-
- ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[
- ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new].
-
- "/ unicode -> something
- ^ self encoderFor:newEncoding.
- ].
- ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[
- "/ something -> unicode
- decoder := self encoderFor:oldEncoding.
- ^ InverseEncoder new decoder:decoder.
- ].
-
- "/ look for a specialized encoder...
- self allSubclassesDo:[:cls |
- (cls nameOfEncoding = oldEncoding
- or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[
- |nameOfDecodedCode encoderForDecodedCode|
-
- nameOfDecodedCode := cls nameOfDecodedCode.
- encoderForDecodedCode := self encoderFor:nameOfDecodedCode.
- (nameOfDecodedCode = newEncoding
- or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[
- ^ InverseEncoder new decoder:cls new.
- ]
- ].
- (cls nameOfEncoding = newEncoding
- or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[
- |nameOfDecodedCode encoderForDecodedCode|
-
- nameOfDecodedCode := cls nameOfDecodedCode.
- encoderForDecodedCode := self encoderFor:nameOfDecodedCode.
- (nameOfDecodedCode = oldEncoding
- or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[
- ^ cls new.
- ]
- ].
- ].
-
- "/ do it as: oldEncoding -> unicode -> newEncoding
-
- "/ something -> unicode
- decoder := self encoderFor:oldEncoding.
-
- "/ unicode -> something
- encoder := self encoderFor:newEncoding.
- ^ CompoundEncoder new encoder:encoder decoder:decoder
! !
!CharacterEncoder class methodsFor:'private'!
@@ -876,81 +989,6 @@
^ #()
!
-encoderFor:encodingNameSymbol
- "given the name of an encoding, return an encoder-instance which can map these from/into unicode."
-
- ^ self
- encoderFor:encodingNameSymbol
- ifAbsent:[
- self error:'no encoder for ' , encodingNameSymbol mayProceed:true.
- NullEncoder new
- ]
-
- "
- CharacterEncoder encoderFor:#'latin1'
- self encoderFor:#'arabic'
- self encoderFor:#'ms-arabic'
- self encoderFor:#'iso8859-5'
- self encoderFor:#'koi8-r'
- self encoderFor:#'koi8-u'
- self encoderFor:#'jis0208'
- self encoderFor:#'jis7'
- "
-!
-
-encoderFor:encodingNameSymbol ifAbsent:exceptionValue
- "given the name of an encoding, return an encoder-instance which can map these from/into unicode."
-
- |cls lcName name|
-
- lcName := encodingNameSymbol asLowercase asSymbolIfInterned.
- name := lcName ? encodingNameSymbol.
-
- cls := EncodersByName at:name ifAbsent:nil.
- cls notNil ifTrue:[^ cls new ].
-
- self allSubclassesDo:[:cls |
- cls nameOfDecodedCode == #unicode ifTrue:[
- cls nameOfEncoding = name ifTrue:[
- EncodersByName at:name put:cls.
- ^ cls new.
- ]
- ].
- ].
- self allSubclassesDo:[:cls |
- cls nameOfDecodedCode == #unicode ifTrue:[
- (cls alternativeNamesOfEncoding includes:name) ifTrue:[
- EncodersByName at:name put:cls.
- ^ cls new.
- ].
- ].
- ].
-
- self allSubclassesDo:[:cls |
- ((cls nameOfEncoding = name)
- or:[(cls alternativeNamesOfEncoding includes:name)]) ifTrue:[
- "/ ok, found some other encoder - need a compound encoder then.
- "/ the one found encodes into what we need, but needs something else as input.
-
- ^ TwoStepEncoder new
- encoder1:(self encoderFor:(cls nameOfDecodedCode))
- encoder2:(cls new).
- ].
- ].
- ^ exceptionValue value
-
- "
- CharacterEncoder encoderFor:#'latin1'
- self encoderFor:#'arabic'
- self encoderFor:#'ms-arabic'
- self encoderFor:#'iso8859-5'
- self encoderFor:#'koi8-r'
- self encoderFor:#'koi8-u'
- self encoderFor:#'jis0208'
- self encoderFor:#'jis7'
- "
-!
-
isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg
"return true, if superSetEncoding encoding includes all characters of subSetEncoding"
@@ -1037,42 +1075,6 @@
)
!
-unicodeEncoderFor:encodingNameSymbol
- "given the name of an encoding, return an encoder-instance which can map these from/into unicode."
-
- self obsoleteMethodWarning.
- ^ self encoderFor:encodingNameSymbol
-
- "
- CharacterEncoder unicodeEncoderFor:#'latin1'
- self unicodeEncoderFor:#'arabic'
- self unicodeEncoderFor:#'ms-arabic'
- self unicodeEncoderFor:#'iso8859-5'
- self unicodeEncoderFor:#'koi8-r'
- self unicodeEncoderFor:#'koi8-u'
- self unicodeEncoderFor:#'jis0208'
- self unicodeEncoderFor:#'jis7'
- "
-!
-
-unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue
- "given the name of an encoding, return an encoder-instance which can map these from/into unicode."
-
- self obsoleteMethodWarning.
- ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue
-
- "
- CharacterEncoder unicodeEncoderFor:#'latin1'
- self unicodeEncoderFor:#'arabic'
- self unicodeEncoderFor:#'ms-arabic'
- self unicodeEncoderFor:#'iso8859-5'
- self unicodeEncoderFor:#'koi8-r'
- self unicodeEncoderFor:#'koi8-u'
- self unicodeEncoderFor:#'jis0208'
- self unicodeEncoderFor:#'jis7'
- "
-!
-
userFriendlyNameOfEncoding
^ self nameOfEncoding
! !
@@ -52936,7 +52938,7 @@
!CharacterEncoder class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.28 2004-02-18 23:51:18 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.29 2004-02-19 17:17:00 cg Exp $'
! !
CharacterEncoder initialize!