# HG changeset patch # User Claus Gittinger # Date 1077211020 -3600 # Node ID 357e53496acc063753a7fbf5de4f5317bf53d3e8 # Parent 2ae69eb663eb80c29b83d63f6c804c9447d57082 *** empty log message *** diff -r 2ae69eb663eb -r 357e53496acc CharacterEncoder.st --- a/CharacterEncoder.st Thu Feb 19 15:13:00 2004 +0100 +++ b/CharacterEncoder.st Thu Feb 19 18:17:00 2004 +0100 @@ -558,7 +558,7 @@ howToAddMoreCoders " - Coders can be hand-written or generated via a mapping table. + Coders can be hand-written or automagically generated via a mapping table. Examples for hand-written coders are UTF8_to_ISO10464 or JIS0208_to_JIS7. The table driven encode/decode methods can be generated from a character mapping document @@ -616,6 +616,176 @@ " ! ! +!CharacterEncoder class methodsFor:'instance creation'! + +encoderFor:encodingNameSymbol + "given the name of an encoding, return an encoder-instance which can map these from/into unicode." + + ^ self + encoderFor:encodingNameSymbol + ifAbsent:[ + self error:'no encoder for ' , encodingNameSymbol mayProceed:true. + NullEncoder new + ] + + " + CharacterEncoder encoderFor:#'latin1' + self encoderFor:#'arabic' + self encoderFor:#'ms-arabic' + self encoderFor:#'iso8859-5' + self encoderFor:#'koi8-r' + self encoderFor:#'koi8-u' + self encoderFor:#'jis0208' + self encoderFor:#'jis7' + " +! + +encoderFor:encodingNameSymbol ifAbsent:exceptionValue + "given the name of an encoding, return an encoder-instance which can map these from/into unicode." + + |cls lcName name| + + lcName := encodingNameSymbol asLowercase asSymbolIfInterned. + name := lcName ? encodingNameSymbol. + + cls := EncodersByName at:name ifAbsent:nil. + cls notNil ifTrue:[^ cls new ]. + + self allSubclassesDo:[:cls | + cls nameOfDecodedCode == #unicode ifTrue:[ + cls nameOfEncoding = name ifTrue:[ + EncodersByName at:name put:cls. + ^ cls new. + ] + ]. + ]. + self allSubclassesDo:[:cls | + cls nameOfDecodedCode == #unicode ifTrue:[ + (cls alternativeNamesOfEncoding includes:name) ifTrue:[ + EncodersByName at:name put:cls. + ^ cls new. + ]. + ]. + ]. + + self allSubclassesDo:[:cls | + ((cls nameOfEncoding = name) + or:[(cls alternativeNamesOfEncoding includes:name)]) ifTrue:[ + "/ ok, found some other encoder - need a compound encoder then. + "/ the one found encodes into what we need, but needs something else as input. + + ^ TwoStepEncoder new + encoder1:(self encoderFor:(cls nameOfDecodedCode)) + encoder2:(cls new). + ]. + ]. + ^ exceptionValue value + + " + CharacterEncoder encoderFor:#'latin1' + self encoderFor:#'arabic' + self encoderFor:#'ms-arabic' + self encoderFor:#'iso8859-5' + self encoderFor:#'koi8-r' + self encoderFor:#'koi8-u' + self encoderFor:#'jis0208' + self encoderFor:#'jis7' + " +! + +encoderToEncodeFrom:oldEncodingArg into:newEncodingArg + |oldEncoding newEncoding encoder decoder| + + oldEncoding := oldEncodingArg ? #'unicode'. + newEncoding := newEncodingArg ? #'unicode'. + oldEncoding == newEncoding ifTrue:[^ NullEncoder new]. + (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new]. + "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new]. + + ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[ + ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new]. + + "/ unicode -> something + ^ self encoderFor:newEncoding. + ]. + ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[ + "/ something -> unicode + decoder := self encoderFor:oldEncoding. + ^ InverseEncoder new decoder:decoder. + ]. + + "/ look for a specialized encoder... + self allSubclassesDo:[:cls | + (cls nameOfEncoding = oldEncoding + or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ + |nameOfDecodedCode encoderForDecodedCode| + + nameOfDecodedCode := cls nameOfDecodedCode. + encoderForDecodedCode := self encoderFor:nameOfDecodedCode. + (nameOfDecodedCode = newEncoding + or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ + ^ InverseEncoder new decoder:cls new. + ] + ]. + (cls nameOfEncoding = newEncoding + or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ + |nameOfDecodedCode encoderForDecodedCode| + + nameOfDecodedCode := cls nameOfDecodedCode. + encoderForDecodedCode := self encoderFor:nameOfDecodedCode. + (nameOfDecodedCode = oldEncoding + or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ + ^ cls new. + ] + ]. + ]. + + "/ do it as: oldEncoding -> unicode -> newEncoding + + "/ something -> unicode + decoder := self encoderFor:oldEncoding. + + "/ unicode -> something + encoder := self encoderFor:newEncoding. + ^ CompoundEncoder new encoder:encoder decoder:decoder +! + +unicodeEncoderFor:encodingNameSymbol + "given the name of an encoding, return an encoder-instance which can map these from/into unicode." + + self obsoleteMethodWarning. + ^ self encoderFor:encodingNameSymbol + + " + CharacterEncoder unicodeEncoderFor:#'latin1' + self unicodeEncoderFor:#'arabic' + self unicodeEncoderFor:#'ms-arabic' + self unicodeEncoderFor:#'iso8859-5' + self unicodeEncoderFor:#'koi8-r' + self unicodeEncoderFor:#'koi8-u' + self unicodeEncoderFor:#'jis0208' + self unicodeEncoderFor:#'jis7' + " +! + +unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue + "given the name of an encoding, return an encoder-instance which can map these from/into unicode." + + self obsoleteMethodWarning. + ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue + + " + CharacterEncoder unicodeEncoderFor:#'latin1' + self unicodeEncoderFor:#'arabic' + self unicodeEncoderFor:#'ms-arabic' + self unicodeEncoderFor:#'iso8859-5' + self unicodeEncoderFor:#'koi8-r' + self unicodeEncoderFor:#'koi8-u' + self unicodeEncoderFor:#'jis0208' + self unicodeEncoderFor:#'jis7' + " +! ! + !CharacterEncoder class methodsFor:'Compatibility-ST80'! encoderNamed: encoderName @@ -725,63 +895,6 @@ encodeString:aString into:newEncodingArg ^ self encodeString:aString from:'unicode' into:newEncodingArg -! - -encoderToEncodeFrom:oldEncodingArg into:newEncodingArg - |oldEncoding newEncoding encoder decoder| - - oldEncoding := oldEncodingArg ? #'unicode'. - newEncoding := newEncodingArg ? #'unicode'. - oldEncoding == newEncoding ifTrue:[^ NullEncoder new]. - (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new]. - "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new]. - - ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[ - ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new]. - - "/ unicode -> something - ^ self encoderFor:newEncoding. - ]. - ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[ - "/ something -> unicode - decoder := self encoderFor:oldEncoding. - ^ InverseEncoder new decoder:decoder. - ]. - - "/ look for a specialized encoder... - self allSubclassesDo:[:cls | - (cls nameOfEncoding = oldEncoding - or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ - |nameOfDecodedCode encoderForDecodedCode| - - nameOfDecodedCode := cls nameOfDecodedCode. - encoderForDecodedCode := self encoderFor:nameOfDecodedCode. - (nameOfDecodedCode = newEncoding - or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ - ^ InverseEncoder new decoder:cls new. - ] - ]. - (cls nameOfEncoding = newEncoding - or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ - |nameOfDecodedCode encoderForDecodedCode| - - nameOfDecodedCode := cls nameOfDecodedCode. - encoderForDecodedCode := self encoderFor:nameOfDecodedCode. - (nameOfDecodedCode = oldEncoding - or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ - ^ cls new. - ] - ]. - ]. - - "/ do it as: oldEncoding -> unicode -> newEncoding - - "/ something -> unicode - decoder := self encoderFor:oldEncoding. - - "/ unicode -> something - encoder := self encoderFor:newEncoding. - ^ CompoundEncoder new encoder:encoder decoder:decoder ! ! !CharacterEncoder class methodsFor:'private'! @@ -876,81 +989,6 @@ ^ #() ! -encoderFor:encodingNameSymbol - "given the name of an encoding, return an encoder-instance which can map these from/into unicode." - - ^ self - encoderFor:encodingNameSymbol - ifAbsent:[ - self error:'no encoder for ' , encodingNameSymbol mayProceed:true. - NullEncoder new - ] - - " - CharacterEncoder encoderFor:#'latin1' - self encoderFor:#'arabic' - self encoderFor:#'ms-arabic' - self encoderFor:#'iso8859-5' - self encoderFor:#'koi8-r' - self encoderFor:#'koi8-u' - self encoderFor:#'jis0208' - self encoderFor:#'jis7' - " -! - -encoderFor:encodingNameSymbol ifAbsent:exceptionValue - "given the name of an encoding, return an encoder-instance which can map these from/into unicode." - - |cls lcName name| - - lcName := encodingNameSymbol asLowercase asSymbolIfInterned. - name := lcName ? encodingNameSymbol. - - cls := EncodersByName at:name ifAbsent:nil. - cls notNil ifTrue:[^ cls new ]. - - self allSubclassesDo:[:cls | - cls nameOfDecodedCode == #unicode ifTrue:[ - cls nameOfEncoding = name ifTrue:[ - EncodersByName at:name put:cls. - ^ cls new. - ] - ]. - ]. - self allSubclassesDo:[:cls | - cls nameOfDecodedCode == #unicode ifTrue:[ - (cls alternativeNamesOfEncoding includes:name) ifTrue:[ - EncodersByName at:name put:cls. - ^ cls new. - ]. - ]. - ]. - - self allSubclassesDo:[:cls | - ((cls nameOfEncoding = name) - or:[(cls alternativeNamesOfEncoding includes:name)]) ifTrue:[ - "/ ok, found some other encoder - need a compound encoder then. - "/ the one found encodes into what we need, but needs something else as input. - - ^ TwoStepEncoder new - encoder1:(self encoderFor:(cls nameOfDecodedCode)) - encoder2:(cls new). - ]. - ]. - ^ exceptionValue value - - " - CharacterEncoder encoderFor:#'latin1' - self encoderFor:#'arabic' - self encoderFor:#'ms-arabic' - self encoderFor:#'iso8859-5' - self encoderFor:#'koi8-r' - self encoderFor:#'koi8-u' - self encoderFor:#'jis0208' - self encoderFor:#'jis7' - " -! - isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg "return true, if superSetEncoding encoding includes all characters of subSetEncoding" @@ -1037,42 +1075,6 @@ ) ! -unicodeEncoderFor:encodingNameSymbol - "given the name of an encoding, return an encoder-instance which can map these from/into unicode." - - self obsoleteMethodWarning. - ^ self encoderFor:encodingNameSymbol - - " - CharacterEncoder unicodeEncoderFor:#'latin1' - self unicodeEncoderFor:#'arabic' - self unicodeEncoderFor:#'ms-arabic' - self unicodeEncoderFor:#'iso8859-5' - self unicodeEncoderFor:#'koi8-r' - self unicodeEncoderFor:#'koi8-u' - self unicodeEncoderFor:#'jis0208' - self unicodeEncoderFor:#'jis7' - " -! - -unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue - "given the name of an encoding, return an encoder-instance which can map these from/into unicode." - - self obsoleteMethodWarning. - ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue - - " - CharacterEncoder unicodeEncoderFor:#'latin1' - self unicodeEncoderFor:#'arabic' - self unicodeEncoderFor:#'ms-arabic' - self unicodeEncoderFor:#'iso8859-5' - self unicodeEncoderFor:#'koi8-r' - self unicodeEncoderFor:#'koi8-u' - self unicodeEncoderFor:#'jis0208' - self unicodeEncoderFor:#'jis7' - " -! - userFriendlyNameOfEncoding ^ self nameOfEncoding ! ! @@ -52936,7 +52938,7 @@ !CharacterEncoder class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.28 2004-02-18 23:51:18 cg Exp $' + ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.29 2004-02-19 17:17:00 cg Exp $' ! ! CharacterEncoder initialize!