--- a/CharacterEncoder.st Tue Feb 28 16:16:06 2017 +0100
+++ b/CharacterEncoder.st Tue Feb 28 16:18:51 2017 +0100
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
"
COPYRIGHT (c) 2004 by eXept Software AG
All Rights Reserved
@@ -17,10 +15,10 @@
Object subclass:#CharacterEncoder
instanceVariableNames:''
- classVariableNames:'EncoderClassesByName EncodersByName CachedEncoders AccessLock
- NullEncoderInstance Jis7KanjiEscapeSequence
- Jis7RomanEscapeSequence JisISO2022EscapeSequence
- Jis7KanjiOldEscapeSequence EncodingDetectors'
+ classVariableNames:'AccessLock CachedEncoders EncoderClassesByName EncodersByName
+ EncodingDetectors Jis7KanjiEscapeSequence
+ Jis7KanjiOldEscapeSequence Jis7RomanEscapeSequence
+ JisISO2022EscapeSequence NullEncoderInstance'
poolDictionaries:''
category:'Collections-Text-Encodings'
!
@@ -265,24 +263,20 @@
encoderFor:encodingNameSymbolArg ifAbsent:exceptionValue
"given the name of an encoding, return an encoder-instance which can map these from/into unicode."
- |encodingNameSymbol enc clsName cls lcName name unicodeEncoders unicodeEncoderClasses|
+ |encodingNameSymbol enc clsName cls unicodeEncoders unicodeEncoderClasses|
- encodingNameSymbol := encodingNameSymbolArg.
- encodingNameSymbol isNil ifTrue:[ ^ NullEncoderInstance].
+ encodingNameSymbolArg isNil ifTrue:[ ^ NullEncoderInstance].
- encodingNameSymbol = 'iso10646-1' ifTrue:[ encodingNameSymbol := #unicode].
+ encodingNameSymbol := encodingNameSymbolArg asLowercase asSymbolIfInternedOrSelf.
+ encodingNameSymbol == #'iso10646-1' ifTrue:[encodingNameSymbol := #unicode].
- lcName := encodingNameSymbol asLowercase.
- name := lcName asSymbolIfInterned.
- name isNil ifTrue:[name := lcName].
-
- name includesMatchCharacters ifTrue:[
+ encodingNameSymbol includesMatchCharacters ifTrue:[
AccessLock critical:[
unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil.
].
unicodeEncoders notNil ifTrue:[
unicodeEncoders keysAndValuesDo:[:eachEncodingAlias :eachEncoderInstance |
- (name matches:eachEncodingAlias) ifTrue:[
+ (encodingNameSymbol matches:eachEncodingAlias) ifTrue:[
^ eachEncoderInstance.
].
].
@@ -293,7 +287,7 @@
].
unicodeEncoderClasses notNil ifTrue:[
unicodeEncoderClasses keysAndValuesDo:[:eachEncodingAlias :eachEncoderClassOrName |
- (name matches:eachEncodingAlias) ifTrue:[
+ (encodingNameSymbol matches:eachEncodingAlias) ifTrue:[
eachEncoderClassOrName isBehavior ifTrue:[
cls := eachEncoderClassOrName
] ifFalse:[
@@ -309,19 +303,13 @@
].
AccessLock critical:[
- unicodeEncoders := EncodersByName at:#unicode ifAbsent:nil.
- unicodeEncoders isNil ifTrue:[
- EncodersByName at:#unicode put:(unicodeEncoders := Dictionary new).
- ].
- enc := unicodeEncoders at:name ifAbsent:nil.
+ unicodeEncoders := EncodersByName at:#unicode ifAbsentPut:[Dictionary new].
+ enc := unicodeEncoders at:encodingNameSymbol ifAbsent:nil.
].
enc isNil ifTrue:[
AccessLock critical:[
- unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsent:nil.
- unicodeEncoderClasses isNil ifTrue:[
- self encoderClassesByName at:#unicode put:(unicodeEncoderClasses := Dictionary new).
- ].
- clsName := unicodeEncoderClasses at:name ifAbsent:nil.
+ unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsentPut:[Dictionary new].
+ clsName := unicodeEncoderClasses at:encodingNameSymbol ifAbsent:nil.
].
clsName notNil ifTrue:[
clsName isBehavior ifTrue:[
@@ -332,7 +320,7 @@
cls notNil ifTrue:[
enc := cls new.
AccessLock critical:[
- unicodeEncoders at:name put:enc.
+ unicodeEncoders at:encodingNameSymbol put:enc.
]
].
].
@@ -342,8 +330,8 @@
^ enc
].
- "/ no direct encoder from unicode->name
- "/ search for unicode->any and: any->name
+ "/ no direct encoder from unicode->encodingNameSymbol
+ "/ search for unicode->any and: any->encodingNameSymbol
AccessLock critical:[
unicodeEncoderClasses := self encoderClassesByName at:#unicode ifAbsent:nil.
].
@@ -354,7 +342,7 @@
dict2 := self encoderClassesByName at:eachEncodingAlias ifAbsent:nil.
].
dict2 notNil ifTrue:[
- clsName := dict2 at:name ifAbsent:nil.
+ clsName := dict2 at:encodingNameSymbol ifAbsent:nil.
clsName notNil ifTrue:[
clsName isBehavior ifTrue:[
cls := clsName
@@ -367,7 +355,7 @@
(enc1 notNil and:[enc2 notNil]) ifTrue:[
enc := TwoStepEncoder new encoder1:enc1 encoder2:enc2.
AccessLock critical:[
- unicodeEncoders at:name put:enc.
+ unicodeEncoders at:encodingNameSymbol put:enc.
].
^ enc.
]
@@ -424,6 +412,7 @@
"
"Modified: / 12-07-2012 / 19:45:58 / cg"
+ "Modified: / 27-02-2017 / 16:47:40 / stefan"
!
encoderForUTF8
@@ -432,17 +421,10 @@
^ self encoderFor:#utf8
"
- CharacterEncoder encoderFor:#'latin1'
- self encoderFor:#'arabic'
- self encoderFor:#'ms-arabic'
- self encoderFor:#'iso8859-5'
- self encoderFor:#'koi8-r'
- self encoderFor:#'koi8-u'
- self encoderFor:#'jis0208'
- self encoderFor:#'jis7'
- self encoderFor:#'utf8'
self encoderForUTF8'
"
+
+ "Modified (comment): / 27-02-2017 / 16:06:20 / stefan"
!
encoderToEncodeFrom:oldEncodingArg into:newEncodingArg
@@ -461,20 +443,14 @@
^ self encoderFor:newEncoding.
].
- oldEncoding isSymbol ifFalse:[oldEncoding := oldEncoding asSymbol].
- newEncoding isSymbol ifFalse:[newEncoding := newEncoding asSymbol].
+ oldEncoding := oldEncoding asSymbol.
+ newEncoding := newEncoding asSymbol.
AccessLock critical:[
- encoders := EncodersByName at:oldEncoding ifAbsent:nil.
- encoders isNil ifTrue:[
- EncodersByName at:oldEncoding put:(encoders := Dictionary new).
- ].
+ encoders := EncodersByName at:oldEncoding ifAbsentPut:[Dictionary new].
encoder := encoders at:newEncodingArg ifAbsent:nil.
encoder isNil ifTrue:[
- encoderClasses := self encoderClassesByName at:oldEncoding ifAbsent:nil.
- encoderClasses isNil ifTrue:[
- self encoderClassesByName at:oldEncoding put:(encoderClasses := Dictionary new).
- ].
+ encoderClasses := self encoderClassesByName at:oldEncoding ifAbsentPut:[Dictionary new].
clsName := encoderClasses at:newEncoding ifAbsent:nil.
clsName notNil ifTrue:[
clsName isBehavior ifTrue:[
@@ -511,7 +487,8 @@
].
^ encoder
- " CharacterEncoder initialize
+ "
+ CharacterEncoder initialize
CharacterEncoder encoderToEncodeFrom:#'latin1' into:#'jis7'
CharacterEncoder encoderToEncodeFrom:#'koi8-r' into:#'mac-cyrillic'
CharacterEncoder encoderToEncodeFrom:#'ms-arabic' into:#'mac-arabic'
@@ -520,6 +497,7 @@
"
"Modified: / 12-07-2012 / 19:45:15 / cg"
+ "Modified: / 27-02-2017 / 16:49:14 / stefan"
! !
!CharacterEncoder class methodsFor:'Compatibility-ST80'!
@@ -567,17 +545,13 @@
self initializeEncoderClassesByName.
- OperatingSystem isUNIXlike ifTrue:[
- "/Initialize OS system encoder
- OperatingSystem getCodesetEncoder.
- ].
-
"
self initialize
"
"Modified: / 01-04-2011 / 14:30:06 / cg"
"Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+ "Modified: / 27-02-2017 / 15:43:56 / stefan"
!
initializeEncoderClassesByName
@@ -590,7 +564,7 @@
EncoderClassesByName := Dictionary new.
- EncoderClassesByName at:#'unicode' put:(ud := Dictionary new).
+ EncoderClassesByName at:#'unicode' put:(ud := Dictionary new:237).
ud at:#'fontspecific' put:NullEncoder.
ud at:#'adobe-fontspecific' put:NullEncoder.
ud at:#'ms-oem' put:NullEncoder.
@@ -730,28 +704,23 @@
(ISO10646_to_SGML unicode ( 'sgml' ))
(ISO10646_to_JavaText unicode ( 'java' 'javaText' ))
) triplesDo:[:className :decodesTo :encodesTo |
- |dict|
+ |decodesToDict|
"/ notice that the encoders are not yet installed as autoloaded.
"/ Therefore, we remember their names here.
- dict := EncoderClassesByName at:decodesTo ifAbsent:nil.
- dict isNil ifTrue:[
- EncoderClassesByName at:decodesTo put:(dict := Dictionary new).
- ].
+ decodesToDict := EncoderClassesByName at:decodesTo ifAbsentPut:[Dictionary new].
encodesTo do:[:eachEncodingAlias |
- (dict includesKey:eachEncodingAlias) ifTrue:[
- self halt:'conflicting alias'
- ].
- dict at:eachEncodingAlias put:className.
+ decodesToDict at:eachEncodingAlias put:className ifPresent:[self halt:'conflicting alias'].
].
].
"
- self initialize
+ self initializeEncoderClassesByName
"
"Modified: / 01-04-2011 / 14:30:06 / cg"
"Modified (format): / 23-01-2013 / 09:56:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+ "Modified: / 27-02-2017 / 16:17:43 / stefan"
! !
!CharacterEncoder class methodsFor:'constants'!