--- a/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st Fri Feb 27 19:26:01 2015 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st Fri Feb 27 19:53:22 2015 +0100
@@ -51,6 +51,9 @@
This is a q&d hack, to at least support the first page (latin1) characters.
Will be enhanced for the 2nd and 3rd unicode page, when I find time.
+ [caveat:]
+ only a small subset of multi-diacritic compositions is supported so far (for example: trema plus acute)
+
[author:]
Claus Gittinger
@@ -77,14 +80,14 @@
"/ attention: the following strings contain non-latin characters
"/ if you don't see them, change your font setting for a better font
- (16r0300 "gravis" 'AÀaàEÈeèIÌiìoòOÒUÙuùNǸnǹÜǛüǜWẀwẁYỲyỳ')
- (16r0301 "akut" 'AÁaáEÉeéIÍiíOÓoóUÚuúyýYÝCĆcćNŃnńRŔrŕSŚsśZŹzźGǴgǵÆǼæǽØǾøǿÜǗüǘMḾmḿKḰkḱPṔpṕWẂwẃ')
+ (16r0300 "gravis" 'AÀaàEÈeèIÌiìoòOÒUÙuùNǸnǹWẀwẁYỲyỳÜǛüǜ')
+ (16r0301 "akut" 'AÁaáEÉeéIÍiíOÓoóUÚuúyýYÝCĆcćNŃnńRŔrŕSŚsśZŹzźGǴgǵÆǼæǽØǾøǿMḾmḿKḰkḱPṔpṕWẂwẃÜǗüǘ')
(16r0302 "circonflex" 'AÂaâEÊeêIÎiîOÔoôUÛuûCĈcĉGĜgĝHĤhĥJĴjĵSŜsŝWŴwŵYŶyŷZẐzẑ')
(16r0303 "tilde" 'AÃaãNÑnñOÕoõUŨuũYỸyỹEẼeẽVṼvṽ')
- (16r0304 "macron" 'AĀaāEĒeēIĪiīOŌoōUŪuūÜǕüǖGḠgḡ' )
+ (16r0304 "macron" 'AĀaāEĒeēIĪiīOŌoōUŪuūGḠgḡÜǕüǖ' )
(16r0306 "breve" 'AĂaăEĔeĕGĞgğIĬiĭOŎoŏUŬuŭ')
(16r0307 "dot above" 'AȦaȧOȮoȯCĊcċEĖeėGĠgġZŻzżBḂbḃDḊdḋFḞfḟHḢhḣMṀmṁNṄnṅPṖpṗRṘrṙSṠsṡTṪtṫWẆwẇXẊxẋYẎyẏ' )
- (16r0308 "umlaut/trema" 'AÄaäEËeëOÖoöUÜuüIÏiïyÿYŸHḦhḧXẌxẍtẗ')
+ (16r0308 "umlaut/trema" 'AÄaäEËeëOÖoöUÜuüIÏiïyÿYŸHḦhḧXẌxẍtẗÙǛùǜŪǕūǖÚǗúǘǓǙǔǚ')
(16r030A "ring" 'AÅaåUŮuůwẘyẙ')
(16r030B "dbl akut" 'OŐoőUŰuű')
(16r030C "hatcheck" 'CČcčDĎEĚeěNŇnňRŘrřSŠsšZŽzžAǍaǎIǏiǐOǑoǒUǓuǔGǦgǧKǨkǩÜǙüǚ')
@@ -119,14 +122,15 @@
|cp map i|
cp := diacriticalChar codePoint.
- map := ComposeMap at:cp ifAbsent:nil.
-
- map notNil ifTrue:[
- "/ compose
- i := map indexOf: baseChar.
- i ~~ 0 ifTrue:[
- outStream nextPut: (map at:i+1).
- ^ self.
+ (cp between:16r300 and:16r328) ifTrue:[
+ map := ComposeMap at:cp ifAbsent:nil.
+ map notNil ifTrue:[
+ "/ compose
+ i := map indexOf: baseChar.
+ i ~~ 0 ifTrue:[
+ outStream nextPut: (map at:i+1).
+ ^ self.
+ ].
].
].
@@ -147,7 +151,7 @@
|s buff previous|
s := super decodeString:aStringOrByteCollection.
- (s contains:[:char | char codePoint between:16r0300 and:16r0327]) ifFalse:[^ s].
+ (s contains:[:char | char codePoint between:16r0300 and:16r0328]) ifFalse:[^ s].
ComposeMap isNil ifTrue:[
self class initializeDecomposeMap
@@ -156,8 +160,20 @@
buff := CharacterWriteStream on:''.
previous := nil.
s do:[:each |
- (each codePoint between:16r0300 and:16r0327) ifTrue:[
- self compositionOf:previous with:each to:buff.
+ (each codePoint between:16r0300 and:16r0328) ifTrue:[
+ previous isNil ifTrue:[
+ buff isEmpty ifTrue:[
+ "/ malformed input - a combiner with no preceding base character; it is written to the output unchanged.
+ buff nextPut:each.
+ ] ifFalse:[
+ "/ multi-compose - no pending base character: take back the last character written and compose it with this combiner
+ previous := buff last.
+ buff skip:-1.
+ self compositionOf:previous with:each to:buff.
+ ].
+ ] ifFalse:[
+ self compositionOf:previous with:each to:buff.
+ ].
previous := nil.
] ifFalse:[
previous notNil ifTrue:[
@@ -312,10 +328,10 @@
!ISO10646_to_UTF8_MAC class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.7 2015-02-27 18:26:01 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
!
version_CVS
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.7 2015-02-27 18:26:01 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
! !