CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st
changeset 17568 e90410336cc2
parent 17567 2d57395ef7e0
child 21478 2e63fbcbfa85
--- a/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st	Fri Feb 27 19:26:01 2015 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st	Fri Feb 27 19:53:22 2015 +0100
@@ -51,6 +51,9 @@
     This is a q&d hack, to at least support the first page (latin1) characters.
     Will be enhanced for the 2nd and 3rd unicode page, when I find time.
 
+    [caveat:]
+        only a small subset of multi-composes is supported so far (for example: trema plus acute)
+
     [author:]
         Claus Gittinger
 
@@ -77,14 +80,14 @@
         "/ attention: the following strings contain non-latin characters
         "/ if you don't see them, change your font setting for a better font
 
-        (16r0300 "gravis"       'AÀaàEÈeèIÌiìoòOÒUÙuùNǸnǹÜǛüǜWẀwẁYỲyỳ')
-        (16r0301 "akut"         'AÁaáEÉeéIÍiíOÓoóUÚuúyýYÝCĆcćNŃnńRŔrŕSŚsśZŹzźGǴgǵÆǼæǽØǾøǿÜǗüǘMḾmḿKḰkḱPṔpṕWẂwẃ')
+        (16r0300 "gravis"       'AÀaàEÈeèIÌiìoòOÒUÙuùNǸnǹWẀwẁYỲyỳÜǛüǜ')  
+        (16r0301 "akut"         'AÁaáEÉeéIÍiíOÓoóUÚuúyýYÝCĆcćNŃnńRŔrŕSŚsśZŹzźGǴgǵÆǼæǽØǾøǿMḾmḿKḰkḱPṔpṕWẂwẃÜǗüǘ') 
         (16r0302 "circonflex"   'AÂaâEÊeêIÎiîOÔoôUÛuûCĈcĉGĜgĝHĤhĥJĴjĵSŜsŝWŴwŵYŶyŷZẐzẑ')
         (16r0303 "tilde"        'AÃaãNÑnñOÕoõUŨuũYỸyỹEẼeẽVṼvṽ')
-        (16r0304 "macron"       'AĀaāEĒeēIĪiīOŌoōUŪuūÜǕüǖGḠgḡ' )
+        (16r0304 "macron"       'AĀaāEĒeēIĪiīOŌoōUŪuūGḠgḡÜǕüǖ' ) 
         (16r0306 "breve"        'AĂaăEĔeĕGĞgğIĬiĭOŎoŏUŬuŭ')
         (16r0307 "dot above"    'AȦaȧOȮoȯCĊcċEĖeėGĠgġZŻzżBḂbḃDḊdḋFḞfḟHḢhḣMṀmṁNṄnṅPṖpṗRṘrṙSṠsṡTṪtṫWẆwẇXẊxẋYẎyẏ' )
-        (16r0308 "umlaut/trema" 'AÄaäEËeëOÖoöUÜuüIÏiïyÿYŸHḦhḧXẌxẍtẗ')
+        (16r0308 "umlaut/trema" 'AÄaäEËeëOÖoöUÜuüIÏiïyÿYŸHḦhḧXẌxẍtẗÙǛùǜŪǕūǖÚǗúǘǓǙǔǚ')
         (16r030A "ring"         'AÅaåUŮuůwẘyẙ')
         (16r030B "dbl akut"     'OŐoőUŰuű')
         (16r030C "hatcheck"     'CČcčDĎEĚeěNŇnňRŘrřSŠsšZŽzžAǍaǎIǏiǐOǑoǒUǓuǔGǦgǧKǨkǩÜǙüǚ')
@@ -119,14 +122,15 @@
     |cp map i|
 
     cp := diacriticalChar codePoint.
-    map := ComposeMap at:cp ifAbsent:nil.
-
-    map notNil ifTrue:[
-        "/ compose
-        i := map indexOf: baseChar.
-        i ~~ 0 ifTrue:[
-            outStream nextPut: (map at:i+1).
-            ^ self.
+    (cp between:16r300 and:16r328) ifTrue:[
+        map := ComposeMap at:cp ifAbsent:nil.
+        map notNil ifTrue:[
+            "/ compose
+            i := map indexOf: baseChar.
+            i ~~ 0 ifTrue:[
+                outStream nextPut: (map at:i+1).
+                ^ self.
+            ].
         ].
     ].
 
@@ -147,7 +151,7 @@
     |s buff previous|
 
     s := super decodeString:aStringOrByteCollection.
-    (s contains:[:char | char codePoint between:16r0300 and:16r0327]) ifFalse:[^ s].
+    (s contains:[:char | char codePoint between:16r0300 and:16r0328]) ifFalse:[^ s].
 
     ComposeMap isNil ifTrue:[
         self class initializeDecomposeMap
@@ -156,8 +160,20 @@
     buff := CharacterWriteStream on:''.
     previous := nil.
     s do:[:each |
-        (each codePoint between:16r0300 and:16r0327) ifTrue:[
-            self compositionOf:previous with:each to:buff.
+        (each codePoint between:16r0300 and:16r0328) ifTrue:[
+            previous isNil ifTrue:[
+                buff isEmpty ifTrue:[
+                    "/ wrong - combiner not allowed here.
+                    buff nextPut:each.
+                ] ifFalse:[
+                    "/ ouch - a multi-compose
+                    previous := buff last.
+                    buff skip:-1.
+                    self compositionOf:previous with:each to:buff.
+                ].
+            ] ifFalse:[
+                self compositionOf:previous with:each to:buff.
+            ].
             previous := nil.
         ] ifFalse:[
             previous notNil ifTrue:[
@@ -312,10 +328,10 @@
 !ISO10646_to_UTF8_MAC class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.7 2015-02-27 18:26:01 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
 !
 
 version_CVS
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.7 2015-02-27 18:26:01 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8_MAC.st,v 1.8 2015-02-27 18:53:22 cg Exp $'
 ! !