CharacterEncoderImplementations__MS_Ansi.st
changeset 22585 cae54b54d329
parent 22583 879bd9713cb9
child 22589 a54279da036e
--- a/CharacterEncoderImplementations__MS_Ansi.st	Wed Mar 07 16:36:35 2018 +0100
+++ b/CharacterEncoderImplementations__MS_Ansi.st	Wed Mar 07 16:58:40 2018 +0100
@@ -40,15 +40,18 @@
 
 documentation
 "
-    Microsoft ANSI - which is wahat Microft thought what is ANSI coding in the 80s (whatever that should be).
+    Microsoft ANSI - which is what Microsoft thought ANSI coding was in the 80s (whatever that should be).
     It is definitely not an ANSI standard!!
 
-    In fact it is CP1252 eincoding which is based on iso8859-1. 
+    In fact it is CP1252 encoding which is based on iso8859-1. 
     Codepoints 0x80–0x9F which are control characters
     in iso8859 are mapped to special windows characters.
 
     Apparently, meanwhile Microsoft supports codepoints above 0xff as unicode.
-    We map unicode codepoints which are defined in CP1552 to CP1552, and leave others unchanged.
+    We map unicode codepoints which are defined in CP1252 to CP1252,
+    and leave others unchanged.
+    So this encoder will return characters above 0xFF if required.
+    (compare to MS_CP1252, which does not)
 
     [see with:]
         CharacterEncoderImplementations::MS_Ansi showCharacterSet
@@ -438,12 +441,13 @@
 encode:unicodeArg
     |unicode "{ Class: SmallInteger }" t|
 
-false ifTrue:[
-    "/ mh - it seems that microsoft has fixed ms-ansi to be unicode compatible
-    "/ with XP, Vista etc.
-    "/ as W95 is not supported anyhow, simply return identity here...
-    ^ unicodeArg.
-].
+    "/ the 'mh' comment inside the following block is not correct; the identity shortcut is disabled (false ifTrue:).
+    false ifTrue:[
+        "/ mh - it seems that microsoft has fixed ms-ansi to be unicode compatible
+        "/ with XP, Vista etc.
+        "/ as W95 is not supported anyhow, simply return identity here...
+        ^ unicodeArg.
+    ].
 
     "we map unicode chars to CP1252 where a mapping exists.
      If no mapping exists, we keep the unicode char"
@@ -566,14 +570,13 @@
 
     |newString myCode bits size "{ Class:SmallInteger }"|
 
-
-    "/ mh - it seems that microsoft has fixed ms-ansi to be unicode compatible
-    "/ with XP, Vista etc.
-    "/ as W95 is not supported anyhow, simply return identity here...
-
-false ifTrue:[
-    ^ aStringOrUnicodeString.
-].
+    "/ the 'mh' comment inside the following block is not correct; the identity shortcut is disabled (false ifTrue:).
+    false ifTrue:[
+        "/ mh - it seems that microsoft has fixed ms-ansi to be unicode compatible
+        "/ with XP, Vista etc.
+        "/ as W95 is not supported anyhow, simply return identity here...
+        ^ aStringOrUnicodeString.
+    ].
 
     "/ all between 0 and 7F ?
     (aStringOrUnicodeString containsNon7BitAscii) ifFalse:[
@@ -607,6 +610,19 @@
     "Modified: / 17-01-2018 / 14:15:39 / stefan"
 ! !
 
+!MS_Ansi methodsFor:'queries'!
+
+characterSize:charOrCodePoint
+    "return the number of bytes (1, 2 or 4) required to encode charOrCodePoint"
+
+    |code|
+
+    code := self encode:charOrCodePoint asInteger.  "the size is derived from the encoded value, not from the raw argument"
+    code <= 16rFF ifTrue:[^ 1].     "encoded value fits into a single byte"
+    code <= 16rFFFF ifTrue:[^ 2].   "encoded value fits into two bytes"
+    ^ 4                             "everything above 16rFFFF is reported as four bytes"
+! !
+
 !MS_Ansi class methodsFor:'documentation'!
 
 version