CharacterEncoderImplementations__ISO10646_to_UTF8.st
branchjv
changeset 21387 e3865533e6a6
parent 19863 513bd7237fe7
parent 21301 f33ff66e5fff
child 23107 40173e082cbc
--- a/CharacterEncoderImplementations__ISO10646_to_UTF8.st	Sun Jan 29 11:04:01 2017 +0000
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8.st	Wed Feb 01 11:28:48 2017 +0000
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
 "
  COPYRIGHT (c) 2004 by eXept Software AG
 	      All Rights Reserved
@@ -45,6 +43,21 @@
 "
 !
 
+documentation
+"
+    I encode Unicode characters into UTF8 byte sequences and decode UTF8 back into Unicode characters.
+    
+    Notice the naming (many are confused):
+        Unicode is the set of number-to-glyph assignments
+    whereas:
+        UTF8 is a concrete way of transmitting Unicode codePoints (numbers).
+    UTF16 is another concrete encoding, for example.    
+        
+    ST/X NEVER uses UTF8 internally - all characters are full 24-bit characters.
+    Only when exchanging data are these converted into UTF8 (or other) byte sequences.
+"
+!
+
 examples
 "
   Encoding (unicode to utf8)
@@ -54,7 +67,7 @@
   Decoding (utf8 to unicode):
      |t|
 
-     t := ISO10646_to_UTF8 encodeString:'Helloœ'.
+     t := ISO10646_to_UTF8 encodeString:'Hello'.
      ISO10646_to_UTF8 decodeString:t.
 "
 ! !
@@ -198,6 +211,10 @@
     ^ '$Header$'
 !
 
+version_CVS
+    ^ '$Header$'
+!
+
 version_HG
 
     ^ '$Changeset: <not expanded> $'