added utf8Decode with 8-bit chars only
author Claus Gittinger <cg@exept.de>
Thu, 07 Nov 2002 19:59:42 +0100
changeset 6835 bdb2cb03200c
parent 6834 eba4b58b8692
child 6836 11fd536a8e02
added utf8Decode with 8-bit chars only
CharacterArray.st
--- a/CharacterArray.st	Thu Nov 07 18:55:43 2002 +0100
+++ b/CharacterArray.st	Thu Nov 07 19:59:42 2002 +0100
@@ -4087,36 +4087,6 @@
     ^ self
 ! !
 
-!CharacterArray methodsFor:'encoding'!
-
-encodeOn:anEncoder with:aParameter
-
-    anEncoder encodeString:self with:aParameter
-
-!
-
-rot13
-     "Usenet: from `rotate alphabet 13 places'] 
-      The simple Caesar-cypher encryption that replaces each English
-      letter with the one 13 places forward or back along the alphabet, 
-      so that 'The butler did it!!' becomes 'Gur ohgyre qvq vg!!'
-      Most Usenet news reading and posting programs include a rot13 feature. 
-      It is used to enclose the text in a sealed wrapper that the reader must choose
-      to open -- e.g., for posting things that might offend some readers, or spoilers. 
-      A major advantage of rot13 over rot(N) for other N is that it
-      is self-inverse, so the same code can be used for encoding and decoding."
-
-    ^ self species 
-        streamContents:[:aStream | 
-            self do:[:char | 
-                aStream nextPut:char rot13 ]]
-
-    "
-     'hello world' rot13    
-     'hello world' rot13 rot13   
-    "
-! !
-
 !CharacterArray methodsFor:'encoding/decoding'!
 
 decodeFrom:encodingSymbol
@@ -4369,6 +4339,12 @@
     "Modified: 16.7.1997 / 13:56:21 / cg"
 !
 
+encodeOn:anEncoder with:aParameter
+    "encode myself onto anEncoder as a string; aParameter is handed through unchanged"
+    anEncoder encodeString:self with:aParameter
+
+!
+
 replaceFrom:aString decode:encoding 
     "this is an experimental interface - unfinished"
 
@@ -4416,6 +4392,117 @@
 
     "Created: 22.2.1996 / 16:07:26 / cg"
     "Modified: 1.7.1997 / 17:18:06 / cg"
+!
+
+rot13
+     "Return a copy of myself (same species) with each character replaced by its rot13.
+      From Usenet jargon ('rotate alphabet 13 places'): the simple Caesar-cipher
+      encryption that replaces each English letter with the one 13 places forward
+      or back along the alphabet, so that 'The butler did it!!' becomes 'Gur ohgyre qvq vg!!'
+      Most Usenet news reading and posting programs include a rot13 feature.
+      It is used to enclose the text in a sealed wrapper that the reader must choose
+      to open -- e.g., for posting things that might offend some readers, or spoilers.
+      A major advantage of rot13 over rot(N) for other N is that it
+      is self-inverse, so the same code can be used for encoding and decoding."
+
+    ^ self species 
+        streamContents:[:aStream | 
+            self do:[:char | 
+                aStream nextPut:char rot13 ]]
+
+    "
+     'hello world' rot13    
+     'hello world' rot13 rot13   
+    "
+!
+
+utf8Decoded
+    "Interpreting myself as an UTF-8 representation, decode and return the decoded string.
+     The result is a String; from the first character above 16rFF on, a UnicodeString is used."
+
+    |in out is16Bit c|
+
+    is16Bit := false.
+    out := (String new:self size) writeStream.
+    in := self readStream.
+    [in atEnd] whileFalse:[
+        c := Character utf8DecodeFrom:in.
+        is16Bit ifFalse:[
+            c asciiValue > 16rFF ifTrue:[
+"/ c := Character space.
+                out := WriteStream with:(UnicodeString fromString:out contents).
+                is16Bit := true.
+            ].
+        ].
+        out nextPut:c.
+    ].        
+    ^ out contents
+
+    "
+     #[16rC8 16rA0] asString utf8Decoded 
+     (Character value:16r220) utf8Encoded   
+     (Character value:16r220) utf8Encoded utf8Decoded  
+
+     (Character value:16r800) utf8Encoded      
+     (Character value:16r800) utf8Encoded utf8Decoded  
+    "
+
+    "test:
+
+      |utf8Encoding original readBack|
+
+      1 to:16rFFFF do:[:ascii |
+        original := (Character value:ascii) asString.
+        utf8Encoding := original utf8Encoded.
+        readBack := utf8Encoding utf8Decoded.
+        readBack = original ifFalse:[
+            self halt
+        ]
+      ]
+    "
+!
+
+utf8DecodedWithTwoByteCharactersReplacedBy:replacementCharacter
+    "Interpreting myself as an UTF-8 representation, decode and return the decoded
+     string. Characters above 16rFF (not representable in 8 bits) are replaced by
+     replacementCharacter, which is itself not checked - presumably it must be 8-bit."
+
+    |in out c|
+
+    out := (String new:self size) writeStream.
+    in := self readStream.
+    [in atEnd] whileFalse:[
+        c := Character utf8DecodeFrom:in.
+        c asciiValue > 16rFF ifTrue:[
+            c := replacementCharacter
+        ].
+        out nextPut:c.
+    ].        
+    ^ out contents
+
+    "
+     (Character value:16r220) utf8Encoded 
+        utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)  
+    "
+!
+
+utf8Encoded
+    "Return my UTF-8 representation as a new String (collected via #utf8EncodedOn:)"
+
+    |s|
+
+    s := (String new:self size) writeStream.
+    self utf8EncodedOn:s.
+    ^ s contents
+!
+
+utf8EncodedOn:aStream
+    "Append my UTF-8 representation to the argument, aStream,
+     by asking each of my elements in turn to utf8EncodedOn: that stream."
+
+    self do:[:c|
+        c utf8EncodedOn:aStream.
+    ].
 ! !
 
 !CharacterArray methodsFor:'padded copying'!
@@ -4885,71 +4972,6 @@
 
     "Modified: / 15.6.1998 / 17:21:17 / cg"
     "Created: / 15.6.1998 / 17:22:13 / cg"
-!
-
-utf8Decoded
-    "Interpreting myself as an UTF-8 representation, decode and return
-     the decoded string."
-
-    |in out is16Bit c|
-
-    is16Bit := false.
-    out := (String new:self size) writeStream.
-    in := self readStream.
-    [in atEnd] whileFalse:[
-        c := Character utf8DecodeFrom:in.
-        is16Bit ifFalse:[
-            c asciiValue > 16rFF ifTrue:[
-"/ c := Character space.
-                out := WriteStream with:(UnicodeString fromString:out contents).
-                is16Bit := true.
-            ].
-        ].
-        out nextPut:c.
-    ].        
-    ^ out contents
-
-    "
-     #[16rC8 16rA0] asString utf8Decoded 
-     (Character value:16r220) utf8Encoded   
-     (Character value:16r220) utf8Encoded utf8Decoded  
-
-     (Character value:16r800) utf8Encoded      
-     (Character value:16r220) utf8Encoded utf8Decoded  
-    "
-
-    "test:
-
-      |utf8Encoding original readBack|
-
-      1 to:16rFFFF do:[:ascii |
-        original := (Character value:ascii) asString.
-        utf8Encoding := original utf8Encoded.
-        readBack := utf8Encoding utf8Decoded.
-        readBack = original ifFalse:[
-            self halt
-        ]
-      ]
-    "
-!
-
-utf8Encoded
-    "Return my UTF-8 representation as a new String"
-
-    |s|
-
-    s := (String new:self size) writeStream.
-    self utf8EncodedOn:s.
-    ^ s contents
-!
-
-utf8EncodedOn:aStream
-    "append my UTF-8 representation to the argument, aStream."
-
-
-    self do:[:c|
-        c utf8EncodedOn:aStream.
-    ].
 ! !
 
 !CharacterArray methodsFor:'queries'!
@@ -6234,7 +6256,7 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.230 2002-11-07 17:55:43 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.231 2002-11-07 18:59:42 cg Exp $'
 ! !
 
 CharacterArray initialize!