class: ByteArray
author Stefan Vogel <sv@exept.de>
Sat, 18 Jul 2015 01:24:24 +0200
changeset 18596 d6abf6a67de4
parent 18595 aa02a050b351
child 18597 5349c840a5a4
class: ByteArray added: #utf8Decoded comment/format in: #containsNon7BitAscii changed: #asString #displayOn:
ByteArray.st
--- a/ByteArray.st	Sat Jul 18 01:24:04 2015 +0200
+++ b/ByteArray.st	Sat Jul 18 01:24:24 2015 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1989 by Claus Gittinger
 	      All Rights Reserved
@@ -165,8 +167,6 @@
 
 
 
-
-
 !ByteArray class methodsFor:'queries'!
 
 elementByteSize
@@ -188,7 +188,6 @@
 ! !
 
 
-
 !ByteArray methodsFor:'Compatibility-Squeak'!
 
 bitXor:aByteArray
@@ -1161,16 +1160,17 @@
 asString
     "speed up string conversions"
 
-    |size|
-
-    self class == ByteArray ifTrue:[
-	size := self size.
-	^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
+    |cls size|
+
+    cls := self class.
+    (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+        size := self size.
+        ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
     ].
     ^ super asString
 
     "
-      #[60 61 62 63] asString
+      #[16r41 16r42 16r43] asString
     "
 !
 
@@ -1203,6 +1203,51 @@
     "
      #[1 2 3] literalArrayEncoding
     "
+!
+
+utf8Decoded
+    "Interpreting myself as a UTF-8 representation, decode and return the decoded string. Only exact ByteArray / ImmutableByteArray instances are decoded here; any other class defers to super."
+
+    |cls in out|
+
+    cls := self class.
+    (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+        self containsNon7BitAscii ifFalse:[
+            "speed up common case: only 7-bit ascii bytes, so no multi-byte sequences need decoding"
+            ^ self asSingleByteString.
+        ].
+        out := CharacterWriteStream on:(String uninitializedNew:self size).
+        in := self readStream.
+        [in atEnd] whileFalse:[
+            out nextPut:(Character utf8DecodeFrom:in).
+        ].
+        ^ out contents
+    ].
+    ^ super utf8Decoded
+
+    "
+     #[16r41 16r42] utf8Decoded
+     #[16rC8 16rA0] utf8Decoded
+     (Character value:16r220) utf8Encoded
+     (Character value:16r220) utf8Encoded utf8Decoded
+
+     (Character value:16r800) utf8Encoded
+     (Character value:16r800) utf8Encoded utf8Decoded
+    "
+
+    "test:
+
+      |utf8Encoding original readBack|
+
+      1 to:16rFFFF do:[:ascii |
+        original := (Character value:ascii) asString.
+        utf8Encoding := original utf8Encoded.
+        readBack := utf8Encoding utf8Decoded.
+        readBack = original ifFalse:[
+            self halt
+        ]
+      ]
+    "
 ! !
 
 !ByteArray methodsFor:'copying'!
@@ -2611,7 +2656,10 @@
 
     "/ what a kludge - Dolphin and Squeak mean: printOn: a stream;
     "/ ST/X (and some old ST80's) mean: draw-yourself on a GC.
-    (self class == ByteArray and:[aGCOrStream isStream]) ifTrue:[
+    |cls|
+
+    cls := self class.
+    ((cls == ByteArray or:[cls == ImmutableByteArray]) and:[aGCOrStream isStream]) ifTrue:[
         self storeOn:aGCOrStream.
         ^ self
     ].
@@ -2808,7 +2856,7 @@
      This my look as a too specific operation to be put here,
      put it is very helpful for UTF8 string reading (Java class reader),
      to quickly determine, if UTF8 decoding is needed or not.
-     As most strings in a class fiel are in fact only containing 7bit ascii,
+     As most strings in a class file are in fact only containing 7bit ascii,
      this should speedup class file reading considerably"
 
 %{  /* NOCONTEXT */
@@ -3105,7 +3153,6 @@
     "Modified: 22.4.1996 / 12:55:30 / cg"
 ! !
 
-
 !ByteArray class methodsFor:'documentation'!
 
 version