ByteArray.st
branchjv
changeset 18608 7d521f25267c
parent 18120 e3a375d5f6a8
parent 18596 d6abf6a67de4
child 18617 fbfd2d411738
--- a/ByteArray.st	Sun Jul 12 06:35:39 2015 +0200
+++ b/ByteArray.st	Mon Jul 20 17:01:03 2015 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1989 by Claus Gittinger
 	      All Rights Reserved
@@ -164,6 +166,7 @@
 ! !
 
 
+
 !ByteArray class methodsFor:'queries'!
 
 elementByteSize
@@ -1101,7 +1104,7 @@
      mod       "{ Class:SmallInteger}"
      cpl|
 
-    outStream := WriteStream on:String new.
+    outStream := WriteStream on:(String new:((self size + 2) * 4 // 3)).
     index := 1.
     stop := self size.
 
@@ -1157,16 +1160,17 @@
 asString
     "speed up string conversions"
 
-    |size|
-
-    self class == ByteArray ifTrue:[
-	size := self size.
-	^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
+    |cls size|
+
+    cls := self class.
+    (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+        size := self size.
+        ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
     ].
     ^ super asString
 
     "
-      #[60 61 62 63] asString
+      #[16r41 16r42 16r43] asString
     "
 !
 
@@ -1199,6 +1203,51 @@
     "
      #[1 2 3] literalArrayEncoding
     "
+!
+
+utf8Decoded
+    "Interpreting myself as a UTF-8 representation, decode and return the decoded string."
+
+    |cls in out|
+
+    cls := self class.
+    (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+        self containsNon7BitAscii ifFalse:[
+            "speed up common case: only 7-bit bytes, so no UTF-8 decoding is needed"
+            ^ self asSingleByteString.
+        ].
+        out := CharacterWriteStream on:(String uninitializedNew:self size).
+        in := self readStream.
+        [in atEnd] whileFalse:[
+            out nextPut:(Character utf8DecodeFrom:in).
+        ].
+        ^ out contents
+    ].
+    ^ super utf8Decoded
+
+    "
+     #[16r41 16r42] utf8Decoded
+     #[16rC8 16rA0] utf8Decoded
+     (Character value:16r220) utf8Encoded
+     (Character value:16r220) utf8Encoded utf8Decoded
+
+     (Character value:16r800) utf8Encoded
+     (Character value:16r800) utf8Encoded utf8Decoded
+    "
+
+    "test:
+
+      |utf8Encoding original readBack|
+
+      1 to:16rFFFF do:[:ascii |
+        original := (Character value:ascii) asString.
+        utf8Encoding := original utf8Encoded.
+        readBack := utf8Encoding utf8Decoded.
+        readBack = original ifFalse:[
+            self halt
+        ]
+      ]
+    "
 ! !
 
 !ByteArray methodsFor:'copying'!
@@ -2607,7 +2656,10 @@
 
     "/ what a kludge - Dolphin and Squeak mean: printOn: a stream;
     "/ ST/X (and some old ST80's) mean: draw-yourself on a GC.
-    (self class == ByteArray and:[aGCOrStream isStream]) ifTrue:[
+    |cls|
+
+    cls := self class.
+    ((cls == ByteArray or:[cls == ImmutableByteArray]) and:[aGCOrStream isStream]) ifTrue:[
         self storeOn:aGCOrStream.
         ^ self
     ].
@@ -2804,7 +2856,7 @@
      This my look as a too specific operation to be put here,
      put it is very helpful for UTF8 string reading (Java class reader),
      to quickly determine, if UTF8 decoding is needed or not.
-     As most strings in a class fiel are in fact only containing 7bit ascii,
+     As most strings in a class file are in fact only containing 7bit ascii,
      this should speedup class file reading considerably"
 
 %{  /* NOCONTEXT */
@@ -3010,6 +3062,7 @@
     "
 ! !
 
+
 !ByteArray methodsFor:'searching'!
 
 indexOf:aByte startingAt:start
@@ -3103,10 +3156,10 @@
 !ByteArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/ByteArray.st,v 1.225 2015-02-20 23:55:55 cg Exp $'
+    ^ '$Header$'
 !
 
 version_CVS
-    ^ '$Header: /cvs/stx/stx/libbasic/ByteArray.st,v 1.225 2015-02-20 23:55:55 cg Exp $'
+    ^ '$Header$'
 ! !