class: ByteArray
added: #utf8Decoded
comment/format in: #containsNon7BitAscii
changed:
#asString
#displayOn:
--- a/ByteArray.st Sat Jul 18 01:24:04 2015 +0200
+++ b/ByteArray.st Sat Jul 18 01:24:24 2015 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
"
COPYRIGHT (c) 1989 by Claus Gittinger
All Rights Reserved
@@ -165,8 +167,6 @@
-
-
!ByteArray class methodsFor:'queries'!
elementByteSize
@@ -188,7 +188,6 @@
! !
-
!ByteArray methodsFor:'Compatibility-Squeak'!
bitXor:aByteArray
@@ -1161,16 +1160,17 @@
asString
"speed up string conversions"
- |size|
-
- self class == ByteArray ifTrue:[
- size := self size.
- ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
+ |cls size|
+
+ cls := self class.
+ (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+ size := self size.
+ ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
].
^ super asString
"
- #[60 61 62 63] asString
+ #[16r41 16r42 16r43] asString
"
!
@@ -1203,6 +1203,51 @@
"
#[1 2 3] literalArrayEncoding
"
+!
+
+utf8Decoded
+ "Interpreting myself as a UTF-8 representation, decode and return the decoded string. Exact ByteArray / ImmutableByteArray instances are decoded here; any other class defers to the inherited implementation."
+
+ |cls in out|
+
+ cls := self class.
+ (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+ self containsNon7BitAscii ifFalse:[
+ "speed up common case: only 7-bit bytes are present, so no UTF-8 decoding is needed"
+ ^ self asSingleByteString.
+ ].
+ out := CharacterWriteStream on:(String uninitializedNew:self size). "initial buffer sized to my byte count"
+ in := self readStream.
+ [in atEnd] whileFalse:[ "decode character by character until the input is exhausted"
+ out nextPut:(Character utf8DecodeFrom:in).
+ ].
+ ^ out contents
+ ].
+ ^ super utf8Decoded
+
+ "
+ #[16r41 16r42] utf8Decoded
+ #[16rC8 16rA0] utf8Decoded
+ (Character value:16r220) utf8Encoded
+ (Character value:16r220) utf8Encoded utf8Decoded
+
+ (Character value:16r800) utf8Encoded
+ (Character value:16r800) utf8Encoded utf8Decoded
+ "
+
+ "test:
+
+ |utf8Encoding original readBack|
+
+ 1 to:16rFFFF do:[:ascii |
+ original := (Character value:ascii) asString.
+ utf8Encoding := original utf8Encoded.
+ readBack := utf8Encoding utf8Decoded.
+ readBack = original ifFalse:[
+ self halt
+ ]
+ ]
+ "
! !
!ByteArray methodsFor:'copying'!
@@ -2611,7 +2656,10 @@
"/ what a kludge - Dolphin and Squeak mean: printOn: a stream;
"/ ST/X (and some old ST80's) mean: draw-yourself on a GC.
- (self class == ByteArray and:[aGCOrStream isStream]) ifTrue:[
+ |cls|
+
+ cls := self class.
+ ((cls == ByteArray or:[cls == ImmutableByteArray]) and:[aGCOrStream isStream]) ifTrue:[
self storeOn:aGCOrStream.
^ self
].
@@ -2808,7 +2856,7 @@
This my look as a too specific operation to be put here,
put it is very helpful for UTF8 string reading (Java class reader),
to quickly determine, if UTF8 decoding is needed or not.
- As most strings in a class fiel are in fact only containing 7bit ascii,
+ As most strings in a class file in fact contain only 7bit ascii,
this should speedup class file reading considerably"
%{ /* NOCONTEXT */
@@ -3105,7 +3153,6 @@
"Modified: 22.4.1996 / 12:55:30 / cg"
! !
-
!ByteArray class methodsFor:'documentation'!
version