--- a/ByteArray.st Sun Jul 12 06:35:39 2015 +0200
+++ b/ByteArray.st Mon Jul 20 17:01:03 2015 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
"
COPYRIGHT (c) 1989 by Claus Gittinger
All Rights Reserved
@@ -164,6 +166,7 @@
! !
+
!ByteArray class methodsFor:'queries'!
elementByteSize
@@ -1101,7 +1104,7 @@
mod "{ Class:SmallInteger}"
cpl|
- outStream := WriteStream on:String new.
+ outStream := WriteStream on:(String new:((self size + 2) * 4 // 3)).
index := 1.
stop := self size.
@@ -1157,16 +1160,17 @@
asString
"speed up string conversions"
- |size|
-
- self class == ByteArray ifTrue:[
- size := self size.
- ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
+ |cls size|
+
+ cls := self class.
+ (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[
+ size := self size.
+ ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1.
].
^ super asString
"
- #[60 61 62 63] asString
+ #[16r41 16r42 16r43] asString
"
!
@@ -1199,6 +1203,51 @@
"
#[1 2 3] literalArrayEncoding
"
+!
+
+utf8Decoded
+ "Interpret my bytes as UTF-8 encoded text; decode them and return the resulting string."
+
+ |cls in out|
+
+ cls := self class.
+ (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[ "identity test on the exact class; any other class falls through to super below"
+ self containsNon7BitAscii ifFalse:[
+ "fast path for the common case: no non-7bit bytes found, so no UTF-8 decoding is needed"
+ ^ self asSingleByteString.
+ ].
+ out := CharacterWriteStream on:(String uninitializedNew:self size). "initial buffer sized to my byte count"
+ in := self readStream.
+ [in atEnd] whileFalse:[ "decode one character per iteration until the byte stream is exhausted"
+ out nextPut:(Character utf8DecodeFrom:in).
+ ].
+ ^ out contents
+ ].
+ ^ super utf8Decoded
+
+ "
+ #[16r41 16r42] utf8Decoded
+ #[16rC8 16rA0] utf8Decoded
+ (Character value:16r220) utf8Encoded
+ (Character value:16r220) utf8Encoded utf8Decoded
+
+ (Character value:16r800) utf8Encoded
+ (Character value:16r800) utf8Encoded utf8Decoded
+ "
+
+ "test:
+
+ |utf8Encoding original readBack|
+
+ 1 to:16rFFFF do:[:ascii |
+ original := (Character value:ascii) asString.
+ utf8Encoding := original utf8Encoded.
+ readBack := utf8Encoding utf8Decoded.
+ readBack = original ifFalse:[
+ self halt
+ ]
+ ]
+ "
! !
!ByteArray methodsFor:'copying'!
@@ -2607,7 +2656,10 @@
"/ what a kludge - Dolphin and Squeak mean: printOn: a stream;
"/ ST/X (and some old ST80's) mean: draw-yourself on a GC.
- (self class == ByteArray and:[aGCOrStream isStream]) ifTrue:[
+ |cls|
+
+ cls := self class.
+ ((cls == ByteArray or:[cls == ImmutableByteArray]) and:[aGCOrStream isStream]) ifTrue:[
self storeOn:aGCOrStream.
^ self
].
@@ -2804,7 +2856,7 @@
This my look as a too specific operation to be put here,
put it is very helpful for UTF8 string reading (Java class reader),
to quickly determine, if UTF8 decoding is needed or not.
- As most strings in a class fiel are in fact only containing 7bit ascii,
+ As most strings in a class file are in fact only containing 7bit ascii,
this should speedup class file reading considerably"
%{ /* NOCONTEXT */
@@ -3010,6 +3062,7 @@
"
! !
+
!ByteArray methodsFor:'searching'!
indexOf:aByte startingAt:start
@@ -3103,10 +3156,10 @@
!ByteArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/ByteArray.st,v 1.225 2015-02-20 23:55:55 cg Exp $'
+ ^ '$Header$'
!
version_CVS
- ^ '$Header: /cvs/stx/stx/libbasic/ByteArray.st,v 1.225 2015-02-20 23:55:55 cg Exp $'
+ ^ '$Header$'
! !