# HG changeset patch # User Stefan Vogel # Date 1437175464 -7200 # Node ID d6abf6a67de492550982760f0e5420de421f961a # Parent aa02a050b351df377266f2a83d7421de382cdefc class: ByteArray added: #utf8Decoded comment/format in: #containsNon7BitAscii changed: #asString #displayOn: diff -r aa02a050b351 -r d6abf6a67de4 ByteArray.st --- a/ByteArray.st Sat Jul 18 01:24:04 2015 +0200 +++ b/ByteArray.st Sat Jul 18 01:24:24 2015 +0200 @@ -1,3 +1,5 @@ +"{ Encoding: utf8 }" + " COPYRIGHT (c) 1989 by Claus Gittinger All Rights Reserved @@ -165,8 +167,6 @@ - - !ByteArray class methodsFor:'queries'! elementByteSize @@ -188,7 +188,6 @@ ! ! - !ByteArray methodsFor:'Compatibility-Squeak'! bitXor:aByteArray @@ -1161,16 +1160,17 @@ asString "speed up string conversions" - |size| - - self class == ByteArray ifTrue:[ - size := self size. - ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1. + |cls size| + + cls := self class. + (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[ + size := self size. + ^ (String uninitializedNew:size) replaceBytesFrom:1 to:size with:self startingAt:1. ]. ^ super asString " - #[60 61 62 63] asString + #[16r41 16r42 16r43] asString " ! @@ -1203,6 +1203,51 @@ " #[1 2 3] literalArrayEncoding " +! + +utf8Decoded + "Interpreting myself as an UTF-8 representation, decode and return the decoded string." + + |cls in out| + + cls := self class. + (cls == ByteArray or:[cls == ImmutableByteArray]) ifTrue:[ + self containsNon7BitAscii ifFalse:[ + "speed up common case" + ^ self asSingleByteString. + ]. + out := CharacterWriteStream on:(String uninitializedNew:self size). + in := self readStream. + [in atEnd] whileFalse:[ + out nextPut:(Character utf8DecodeFrom:in). + ]. + ^ out contents + ]. + ^ super utf8Decoded + + " + #[16r41 16r42] utf8Decoded + #[16rC8 16rA0] utf8Decoded + (Character value:16r220) utf8Encoded + (Character value:16r220) utf8Encoded utf8Decoded + + (Character value:16r800) utf8Encoded + (Character value:16r220) utf8Encoded utf8Decoded + " + + "test: + + |utf8Encoding original readBack| + + 1 to:16rFFFF do:[:ascii | + original := (Character value:ascii) asString. + utf8Encoding := original utf8Encoded. + readBack := utf8Encoding utf8Decoded. + readBack = original ifFalse:[ + self halt + ] + ] + " ! ! !ByteArray methodsFor:'copying'! @@ -2611,7 +2656,10 @@ "/ what a kludge - Dolphin and Squeak mean: printOn: a stream; "/ ST/X (and some old ST80's) mean: draw-yourself on a GC. - (self class == ByteArray and:[aGCOrStream isStream]) ifTrue:[ + |cls| + + cls := self class. + ((cls == ByteArray or:[cls == ImmutableByteArray]) and:[aGCOrStream isStream]) ifTrue:[ self storeOn:aGCOrStream. ^ self ]. @@ -2808,7 +2856,7 @@ This my look as a too specific operation to be put here, put it is very helpful for UTF8 string reading (Java class reader), to quickly determine, if UTF8 decoding is needed or not. - As most strings in a class fiel are in fact only containing 7bit ascii, + As most strings in a class file are in fact only containing 7bit ascii, this should speedup class file reading considerably" %{ /* NOCONTEXT */ @@ -3105,7 +3153,6 @@ "Modified: 22.4.1996 / 12:55:30 / cg" ! ! - !ByteArray class methodsFor:'documentation'! version