# HG changeset patch # User Merge Script # Date 1449122239 -3600 # Node ID 13360506ef811188ac5b41da06effd59f2d50d25 # Parent 0b89780d78105d78f341730fe1530c61b64038ca# Parent f4c3afd137e36f41a366537372d974a8862ae1ee Merge diff -r 0b89780d7810 -r 13360506ef81 ByteArray.st --- a/ByteArray.st Tue Dec 01 06:37:23 2015 +0100 +++ b/ByteArray.st Thu Dec 03 06:57:19 2015 +0100 @@ -164,6 +164,7 @@ ! ! + !ByteArray class methodsFor:'queries'! elementByteSize @@ -184,6 +185,7 @@ "Modified: 23.4.1996 / 15:56:25 / cg" ! ! + !ByteArray methodsFor:'Compatibility-Squeak'! bitXor:aByteArray @@ -2750,6 +2752,13 @@ " ! +containsNon8BitElements + "return true, if one of my elements is larger than a single byte. + Per definition not." + + ^ false. +! + max "return the maximum value in the receiver - redefined to speedup image processing and sound-player @@ -2914,6 +2923,7 @@ " ! ! + !ByteArray methodsFor:'searching'! indexOf:aByte startingAt:start @@ -2979,6 +2989,7 @@ " ! ! + !ByteArray methodsFor:'testing'! isByteArray diff -r 0b89780d7810 -r 13360506ef81 CharacterArray.st --- a/CharacterArray.st Tue Dec 01 06:37:23 2015 +0100 +++ b/CharacterArray.st Thu Dec 03 06:57:19 2015 +0100 @@ -136,10 +136,13 @@ fromString:aString "return a copy of the argument, aString" - ^ (self uninitializedNew:aString size) replaceFrom:1 with:aString - - " - Unicode16String fromString:'hello' + |sz| + + sz := aString size. + ^ (self uninitializedNew:sz) replaceFrom:1 to:sz with:aString startingAt:1 + + " + Unicode16String fromString:'hello' " ! @@ -3273,11 +3276,13 @@ If not possible, the (wideString) receiver is returned." self isWideString ifFalse:[^ self]. - (self contains:[:char | char codePoint > 255]) ifFalse:[^ self asSingleByteString]. - ^ self + self containsNon8BitElements ifTrue:[^ self]. + ^ self asSingleByteString " 'hello' asSingleByteStringIfPossible + 'hello' asText asSingleByteStringIfPossible + 'hello' asUnicodeString asText asSingleByteStringIfPossible 'hello' asUnicodeString asSingleByteStringIfPossible " ! @@ -3335,19 +3340,30 @@ asSymbolIfInterned "If a symbol with the receiver's characters is already known, return it. Otherwise, return nil. This can be used to query for an existing symbol and is the same as: - self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil] + self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil] but slightly faster, since the symbol lookup operation is only performed once. The receiver must be a singleByte-String. TwoByte- and FourByteSymbols are (currently ?) not allowed." - |s| - - - s := self string. - s ~~ self ifTrue:[ - ^ s asSymbolIfInterned + |str| + + str := self string. + str == self ifTrue:[ + "must be some kind of N-ByteString" + str := self asSingleByteStringIfPossible. + str == self ifTrue:[ + "single byte string conversion is not possible" + ^ nil. + ]. ]. - ^ nil. + ^ str asSymbolIfInterned + + " + (Unicode16String with:(Character value:16rFFFF)) asSymbolIfInterned + 'new' asUnicodeString asSymbolIfInterned + 'new' asText asSymbolIfInterned + 'new' asUnicodeString asText asSymbolIfInterned + " "Created: 22.5.1996 / 16:37:04 / cg" ! @@ -5507,6 +5523,20 @@ " ! +containsNon8BitElements + "return true, if the underlying string contains elements larger than a single byte" + + |sz "{ Class:SmallInteger }"| + + sz := self size. + 1 to:sz do:[:idx| + (self at:idx) codePoint > 16rFF ifTrue:[ + ^ true. + ]. + ]. + ^ false. +! + continuesWith:aString startingAt:startIndex "return true, if the receiver beginning at startIndex contains the characters in aString." diff -r 0b89780d7810 -r 13360506ef81 String.st --- a/String.st Tue Dec 01 06:37:23 2015 +0100 +++ b/String.st Thu Dec 03 06:57:19 2015 +0100 @@ -530,6 +530,8 @@ + + !String methodsFor:'Compatibility-VW5.4'! asByteString @@ -549,6 +551,7 @@ "Modified: / 12-01-2011 / 12:33:58 / cg" ! ! + !String methodsFor:'accessing'! at:index @@ -3893,6 +3896,12 @@ " ! +containsNon8BitElements + "return true, if the underlying string contains elements larger than a single byte" + + ^ false. +! + isWideString ^ false ! @@ -3986,6 +3995,7 @@ ^ super reverse ! ! + !String methodsFor:'substring searching'! indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive @@ -4497,6 +4507,7 @@ ! ! + !String class methodsFor:'documentation'! version diff -r 0b89780d7810 -r 13360506ef81 Unicode16String.st --- a/Unicode16String.st Tue Dec 01 06:37:23 2015 +0100 +++ b/Unicode16String.st Thu Dec 03 06:57:19 2015 +0100 @@ -61,6 +61,7 @@ "Modified: 30.6.1997 / 15:39:21 / cg" ! ! + !Unicode16String class methodsFor:'reading'! readFrom:aStreamOrString onError:exceptionBlock @@ -86,27 +87,6 @@ !Unicode16String methodsFor:'conversion'! -asSymbolIfInterned - "If a symbol with the receiver's characters is already known, return it. Otherwise, return nil. - Because ST/X does not support non-8-bit symbols, this method - has been redefined to only return a symbol, if the receiver does NOT contain - any non-8 bit characters." - - |s| - - [ - s := self asSingleByteString. - ] on:Error do:[:ex| - ^ nil. - ]. - ^ s asSymbolIfInterned - - " - (Unicode16String with:(Character value:16rFFFF)) asSymbolIfInterned - 'new' asUnicodeString asSymbolIfInterned - " -! - asUnicode16String "as the receiver already is a unicode-16 string, return it" @@ -149,8 +129,23 @@ storeOn:aStream "put the storeString of myself on aStream" - self utf8Encoded storeOn:aStream. - aStream nextPutAll:' utf8Decoded'. + self containsNon7BitAscii ifTrue:[ + "non-7bit string has been utf8Encoded" + self utf8Encoded storeOn:aStream. + aStream nextPutAll:' utf8Decoded'. + ] ifFalse:[ + self asSingleByteString storeOn:aStream. + ]. + + " + String streamContents:[:s| + 'hello' asUnicodeString storeOn:s + ]. + + String streamContents:[:s| + 'hello -öäüß' asUnicodeString storeOn:s + ]. + " "/ aStream nextPut:$'. "/ (self includes:$') ifTrue:[ @@ -169,9 +164,18 @@ storeString "return a String for storing myself" -"/ ^ self basicStoreString. -"/ ^ (self withCEscapes storeString),' withoutCEscapes'. - ^ (self utf8Encoded storeString),' utf8Decoded'. + self containsNon7BitAscii ifTrue:[ + "non-7bit string has been utf8Encoded" + ^ (self utf8Encoded storeString),' utf8Decoded'. + ] ifFalse:[ + ^ self asSingleByteString storeString. + ]. + + " + 'hello' asUnicodeString storeString + 'hello -öäüß' storeString + 'hello -öäüß' asUnicodeString storeString + " "Modified: / 25-01-2012 / 11:59:34 / cg" ! @@ -213,11 +217,11 @@ !Unicode16String class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/libbasic/Unicode16String.st,v 1.23 2015-02-20 23:59:30 cg Exp $' + ^ '$Header$' ! version_CVS - ^ '$Header: /cvs/stx/stx/libbasic/Unicode16String.st,v 1.23 2015-02-20 23:59:30 cg Exp $' + ^ '$Header$' ! ! diff -r 0b89780d7810 -r 13360506ef81 UninterpretedBytes.st --- a/UninterpretedBytes.st Tue Dec 01 06:37:23 2015 +0100 +++ b/UninterpretedBytes.st Thu Dec 03 06:57:19 2015 +0100 @@ -1,5 +1,3 @@ -"{ Encoding: utf8 }" - " COPYRIGHT (c) 1993 by Claus Gittinger All Rights Reserved @@ -2838,11 +2836,27 @@ " #[60 61 62 63] asSingleByteString #[60 61 62 63] asExternalBytes asSingleByteString + #[67 68 69 70] asIntegerArray asSingleByteString (Unicode16String with:(Character value:16rFF)) asSingleByteString (Unicode16String with:(Character value:16rFFFF)) asSingleByteString " ! +asSingleByteStringIfPossible + "if possible, return the receiver converted to a 'normal' string. + It is only possible, if there are no characters with codePoints above 255 in the receiver. + If not possible, the (wideString) receiver is returned." + + self containsNon8BitElements ifTrue:[^ self asString]. + ^ self asSingleByteString. + + " + #[67 68 69 70] asSingleByteStringIfPossible + #[67 68 69 70] asIntegerArray asSingleByteStringIfPossible + 'hello' asUnicodeString asSingleByteStringIfPossible + " +! + asString "speed up string conversions" @@ -3426,31 +3440,50 @@ !UninterpretedBytes methodsFor:'private'! -slowReplaceBytesFrom:start to:stop with:sourceBytes startingAt:sourceIndex +slowReplaceBytesFrom:startArg to:stopArg with:sourceBytes startingAt:sourceIndex "fallback if primitive code fails" - |srcIdx| - + |srcIdx "{ Class:SmallInteger }" + start "{ Class:SmallInteger }" + stop "{ Class:SmallInteger }"| + + start := startArg. + stop := stopArg. srcIdx := sourceIndex. + start to:stop do:[:dstIdx | - self at:dstIdx put:(sourceBytes at:srcIdx). - srcIdx := srcIdx + 1 + self at:dstIdx put:(sourceBytes at:srcIdx). + srcIdx := srcIdx + 1 ]. ! ! !UninterpretedBytes methodsFor:'queries'! containsNon7BitAscii - "return true, if the underlying string contains 8BitCharacters (or widers) + "return true, if the underlying collection contains elements longer than 7 bits (i.e. if it is non-ascii)" |sz "{ Class:SmallInteger }"| sz := self size. 1 to:sz do:[:idx| - (self at:idx) > 16r7F ifTrue:[ - ^ true. - ]. + (self at:idx) > 16r7F ifTrue:[ + ^ true. + ]. + ]. + ^ false. +! + +containsNon8BitElements + "return true, if the underlying structure contains elements larger than a single byte" + + |sz "{ Class:SmallInteger }"| + + sz := self size. + 1 to:sz do:[:idx| + (self at:idx) > 16rFF ifTrue:[ + ^ true. + ]. ]. ^ false. !