--- a/TwoByteString.st Sat Mar 14 22:29:05 2015 +0100
+++ b/TwoByteString.st Sat Mar 14 22:30:22 2015 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
"
COPYRIGHT (c) 1993 by Claus Gittinger
All Rights Reserved
@@ -11,6 +13,8 @@
"
"{ Package: 'stx:libbasic' }"
+"{ NameSpace: Smalltalk }"
+
CharacterArray variableWordSubclass:#TwoByteString
instanceVariableNames:''
classVariableNames:''
@@ -70,6 +74,16 @@
^ (super basicNew:anInteger) atAllPut:(Character space)
"Modified: 26.2.1996 / 14:38:47 / cg"
+!
+
+uninitializedNew:anInteger
+    "answer a new instance with room for anInteger characters.
+     The object obtained from basicNew: is handed back as is;
+     nothing is stored into its elements here"
+
+    |freshString|
+
+    freshString := super basicNew:anInteger.
+    ^ freshString
+
+    "
+     self uninitializedNew:10
+    "
! !
!TwoByteString methodsFor:'accessing'!
@@ -103,6 +117,53 @@
^ super basicAt:index.
! !
+!TwoByteString methodsFor:'encoding'!
+
+utf8Encoded
+    "answer my UTF-8 representation as a new String.
+     When contains8BitCharacters answers false, the result of
+     asSingleByteString is answered; otherwise the encoding is
+     left to basicUtf8Encoded"
+
+    ^ self contains8BitCharacters
+        ifTrue:[self basicUtf8Encoded]
+        ifFalse:[self asSingleByteString]
+
+    "
+     'abcdef' asUnicode16String utf8Encoded
+     'abcdefäöü' asUnicode16String utf8Encoded
+    "
+!
+
+utf8EncodedOn:aStream
+    "write my UTF-8 representation to aStream.
+     When contains8BitCharacters answers true, the stream is asked to do
+     the encoding (nextPutAllUtf8:); otherwise every element is passed
+     to the stream unchanged with nextPut:"
+
+    |count "{Class: SmallInteger}"|
+
+    self contains8BitCharacters ifTrue:[
+        aStream nextPutAllUtf8:self.
+        ^ self
+    ].
+
+    count := self size.
+    1 to:count do:[:position |
+        aStream nextPut:(self basicAt:position).
+    ].
+
+    "
+     |s|
+     s := '' writeStream.
+     'abcdef' asUnicode16String utf8EncodedOn:s.
+     s contents
+    "
+
+    "
+     |s|
+     s := '' writeStream.
+     'abcdefäöü' asUnicode16String utf8EncodedOn:s.
+     s contents
+    "
+! !
+
!TwoByteString methodsFor:'filling and replacing'!
replaceFrom:start to:stop with:aString startingAt:repStart
@@ -202,6 +263,61 @@
"Modified: 20.4.1996 / 23:08:38 / cg"
!
+contains8BitCharacters
+    "return true, if the receiver contains at least one character whose
+     code does not fit into 7 bits (i.e. if it is non-ascii).
+     This covers 8-bit characters as well as wider ones."
+
+%{  /* NOCONTEXT */
+
+    REGISTER unsigned short *sp, *last;
+    OBJ cls;
+
+    sp = __twoByteStringVal(self);
+    /* last is one element past the end of the data; it must never be dereferenced */
+    last = sp + __twoByteStringSize(self);
+    if ((cls = __qClass(self)) != TwoByteString && cls != Unicode16String) {
+        /* advance past c_ninstvars object slots (byte count converted to
+         * 16-bit units) - presumably the named instance variables of a subclass
+         */
+        sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
+    }
+#if __POINTER_SIZE__ == 8
+    /* the 8-byte scan is only compiled for 8-byte pointers;
+     * with 4-byte pointers, sizeof(long) is assumed to be 4 as well
+     */
+    if (sizeof(long) == 8) {
+        /* test four characters per step */
+        while ((sp+4) <= last) {
+            if (*(unsigned long *)sp & 0xFF80FF80FF80FF80) {
+                RETURN ( true );
+            }
+            sp += 4;
+        }
+    }
+#endif
+    if (sizeof(int) == 4) {
+        /* test two characters per step */
+        while ((sp+2) <= last) {
+            if (*(unsigned int *)sp & 0xFF80FF80) {
+                RETURN ( true );
+            }
+            sp += 2;
+        }
+    }
+    /* whatever is left, one character at a time.
+     * The bound is strict: comparing with <= would also examine the
+     * element at last, which lies behind the string, and could report
+     * a non-ascii character that is not part of the receiver.
+     */
+    while (sp < last) {
+        if (*sp & 0xFF80) {
+            RETURN ( true );
+        }
+        sp++;
+    }
+    RETURN (false);
+%}.
+
+    "
+     'hello world' asUnicode16String contains8BitCharacters
+     'hello worldüäö' asUnicode16String contains8BitCharacters
+     'ü' asUnicode16String contains8BitCharacters
+     'aü' asUnicode16String contains8BitCharacters
+     'aaü' asUnicode16String contains8BitCharacters
+     'aaaü' asUnicode16String contains8BitCharacters
+     'aaaaü' asUnicode16String contains8BitCharacters
+    "
+!
+
isWideString
+    "answer true unconditionally.
+     NOTE(review): presumably marks strings whose characters need more than one byte - confirm"
^ true
! !
@@ -209,7 +325,7 @@
!TwoByteString class methodsFor:'documentation'!
version
+    "answer the version-control header string (path, revision, date, author) of this source file"
- ^ '$Header: /cvs/stx/stx/libbasic/TwoByteString.st,v 1.35 2014-07-10 12:28:32 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/TwoByteString.st,v 1.36 2015-03-14 21:30:22 stefan Exp $'
! !