TwoByteString.st
changeset 17621 098adea6b2d4
parent 16750 394404a33691
child 18120 e3a375d5f6a8
child 18576 724284508398
--- a/TwoByteString.st	Sat Mar 14 22:29:05 2015 +0100
+++ b/TwoByteString.st	Sat Mar 14 22:30:22 2015 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1993 by Claus Gittinger
 	      All Rights Reserved
@@ -11,6 +13,8 @@
 "
 "{ Package: 'stx:libbasic' }"
 
+"{ NameSpace: Smalltalk }"
+
 CharacterArray variableWordSubclass:#TwoByteString
 	instanceVariableNames:''
 	classVariableNames:''
@@ -70,6 +74,16 @@
     ^ (super basicNew:anInteger) atAllPut:(Character space)
 
     "Modified: 26.2.1996 / 14:38:47 / cg"
+!
+
+uninitializedNew:anInteger
+    "Answer an instance with room for anInteger characters, exactly as
+     basicNew: leaves it; no fill with spaces is done here."
+
+    |fresh|
+    fresh := super basicNew:anInteger.
+    ^ fresh
+    "self uninitializedNew:10"
 ! !
 
 !TwoByteString methodsFor:'accessing'!
@@ -103,6 +117,53 @@
     ^ super basicAt:index.
 ! !
 
+!TwoByteString methodsFor:'encoding'!
+
+utf8Encoded
+    "Answer a new String holding the receiver's UTF-8 encoding.
+     A receiver for which contains8BitCharacters answers false is simply
+     converted with asSingleByteString."
+
+    ^ self contains8BitCharacters
+        ifTrue:[self basicUtf8Encoded]
+        ifFalse:[self asSingleByteString]
+
+
+    "
+        'abcdef' asUnicode16String utf8Encoded
+        'abcdefäöü' asUnicode16String utf8Encoded
+    "
+!
+
+utf8EncodedOn:aStream
+    "Emit the receiver onto aStream in UTF-8 form; a receiver for which
+     contains8BitCharacters answers false is copied one character at a time."
+
+    |count "{Class: SmallInteger}"|
+    self contains8BitCharacters ifTrue:[
+        aStream nextPutAllUtf8:self.
+        ^ self
+    ].
+    count := self size.
+    1 to:count do:[:charIndex |
+        aStream nextPut:(self basicAt:charIndex)
+    ].
+
+    "
+        |s|
+        s := '' writeStream.
+        'abcdef' asUnicode16String utf8EncodedOn:s.
+        s contents
+    "
+
+    "
+        |s|
+        s := '' writeStream.
+        'abcdefäöü' asUnicode16String utf8EncodedOn:s.
+        s contents
+    "
+! !
+
 !TwoByteString methodsFor:'filling and replacing'!
 
 replaceFrom:start to:stop with:aString startingAt:repStart
@@ -202,6 +263,61 @@
     "Modified: 20.4.1996 / 23:08:38 / cg"
 !
 
+contains8BitCharacters
+    "return true, if the receiver contains any character with a code above 127
+     (i.e. if it is not pure 7-bit ascii)"
+
+%{  /* NOCONTEXT */
+
+    REGISTER unsigned short *sp, *last;
+    OBJ cls;
+    /* sp walks the 16-bit character data; last points one past its end */
+    sp = __twoByteStringVal(self);
+    last = sp + __twoByteStringSize(self);
+    if ((cls = __qClass(self)) != TwoByteString && cls != Unicode16String) {
+        sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
+    }   /* other classes: step over their named instance variable slots */
+#if __POINTER_SIZE__ == 8
+    /* test 4 characters per step where long is 64 bits wide;
+     * with __POINTER_SIZE__ == 4, long is assumed to be 4 bytes (block omitted)
+     */
+    if (sizeof(long) == 8) {
+        while ((sp+4) <= last) {
+            if (*(unsigned long *)sp & 0xFF80FF80FF80FF80) {
+                RETURN ( true );
+            }
+            sp += 4;
+        }
+    }
+#endif
+    if (sizeof(int) == 4) {
+        while ((sp+2) <= last) {
+            if (*(unsigned int *)sp & 0xFF80FF80) {
+                RETURN ( true );
+            }
+            sp += 2;
+        }
+    }
+    while (sp < last) {     /* strictly below last: *last is outside the data */
+        if (*sp & 0xFF80) {
+            RETURN ( true );
+        }
+        sp++;
+    }
+    RETURN (false);
+%}.
+
+    "
+     'hello world' asUnicode16String contains8BitCharacters
+     'hello worldüäö' asUnicode16String contains8BitCharacters
+     'ü' asUnicode16String contains8BitCharacters
+     'aü' asUnicode16String contains8BitCharacters
+     'aaü' asUnicode16String contains8BitCharacters
+     'aaaü' asUnicode16String contains8BitCharacters
+     'aaaaü' asUnicode16String contains8BitCharacters
+    "
+!
+
 isWideString
     ^ true    "always true here: characters are stored as 16-bit units"
 ! !
@@ -209,7 +325,7 @@
 !TwoByteString class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/TwoByteString.st,v 1.35 2014-07-10 12:28:32 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/TwoByteString.st,v 1.36 2015-03-14 21:30:22 stefan Exp $'
 ! !