TwoByteString.st
changeset 18593 e69a425e3823
parent 18586 b8798de459f4
child 18606 5d9eaf7bf761
child 18608 7d521f25267c
equal deleted inserted replaced
18592:43c80b6eb3f9 18593:e69a425e3823
   120 !TwoByteString methodsFor:'encoding'!
   120 !TwoByteString methodsFor:'encoding'!
   121 
   121 
   122 utf8Encoded
   122 utf8Encoded
   123     "Return my UTF-8 representation as a new String"
   123     "Return my UTF-8 representation as a new String"
   124 
   124 
   125     self contains8BitCharacters ifTrue:[
   125     self containsNon7BitAscii ifTrue:[
   126         ^ self basicUtf8Encoded.
   126         ^ self basicUtf8Encoded.
   127     ].
   127     ].
   128 
   128 
   129     ^ self asSingleByteString.
   129     ^ self asSingleByteString.
   130 
   130 
   136 !
   136 !
   137 
   137 
   138 utf8EncodedOn:aStream
   138 utf8EncodedOn:aStream
   139     "write to aStream in utf8 encoding"
   139     "write to aStream in utf8 encoding"
   140 
   140 
   141     self contains8BitCharacters ifTrue:[
   141     self containsNon7BitAscii ifTrue:[
   142         aStream nextPutAllUtf8:self.
   142         aStream nextPutAllUtf8:self.
   143     ] ifFalse:[
   143     ] ifFalse:[
   144         |sz "{Class: SmallInteger}"|
   144         |sz "{Class: SmallInteger}"|
   145 
   145 
   146         sz := self size.
   146         sz := self size.
   148             aStream nextPut:(self basicAt:idx).
   148             aStream nextPut:(self basicAt:idx).
   149         ].
   149         ].
   150     ].
   150     ].
   151 
   151 
   152     "
   152     "
   153         |s|
   153      String streamContents:[:w|
   154         s := '' writeStream.
   154         'abcde1234' asUnicode16String utf8EncodedOn:w
   155         'abcdef' asUnicode16String utf8EncodedOn:s.
   155      ].
   156         s contents
   156      String streamContents:[:w|
   157     "
   157          'abcdeäöüß' asUnicode16String utf8EncodedOn:w
   158 
   158      ].
   159     "
       
   160         |s|
       
   161         s := '' writeStream.
       
   162         'abcdefäöü' asUnicode16String utf8EncodedOn:s.
       
   163         s contents
       
   164     "
   159     "
   165 ! !
   160 ! !
   166 
   161 
   167 !TwoByteString methodsFor:'filling and replacing'!
   162 !TwoByteString methodsFor:'filling and replacing'!
   168 
   163 
   247     ^ 16
   242     ^ 16
   248 
   243 
   249     "Modified: 20.4.1996 / 23:08:38 / cg"
   244     "Modified: 20.4.1996 / 23:08:38 / cg"
   250 !
   245 !
   251 
   246 
   252 bitsPerCharacterInString
   247 characterSize
   253     |max|
   248     "answer the number of bits needed to represent my widest character; the result is always 7, 8 or 16"
   254 
   249 
   255     max := 0.
   250 %{  /* NOCONTEXT */
   256     self do:[:eachCharacter | 
   251 
   257         max := max max:eachCharacter bitsPerCharacterInString.
   252     REGISTER unsigned short *sp = __twoByteStringVal(self);
   258         max == 16 ifTrue:[ ^ max ].
   253     REGISTER unsigned short *last = sp + __twoByteStringSize(self);
   259     ].
   254     OBJ cls = __qClass(self);
   260     ^ max
   255     int has8BitChars = 0;
   261 !
   256 
   262 
   257     if (cls != Unicode16String && cls != TwoByteString) {
   263 contains8BitCharacters
   258         sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
       
   259     }
       
   260 
       
   261 #if __POINTER_SIZE__ == 8
       
   262     if (sizeof(unsigned INT) == 8) {
       
   263         if (!has8BitChars) {
       
   264             for ( ; (sp+4) <= last; sp += 4) {
       
   265                 if (*(unsigned INT *)sp & 0xFF80FF80FF80FF80) {
       
   266                     /* found a char that needs 8 bits - continue from here, looking for 16-bit chars */
       
   267                     has8BitChars = 1;
       
   268                     break;
       
   269                 }
       
   270             }
       
   271         }
       
   272         for ( ; (sp+4) <= last; sp += 4) {
       
   273             if (*(unsigned INT *)sp & 0xFF00FF00FF00FF00) {
       
   274                 RETURN(__mkSmallInteger(16));
       
   275             }
       
   276         }
       
   277     }
       
   278 #endif
       
   279     if (sizeof(unsigned int) == 4) {
       
   280         if (!has8BitChars) {
       
   281             for ( ; (sp+2) <= last; sp += 2) {
       
   282                 if (*(unsigned int *)sp & 0xFF80FF80) {
       
   283                     /* found a char that needs 8 bits - continue from here, looking for 16-bit chars */
       
   284                     has8BitChars = 1;
       
   285                     break;
       
   286                 }
       
   287             }
       
   288         }
       
   289         for ( ; (sp+2) <= last; sp += 2) {
       
   290             if (*(unsigned int *)sp & 0xFF00FF00) {
       
   291                 RETURN(__mkSmallInteger(16));
       
   292             }
       
   293         }
       
   294     }
       
   295     if (!has8BitChars) {
       
   296         for ( ; sp < last; sp++) {
       
   297             if (*sp & 0xFF80) {
       
   298                 /* found a char that needs 8 bits - continue from here, looking for 16-bit chars */
       
   299                 has8BitChars = 1;
       
   300                 break;
       
   301             }
       
   302         }
       
   303     }
       
   304     for ( ; sp < last; sp++) {
       
   305         if (*sp & 0xFF00) {
       
   306             RETURN(__mkSmallInteger(16));
       
   307         }
       
   308     }
       
   309     RETURN (__mkSmallInteger(has8BitChars ? 8 : 7));
       
   310 %}.
       
   311 
       
   312     "
       
   313      'hello world' asUnicode16String characterSize
       
   314      'hello worldüäö' asUnicode16String characterSize
       
   315      'a' asUnicode16String characterSize
       
   316      'ü' asUnicode16String characterSize
       
   317      'aa' asUnicode16String characterSize
       
   318      'aü' asUnicode16String characterSize
       
   319      'aaa' asUnicode16String characterSize
       
   320      'aaü' asUnicode16String characterSize
       
   321      'aaaü' asUnicode16String characterSize
       
   322      'aaaa' asUnicode16String characterSize
       
   323      'aaaaü' asUnicode16String characterSize
       
   324     "
       
   325 !
       
   326 
       
   327 containsNon7BitAscii
   264     "return true, if the underlying string contains 8-bit characters (or wider ones),
   328     "return true, if the underlying string contains 8-bit characters (or wider ones),
   265      i.e. if any character lies outside the 7-bit ASCII range"
   329      i.e. if any character lies outside the 7-bit ASCII range"
   266 
   330 
   267 %{  /* NOCONTEXT */
   331 %{  /* NOCONTEXT */
   268 
   332 
   269     REGISTER unsigned short *sp, *last;
   333     REGISTER unsigned short *sp = __twoByteStringVal(self);
   270     OBJ cls;
   334     REGISTER unsigned short *last = sp + __twoByteStringSize(self);
   271 
   335     OBJ cls = __qClass(self);
   272     sp = __twoByteStringVal(self);
   336 
   273     last = sp + __twoByteStringSize(self);
   337     if ( cls != Unicode16String && cls != TwoByteString) {
   274     if ((cls = __qClass(self)) != TwoByteString && cls != Unicode16String) {
       
   275         sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
   338         sp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)) / 2;
   276     }
   339     }
   277 #if __POINTER_SIZE__ == 8
   340 #if __POINTER_SIZE__ == 8
   278     /* assume sizeof(long) == 4
   341     if (sizeof(unsigned INT) == 8) {
   279      * if __POINTER_SIZE__ == 4
   342         for ( ; (sp+4) <= last; sp += 4) {
   280      */
   343             if (*(unsigned INT *)sp & 0xFF80FF80FF80FF80) {
   281     if (sizeof(long) == 8) {
       
   282         while ((sp+4) <= last) {
       
   283             if (*(unsigned long *)sp & 0xFF80FF80FF80FF80) {
       
   284                 RETURN ( true );
   344                 RETURN ( true );
   285             }
   345             }
   286             sp += 4;
       
   287         }
   346         }
   288     }
   347     }
   289 #endif
   348 #endif
   290     if (sizeof(int) == 4) {
   349     if (sizeof(unsigned int) == 4) {
   291         while ((sp+2) <= last) {
   350         for ( ; (sp+2) <= last; sp += 2) {
   292             if (*(unsigned int *)sp & 0xFF80FF80) {
   351             if (*(unsigned int *)sp & 0xFF80FF80) {
   293                 RETURN ( true );
   352                 RETURN ( true );
   294             }
   353             }
   295             sp += 2;
   354         }
   296         }
   355     }
   297     }
   356     for ( ; sp < last; sp++) {
   298     while (sp <= last) {
       
   299         if (*sp & 0xFF80) {
   357         if (*sp & 0xFF80) {
   300             RETURN ( true );
   358             RETURN ( true );
   301         }
   359         }
   302         sp++;
       
   303     }
   360     }
   304     RETURN (false);
   361     RETURN (false);
   305 %}.
   362 %}.
   306 
   363 
   307     "
   364     "
   308      'hello world' asUnicode16String contains8BitCharacters
   365      'hello world' asUnicode16String containsNon7BitAscii
   309      'hello worldüäö' asUnicode16String contains8BitCharacters
   366      'hello worldüäö' asUnicode16String containsNon7BitAscii
   310      'ü' asUnicode16String contains8BitCharacters
   367      'ü' asUnicode16String containsNon7BitAscii
   311      'aü' asUnicode16String contains8BitCharacters
   368      'aü' asUnicode16String containsNon7BitAscii
   312      'aaü' asUnicode16String contains8BitCharacters
   369      'aaü' asUnicode16String containsNon7BitAscii
   313      'aaaü' asUnicode16String contains8BitCharacters
   370      'aaaü' asUnicode16String containsNon7BitAscii
   314      'aaaaü' asUnicode16String contains8BitCharacters
   371      'aaaaü' asUnicode16String containsNon7BitAscii
       
   372      'aaaaa' asUnicode16String containsNon7BitAscii
   315     "
   373     "
   316 !
   374 !
   317 
   375 
   318 isWideString
   376 isWideString
   319     ^ true
   377     ^ true