String.st
branchjv
changeset 20131 4118d61ddba0
parent 20083 196706395bbc
parent 20126 8341bd725f11
child 20136 8e3509beb85d
equal deleted inserted replaced
20083:196706395bbc 20131:4118d61ddba0
   534 
   534 
   535     ^ self == String
   535     ^ self == String
   536 
   536 
   537     "Modified: 23.4.1996 / 16:00:38 / cg"
   537     "Modified: 23.4.1996 / 16:00:38 / cg"
   538 ! !
   538 ! !
       
   539 
       
   540 
   539 
   541 
   540 
   542 
   541 
   543 
   542 
   544 
   543 
   545 
  2465 
  2467 
  2466     ^ self copy changeClassTo:ImmutableString
  2468     ^ self copy changeClassTo:ImmutableString
  2467 !
  2469 !
  2468 
  2470 
  2469 asLowercase
  2471 asLowercase
  2470     "a tuned version. Some apps call this very heavily"
  2472     "a tuned version for Strings with size < 255. Some apps call this very heavily.
  2471 
  2473      We can do this for 8-bit strings, since the mapping is well known and lowercase chars 
  2472 %{  /* NOCONTEXT */
  2474      fit in one byte also."
  2473 #if 0
  2475 
       
  2476 %{  /* NOCONTEXT */
  2474 #ifndef __SCHTEAM__
  2477 #ifndef __SCHTEAM__
  2475     REGISTER OBJ slf = self;
  2478     REGISTER OBJ slf = self;
  2476 
  2479 
  2477     if (__qClass(slf) == String) {
  2480     if (__isStringLike(slf)) {
  2478 	char quickBuffer[256];
  2481         char quickBuffer[256];
  2479 	int sz = __stringSize(slf);
  2482         int sz = __stringSize(slf);
  2480 
  2483 
  2481 	if (sz < (sizeof(quickBuffer)-1)) {
  2484         if (sz < (sizeof(quickBuffer)-1)) {
  2482 	    REGISTER int i = 0;
  2485             REGISTER int i = 0;
  2483 	    int anyChange = 0;
  2486             int anyChange = 0;
  2484 	    REGISTER unsigned char *cp = __stringVal(slf);
  2487             REGISTER unsigned char *cp = __stringVal(slf);
  2485 
  2488 
  2486 	    // fast advance
  2489             // fast advance
  2487 	    // all uppercase chars are in the ranges 0x41 .. 0x5A (A..Z)
  2490             // all uppercase chars are in the ranges 0x41 .. 0x5A (A..Z)
  2488 	    // or 0xC0 .. 0xDF.
  2491             // or 0xC0 .. 0xDF.
  2489 	    // I.e. they have the 0x20 bit clear.
  2492             // I.e. they have the 0x20 bit clear.
  2490 	    // Thus, we can fast skip over lowercase, spaces and some puctuation,
  2493             // Thus, we can fast skip over lowercase, spaces and some punctuation,
  2491 	    // if all bytes of a word have the x20 bit set.
  2494             // if all bytes of a word have the x20 bit set.
  2492 	    //
  2495 
  2493 #if __POINTER_SIZE__ == 8
  2496 #if __POINTER_SIZE__ == 8
  2494 	    for (; i<(sz-8); i+=8) {
  2497             for (; i < (sz-8); i += 8) {
  2495 		unsigned INT eightChars;
  2498                 unsigned INT eightChars = *(unsigned INT *)(cp+i);
  2496 
  2499                 if ((eightChars & 0x2020202020202020ULL) != 0x2020202020202020ULL) goto convert;
  2497 		eightChars = *((unsigned INT *)(cp));
  2500                 *(unsigned INT *)(quickBuffer+i) = eightChars;
  2498 		if ((eightChars & 0x2020202020202020ULL) != 0x2020202020202020ULL) break;
  2501             }
  2499 		*((unsigned INT *)(&quickBuffer[i])) = eightChars;
       
  2500 	    }
       
  2501 #endif
  2502 #endif
  2502 	    for (; i<(sz-4); i+=4) {
  2503             for (; i < (sz-4); i += 4) {
  2503 		unsigned int fourChars;
  2504                 unsigned int fourChars = *(unsigned int *)(cp+i);
  2504 
  2505                 if ((fourChars & 0x20202020U) != 0x20202020U) break;
  2505 		fourChars = *((unsigned int *)(cp));
  2506                 *(unsigned int *)(quickBuffer+i) = fourChars;
  2506 		if ((fourChars & 0x20202020U) != 0x20202020U) break;
  2507             }
  2507 		*((unsigned int *)(&quickBuffer[i])) = fourChars;
  2508 convert:
  2508 	    }
  2509             for (; i<sz; i++) {
  2509 
  2510                 unsigned char ch = cp[i];
  2510 	    for (; i<sz; i++) {
  2511 
  2511 		unsigned char ch = cp[i];
  2512                 quickBuffer[i] = ch;
  2512 
  2513                 if ((ch & 0x60) == 0x40) {
  2513 		quickBuffer[i] = ch;
  2514                     if (ch >= 'A' && ch <= 'Z') {
  2514 		if ((ch & 0x60) == 0x40) {
  2515                         quickBuffer[i] = ch - 'A' + 'a';
  2515 		    if (ch >= 'A') {
  2516                         anyChange = 1;
  2516 			if (ch <= 'Z') {
  2517                     } else {
  2517 			    quickBuffer[i] = ch - 'A' + 'a';
  2518                         // deal with national latin1 characters
  2518 			    anyChange = 1;
  2519                         if (ch >= 0xC0 && ch <= 0xDE && ch != 0xD7) {
  2519 			} else {
  2520                             quickBuffer[i] = ch + 0x20;
  2520 			    // deal with national latin1 characters
  2521                             anyChange = 1;
  2521 			    if (ch >= 0xC0) {
  2522                         }
  2522 				if (ch <= 0xDE) {
  2523                     }
  2523 				    if (ch != 0xD7) {
  2524                 }
  2524 					quickBuffer[i] = ch + 0x20;
  2525             }
  2525 					anyChange = 1;
  2526             if (! anyChange) {
  2526 				    }
  2527                 RETURN(slf);
  2527 				}
  2528             }
  2528 			    }
  2529             quickBuffer[i] = '\0';
  2529 			}
  2530             RETURN (__MKSTRING_L(quickBuffer, i));
  2530 		    }
  2531         }
  2531 		}
       
  2532 	    }
       
  2533 	    quickBuffer[i] = '\0';
       
  2534 	    if (! anyChange) {
       
  2535 		RETURN(slf);
       
  2536 	    }
       
  2537 	    RETURN (__MKSTRING_L(quickBuffer, i));
       
  2538 	}
       
  2539     }
  2532     }
  2540 #endif /* ! __SCHTEAM__ */
  2533 #endif /* ! __SCHTEAM__ */
  2541 #endif
       
  2542 %}.
  2534 %}.
  2543     ^ super asLowercase
  2535     ^ super asLowercase
       
  2536 
       
  2537     "
       
  2538         'Hello WORLD' asLowercase
       
  2539         (String new:300) asLowercase
       
  2540     "
  2544 !
  2541 !
  2545 
  2542 
  2546 asPackageId
  2543 asPackageId
  2547     "given a package-string as receiver, return a packageId object.
  2544     "given a package-string as receiver, return a packageId object.
  2548      packageIds hide the details of module/directory handling inside the path.
  2545      packageIds hide the details of module/directory handling inside the path.
  2785      */
  2782      */
  2786     if (__qIsStringLike(self)) {
  2783     if (__qIsStringLike(self)) {
  2787         char *cp1 = (char *) __stringVal(self);
  2784         char *cp1 = (char *) __stringVal(self);
  2788         int l1 = __stringSize(self);
  2785         int l1 = __stringSize(self);
  2789         int l2;
  2786         int l2;
  2790         char *cp2;
  2787         char *cp2 = 0;
  2791         int sz;
  2788         int sz;
  2792         OBJ newString;
  2789         OBJ newString;
  2793         char character;
  2790         char character;
  2794 
  2791 
  2795         if (__isCharacter(aStringOrCharacter)) {
  2792         if (__isCharacter(aStringOrCharacter)) {
  2799             character = __intVal(__characterVal(aStringOrCharacter));
  2796             character = __intVal(__characterVal(aStringOrCharacter));
  2800             l2 = 1;
  2797             l2 = 1;
  2801             cp2 = &character;
  2798             cp2 = &character;
  2802         } else if (__isStringLike(aStringOrCharacter)) {
  2799         } else if (__isStringLike(aStringOrCharacter)) {
  2803             l2 = __stringSize(aStringOrCharacter);
  2800             l2 = __stringSize(aStringOrCharacter);
  2804             cp2 = (char *) __stringVal(aStringOrCharacter);
       
  2805         } else
  2801         } else
  2806             goto out;
  2802             goto out;
  2807 
  2803 
  2808         sz = OHDR_SIZE + l1 + l2 + 1;
  2804         sz = OHDR_SIZE + l1 + l2 + 1;
  2809         __qNew(newString, sz);      /* OBJECT ALLOCATION */
  2805         __qNew(newString, sz);      /* OBJECT ALLOCATION */
       
  2806 
       
  2807         cp1 = (char *) __stringVal(self);
       
  2808         if (cp2 == 0) 
       
  2809             cp2 = (char *) __stringVal(aStringOrCharacter);
       
  2810 
  2810         if (newString != nil) {
  2811         if (newString != nil) {
  2811             REGISTER unsigned char *dstp;
  2812             REGISTER unsigned char *dstp;
  2812 
  2813 
  2813             __InstPtr(newString)->o_class = String;
  2814             __InstPtr(newString)->o_class = String;
  2814             __qSTORE(newString, String);
  2815             __qSTORE(newString, String);
  2815             dstp = __stringVal(newString);
  2816             dstp = __stringVal(newString);
  2816 
  2817 
  2817 # ifdef bcopy4
  2818 # if defined(bcopy4)
  2818             /* knowing that allocation is 4-byte aligned and
  2819             /* knowing that allocation is 4-byte aligned and
  2819              * size rounded up to next 4-byte, the first copy
  2820              * size rounded up to next 4-byte, the first copy
  2820              * can be done word-wise.
  2821              * can be done word-wise.
  2821              * that speeds up size-10-string , size-10-string
  2822              * that speeds up size-10-string , size-10-string
  2822              * by 10% on a P5/200.
  2823              * by 10% on a P5/200.
  2826 
  2827 
  2827                 if (l1 & 3) nw++;
  2828                 if (l1 & 3) nw++;
  2828                 bcopy4(cp1, dstp, nw);
  2829                 bcopy4(cp1, dstp, nw);
  2829                 dstp += l1;
  2830                 dstp += l1;
  2830             }
  2831             }
  2831 # else
  2832 # elif defined(FAST_MEMCPY)
  2832 #  ifdef FAST_MEMCPY
       
  2833             memcpy(dstp, cp1, l1);
  2833             memcpy(dstp, cp1, l1);
  2834             dstp += l1;
  2834             dstp += l1;
  2835 #  else
  2835 # else
  2836             while (l1 >= 4) {
  2836             while (l1 >= 4) {
  2837                 *(int *)dstp = *(int *)cp1;
  2837                 *(int *)dstp = *(int *)cp1;
  2838                 dstp += 4; cp1 += 4;
  2838                 dstp += 4; cp1 += 4;
  2839                 l1 -= 4;
  2839                 l1 -= 4;
  2840             }
  2840             }
  2841             while (l1--) *dstp++ = *cp1++;
  2841             while (l1--) *dstp++ = *cp1++;
  2842 #  endif
       
  2843 # endif
  2842 # endif
  2844 
  2843 
  2845 # ifdef bcopy4
  2844 # ifdef bcopy4
  2846             if (((INT)dstp & 3) == 0) {
  2845             if (((INT)dstp & 3) == 0) {
  2847                 int nw = l2 >> 2;
  2846                 int nw = l2 >> 2;