2465 |
2467 |
2466 ^ self copy changeClassTo:ImmutableString |
2468 ^ self copy changeClassTo:ImmutableString |
2467 ! |
2469 ! |
2468 |
2470 |
2469 asLowercase |
2471 asLowercase |
2470 "a tuned version. Some apps call this very heavily" |
2472 "a tuned version for Strings with size < 255. Some apps call this very heavily. |
2471 |
2473 We can do this for 8-bit strings, since the mapping is well known and lowercase chars |
2472 %{ /* NOCONTEXT */ |
2474 fit in one byte also." |
2473 #if 0 |
2475 |

2476 %{ /* NOCONTEXT */ |
// NOTE(review): this listing interleaves two revisions of the method line by line;
// every line begins with the line number it has in its own revision.
2474 #ifndef __SCHTEAM__ |
2477 #ifndef __SCHTEAM__ |
2475 REGISTER OBJ slf = self; |
2478 REGISTER OBJ slf = self; |
2476 |
2479 |
2477 if (__qClass(slf) == String) { |
2480 if (__isStringLike(slf)) { |
2478 char quickBuffer[256]; |
2481 char quickBuffer[256]; |
2479 int sz = __stringSize(slf); |
2482 int sz = __stringSize(slf); |
2480 |
2483 |
// The fast path is taken only when the string plus its terminating 0-byte fits
// into quickBuffer; otherwise control reaches the super asLowercase fallback
// that follows the primitive.
2481 if (sz < (sizeof(quickBuffer)-1)) { |
2484 if (sz < (sizeof(quickBuffer)-1)) { |
2482 REGISTER int i = 0; |
2485 REGISTER int i = 0; |
2483 int anyChange = 0; |
2486 int anyChange = 0; |
2484 REGISTER unsigned char *cp = __stringVal(slf); |
2487 REGISTER unsigned char *cp = __stringVal(slf); |
2485 |
2488 |
2486 // fast advance |
2489 // fast advance |
2487 // all uppercase chars are in the ranges 0x41 .. 0x5A (A..Z) |
2490 // all uppercase chars are in the ranges 0x41 .. 0x5A (A..Z) |
2488 // or 0xC0 .. 0xDF. |
2491 // or 0xC0 .. 0xDF. |
2489 // I.e. they have the 0x20 bit clear. |
2492 // I.e. they have the 0x20 bit clear. |
2490 // Thus, we can fast skip over lowercase, spaces and some punctuation, |
2493 // Thus, we can fast skip over lowercase, spaces and some punctuation, |
2491 // if all bytes of a word have the x20 bit set. |
2494 // if all bytes of a word have the x20 bit set. |
2492 // |
2495 |
2493 #if __POINTER_SIZE__ == 8 |
2496 #if __POINTER_SIZE__ == 8 |
2494 for (; i<(sz-8); i+=8) { |
2497 for (; i < (sz-8); i += 8) { |
2495 unsigned INT eightChars; |
2498 unsigned INT eightChars = *(unsigned INT *)(cp+i); |
2496 |
// A word containing any byte with the 0x20 bit clear ends the word-wise copy;
// goto convert jumps past the 4-byte loop straight to the per-byte loop.
2499 if ((eightChars & 0x2020202020202020ULL) != 0x2020202020202020ULL) goto convert; |
// NOTE(review): this revision loads from cp (no +i offset) in both word loops,
// so every iteration re-reads the start of the string; the other revision
// loads from cp+i.
2497 eightChars = *((unsigned INT *)(cp)); |
2500 *(unsigned INT *)(quickBuffer+i) = eightChars; |
2498 if ((eightChars & 0x2020202020202020ULL) != 0x2020202020202020ULL) break; |
2501 } |
2499 *((unsigned INT *)(&quickBuffer[i])) = eightChars; |

2500 } |

2501 #endif |
2502 #endif |
2502 for (; i<(sz-4); i+=4) { |
2503 for (; i < (sz-4); i += 4) { |
2503 unsigned int fourChars; |
2504 unsigned int fourChars = *(unsigned int *)(cp+i); |
2504 |
2505 if ((fourChars & 0x20202020U) != 0x20202020U) break; |
2505 fourChars = *((unsigned int *)(cp)); |
2506 *(unsigned int *)(quickBuffer+i) = fourChars; |
2506 if ((fourChars & 0x20202020U) != 0x20202020U) break; |
2507 } |
2507 *((unsigned int *)(&quickBuffer[i])) = fourChars; |
2508 convert: |
2508 } |
2509 for (; i<sz; i++) { |
2509 |
2510 unsigned char ch = cp[i]; |
2510 for (; i<sz; i++) { |
2511 |
2511 unsigned char ch = cp[i]; |
2512 quickBuffer[i] = ch; |
2512 |
// Bit 0x40 set and bit 0x20 clear: only bytes 0x40..0x5F and 0xC0..0xDF pass this test.
2513 if ((ch & 0x60) == 0x40) { |
2513 quickBuffer[i] = ch; |
2514 if (ch >= 'A' && ch <= 'Z') { |
2514 if ((ch & 0x60) == 0x40) { |
2515 quickBuffer[i] = ch - 'A' + 'a'; |
2515 if (ch >= 'A') { |
2516 anyChange = 1; |
2516 if (ch <= 'Z') { |
2517 } else { |
2517 quickBuffer[i] = ch - 'A' + 'a'; |
2518 // deal with national latin1 characters |
2518 anyChange = 1; |
// 0xD7 is left unchanged and 0xDF lies outside the converted range (upper bound 0xDE).
// NOTE(review): presumably the Latin-1 multiplication sign and sharp s - confirm.
2519 if (ch >= 0xC0 && ch <= 0xDE && ch != 0xD7) { |
2519 } else { |
2520 quickBuffer[i] = ch + 0x20; |
2520 // deal with national latin1 characters |
2521 anyChange = 1; |
2521 if (ch >= 0xC0) { |
2522 } |
2522 if (ch <= 0xDE) { |
2523 } |
2523 if (ch != 0xD7) { |
2524 } |
2524 quickBuffer[i] = ch + 0x20; |
2525 } |
2525 anyChange = 1; |
// No byte was changed: hand back slf (the receiver) instead of building a new string.
2526 if (! anyChange) { |
2526 } |
2527 RETURN(slf); |
2527 } |
2528 } |
2528 } |
2529 quickBuffer[i] = '\0'; |
2529 } |
// NOTE(review): presumably creates a new string from the first i bytes of
// quickBuffer - confirm against the __MKSTRING_L definition.
2530 RETURN (__MKSTRING_L(quickBuffer, i)); |
2530 } |
2531 } |
2531 } |

2532 } |

2533 quickBuffer[i] = '\0'; |

2534 if (! anyChange) { |

2535 RETURN(slf); |

2536 } |

2537 RETURN (__MKSTRING_L(quickBuffer, i)); |

2538 } |

2539 } |
2532 } |
2540 #endif /* ! __SCHTEAM__ */ |
2533 #endif /* ! __SCHTEAM__ */ |
2541 #endif |

2542 %}. |
2534 %}. |
2543 ^ super asLowercase |
2535 ^ super asLowercase |

2536 |

2537 " |

2538 'Hello WORLD' asLowercase |

2539 (String new:300) asLowercase |

2540 " |
2544 ! |
2541 ! |
2545 |
2542 |
2546 asPackageId |
2543 asPackageId |
2547 "given a package-string as receiver, return a packageId object. |
2544 "given a package-string as receiver, return a packageId object. |
2548 packageIds hide the details of module/directory handling inside the path. |
2545 packageIds hide the details of module/directory handling inside the path. |
2799 character = __intVal(__characterVal(aStringOrCharacter)); |
2796 character = __intVal(__characterVal(aStringOrCharacter)); |
2800 l2 = 1; |
2797 l2 = 1; |
2801 cp2 = &character; |
2798 cp2 = &character; |
2802 } else if (__isStringLike(aStringOrCharacter)) { |
2799 } else if (__isStringLike(aStringOrCharacter)) { |
2803 l2 = __stringSize(aStringOrCharacter); |
2800 l2 = __stringSize(aStringOrCharacter); |
2804 cp2 = (char *) __stringVal(aStringOrCharacter); |
|
2805 } else |
2801 } else |
2806 goto out; |
2802 goto out; |
2807 |
2803 |
2808 sz = OHDR_SIZE + l1 + l2 + 1; |
2804 sz = OHDR_SIZE + l1 + l2 + 1; |
2809 __qNew(newString, sz); /* OBJECT ALLOCATION */ |
2805 __qNew(newString, sz); /* OBJECT ALLOCATION */ |
|
2806 |
|
2807 cp1 = (char *) __stringVal(self); |
|
2808 if (cp2 == 0) |
|
2809 cp2 = (char *) __stringVal(aStringOrCharacter); |
|
2810 |
2810 if (newString != nil) { |
2811 if (newString != nil) { |
2811 REGISTER unsigned char *dstp; |
2812 REGISTER unsigned char *dstp; |
2812 |
2813 |
2813 __InstPtr(newString)->o_class = String; |
2814 __InstPtr(newString)->o_class = String; |
2814 __qSTORE(newString, String); |
2815 __qSTORE(newString, String); |
2815 dstp = __stringVal(newString); |
2816 dstp = __stringVal(newString); |
2816 |
2817 |
2817 # ifdef bcopy4 |
2818 # if defined(bcopy4) |
2818 /* knowing that allocation is 4-byte aligned and |
2819 /* knowing that allocation is 4-byte aligned and |
2819 * size rounded up to next 4-byte, the first copy |
2820 * size rounded up to next 4-byte, the first copy |
2820 * can be done word-wise. |
2821 * can be done word-wise. |
2821 * that speeds up size-10-string , size-10-string |
2822 * that speeds up size-10-string , size-10-string |
2822 * by 10% on a P5/200. |
2823 * by 10% on a P5/200. |