--- a/String.st Tue Mar 28 16:20:49 2017 +0200
+++ b/String.st Tue Mar 28 16:40:46 2017 +0200
@@ -525,7 +525,6 @@
-
!String class methodsFor:'queries'!
defaultPlatformClass
@@ -550,6 +549,8 @@
+
+
!String methodsFor:'accessing'!
at:index
@@ -4312,136 +4313,10 @@
!String methodsFor:'substring searching'!
-indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
- "redefined as primitive for maximum speed (BM)"
-
- |notFound|
-
-%{ /* STACK:4000 */
-#ifndef __SCHTEAM__
- if (__qIsStringLike(self)
- && __isStringLike(aSubString)
- && (caseSensitive == true)
- && (__isSmallInteger(startIndex))
- && (__intVal(startIndex) > 0)
- ) {
- unsigned char *y = __stringVal(self);
- unsigned char *x = __stringVal(aSubString);
- int m = __stringSize(aSubString);
- int n = __stringSize(self);
-# define MAX_PATTERN_SIZE 128
-# define XSIZE 256
-# define ASIZE 256
-# define MAX(a,b) (a>b ? a : b)
-
- if (m == 0) {
-#if 1
- /* empty string does not match */
- RETURN(__mkSmallInteger(0));
-#else
- /* empty string matches */
- RETURN(startIndex);
-#endif
- }
- if (m <= XSIZE) {
- int i, j;
- static int lastPatternSize = 0;
- static char lastPattern[MAX_PATTERN_SIZE+1] = { 0 };
- static int bmGs[XSIZE+1], bmBc[ASIZE];
-
-# define preBmBc(x, m, bmBc) { \
- int i; \
- \
- for (i = 0; i < ASIZE; ++i) \
- bmBc[i] = m; \
- for (i = 0; i < m - 1; ++i) \
- bmBc[x[i]] = m - i - 1; \
- }
-
-# define suffixes(x, m, suff) { \
- int f, g, i; \
- \
- suff[m - 1] = m; \
- g = m - 1; \
- for (i = m - 2; i >= 0; --i) { \
- if (i > g && suff[i + m - 1 - f] < i - g) \
- suff[i] = suff[i + m - 1 - f]; \
- else { \
- if (i < g) \
- g = i; \
- f = i; \
- while (g >= 0 && x[g] == x[g + m - 1 - f]) \
- --g; \
- suff[i] = f - g; \
- } \
- } \
- }
-
-# define preBmGs(x, m, bmGs) { \
- int i, j, suff[XSIZE]; \
- \
- suffixes(x, m, suff); \
- \
- for (i = 0; i < m; ++i) \
- bmGs[i] = m; \
- j = 0; \
- for (i = m - 1; i >= 0; --i) \
- if (suff[i] == i + 1) \
- for (; j < m - 1 - i; ++j) \
- if (bmGs[j] == m) \
- bmGs[j] = m - 1 - i; \
- for (i = 0; i <= m - 2; ++i) \
- bmGs[m - 1 - suff[i]] = m - 1 - i; \
- }
-
- /* tables only depend on pattern; so we can cache them in case the same string is searched again */
- if ((m == lastPatternSize)
- && (strcmp(lastPattern, x) == 0)) {
- /* tables are still valid */
- // printf("valid: \"%s\"\n", lastPattern);
- } else {
- /* Preprocessing */
- // printf("compute: \"%s\"\n", lastPattern);
- preBmGs(x, m, bmGs);
- preBmBc(x, m, bmBc);
- if (m <= MAX_PATTERN_SIZE) {
- // printf("cache for: \"%s\"\n", lastPattern);
- strcpy(lastPattern, x);
- lastPatternSize = m;
- }
- }
-
- /* Searching */
- j = __intVal(startIndex) - 1;
- while (j <= n - m) {
- for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
- if (i < 0) {
- RETURN (__mkSmallInteger(j+1));
- j += bmGs[0];
- } else {
- int s1 = bmGs[i];
- int s2 = bmBc[y[i + j]] - m + 1 + i;
- j += MAX(s1, s2);
- }
- }
- notFound = true;
- }
- }
-#endif /* ! __SCHTEAM__ */
-%}.
- notFound == true ifTrue:[
- ^ exceptionValue value.
- ].
- ^ self slowIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
-
- "Modified: / 05-08-2012 / 12:27:31 / cg"
-!
-
-slowIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
+caseInsensitiveIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue
"naive search fallback (non-BM).
- Use this for short searchStrings (<= 2)
- or for caseInSensitive searches"
-
+ Private method to speed up caseInSensitive searches"
+
|notFound|
%{
@@ -4451,157 +4326,264 @@
&& (__isSmallInteger(startIndex))
&& (__intVal(startIndex) > 0)
) {
- unsigned char *c_pSelf = __stringVal(self);
- unsigned char *c_substring = __stringVal(aSubString);
- unsigned char *c_pSelfI, *c_pSelfMax;
- int c_lenSelf = __stringSize(self);
- int c_lenSubstring = __stringSize(aSubString);
- int c_idx0Max = c_lenSelf - c_lenSubstring;
- unsigned char c_first;
- unsigned char c_ucFirst;
- unsigned char c_lcFirst;
- unsigned char c_oppositeCaseFirst;
- int i;
-
- if (c_lenSubstring == 0) {
+ unsigned char *c_pSelf = __stringVal(self);
+ unsigned char *c_substring = __stringVal(aSubString);
+ unsigned char *c_pSelfI, *c_pSelfMax;
+ int c_lenSelf = __stringSize(self);
+ int c_lenSubstring = __stringSize(aSubString);
+ int c_idx0Max = c_lenSelf - c_lenSubstring;
+ unsigned char c_first;
+ unsigned char c_oppositeCaseFirst = 0;
+ int i;
+
+ if (c_lenSubstring == 0) {
#if 1
- /* empty string does not match */
- notFound = true;
- goto getOutOfHere;
+ /* empty string does not match */
+ notFound = true;
+ goto getOutOfHere;
#else
- /* empty string matches */
- RETURN(startIndex);
+ /* empty string matches */
+ RETURN(startIndex);
#endif
- }
-
- // searched string's length > string
- if (c_idx0Max < 0) {
- notFound = true;
- goto getOutOfHere;
- }
-
- c_first = c_lcFirst = c_substring[0];
- if (((c_first >= 'A') && (c_first <= 'Z'))
- || ((c_first >= 0xC0) && (c_first <= 0xDE))) {
- c_ucFirst = c_first;
- c_lcFirst = c_oppositeCaseFirst = c_first - 'A' + 'a';
- } else {
- if (((c_first >= 'a') && (c_first <= 'z'))
- || ((c_first >= 0xE0) && (c_first <= 0xFE))) {
- c_lcFirst = c_first;
- c_ucFirst = c_oppositeCaseFirst = c_first - 'a' + 'A';
- }
- }
-
- // idx:
- // 0123456789
-
- // 1234567890 - lenSelf = 10
- // abc - lenSubstring = 3
- // - idx0Max = 7 (last legal startIndex)
-
- i = __intVal(startIndex) - 1;
- c_pSelfI = c_pSelf + i;
- c_pSelfMax = c_pSelf + c_idx0Max;
-
- for (; c_pSelfI <= c_pSelfMax; c_pSelfI++) {
- int j;
- unsigned char c_selfChar;
-
- // find the first char
- c_selfChar = c_pSelfI[0];
- if (c_selfChar != c_first) {
- if (caseSensitive == true) continue;
- if (c_selfChar != c_oppositeCaseFirst) {
+ }
+
+ // searched string's length > string
+ if (c_idx0Max < 0) {
+ notFound = true;
+ goto getOutOfHere;
+ }
+
+ c_first = c_substring[0];
+ if (((c_first >= 'A') && (c_first <= 'Z'))
+ || ((c_first >= 0xC0) && (c_first <= 0xDE) && (c_first != 0xD7))) {
+ c_oppositeCaseFirst = c_first - 'A' + 'a';
+ } else {
+ if (((c_first >= 'a') && (c_first <= 'z'))
+ || ((c_first >= 0xE0) && (c_first <= 0xFE) && (c_first != 0xF7))) {
+ c_oppositeCaseFirst = c_first - 'a' + 'A';
+ }
+ }
+
+ // idx:
+ // 0123456789
+
+ // 1234567890 - lenSelf = 10
+ // abc - lenSubstring = 3
+ // - idx0Max = 7 (last legal startIndex)
+
+ i = __intVal(startIndex) - 1;
+ c_pSelfI = c_pSelf + i;
+ c_pSelfMax = c_pSelf + c_idx0Max;
+
+ for (; c_pSelfI <= c_pSelfMax; c_pSelfI++) {
+ int j;
+ unsigned char c_selfChar;
+
+ // find the first char
+ c_selfChar = c_pSelfI[0];
+ if (c_selfChar != c_first && c_selfChar != c_oppositeCaseFirst) {
searchNext: ;
- continue;
- }
- }
-
- // first char matches
- // compare rest
- for (j=1; j<c_lenSubstring; j++) {
- unsigned char c_selfChar, c_subChar, c_lcSubChar, c_ucSubChar;
-
- c_subChar = c_substring[j];
- c_selfChar = c_pSelfI[j];
-
- if (c_selfChar == c_subChar) continue;
- if (caseSensitive == true) goto searchNext;
-
- c_lcSubChar = c_subChar;
- if (((c_lcSubChar >= 'A') && (c_lcSubChar <= 'Z'))
- || ((c_lcSubChar >= 0xC0) && (c_lcSubChar <= 0xDE))) {
- c_lcSubChar = c_subChar - 'A' + 'a';
- if (c_selfChar != c_lcSubChar) goto searchNext;
- } else {
- if (((c_lcSubChar >= 'a') && (c_lcSubChar <= 'z'))
- || ((c_lcSubChar >= 0xE0) && (c_lcSubChar <= 0xFE))) {
- c_ucSubChar = c_subChar - 'a' + 'A';
- if (c_selfChar != c_ucSubChar) goto searchNext;
- } else {
- goto searchNext;
- }
- }
- }
- // if we arrive here, we have a match at i
- RETURN( __mkSmallInteger( c_pSelfI - c_pSelf + 1 ) );
- }
- notFound = true;
+ continue;
+ }
+
+ // first char matches
+ // compare rest
+ for (j=1; j<c_lenSubstring; j++) {
+ unsigned char c_subChar = c_substring[j];
+ unsigned char c_selfChar = c_pSelfI[j];
+
+ if (c_selfChar == c_subChar) continue;
+
+ if (((c_subChar >= 'A') && (c_subChar <= 'Z'))
+ || ((c_subChar >= 0xC0) && (c_subChar <= 0xDE) && (c_subChar != 0xD7))) {
+ unsigned char c_lcSubChar = c_subChar - 'A' + 'a';
+ if (c_selfChar != c_lcSubChar) goto searchNext;
+ } else {
+ if (((c_subChar >= 'a') && (c_subChar <= 'z'))
+ || ((c_subChar >= 0xE0) && (c_subChar <= 0xFE) && (c_subChar != 0xF7))) {
+ unsigned char c_ucSubChar = c_subChar - 'a' + 'A';
+ if (c_selfChar != c_ucSubChar) goto searchNext;
+ } else {
+ goto searchNext;
+ }
+ }
+ }
+ // if we arrive here, we have a match at i
+ RETURN( __mkSmallInteger( c_pSelfI - c_pSelf + 1 ) );
+ }
+ notFound = true;
}
getOutOfHere: ;
#endif /* ! __SCHTEAM__ */
%}.
- "/ arrive here if either not found, or invalid arguments
+
notFound == true ifTrue:[
- ^ exceptionValue value.
+ ^ exceptionValue value.
].
- ^ super indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
+
+ "/ arrive here aSubstring is a UnicodeString or arguments are invalid
+ ^ super
+ indexOfSubCollection:aSubString
+ startingAt:startIndex
+ ifAbsent:exceptionValue
+ caseSensitive:false
"
- 'abcdefg' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
- 'abcdefg' slowIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil caseSensitive:false
- 'abcdefg' slowIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil caseSensitive:false
- 'abcabcg' slowIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil caseSensitive:false
-
- 'ABCDEFG' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCDEFG' slowIndexOfSubCollection:'Abc' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCDEFG' slowIndexOfSubCollection:'aBC' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCDEFG' slowIndexOfSubCollection:'ABC' startingAt:1 ifAbsent:nil caseSensitive:false
-
- 'ABCDEFG' slowIndexOfSubCollection:'a' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCDEFG' slowIndexOfSubCollection:'A' startingAt:1 ifAbsent:nil caseSensitive:false
-
- 'ABCDEFG' slowIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCDEFG' slowIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil caseSensitive:false
- 'ABCABCG' slowIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil caseSensitive:false
-
- '1234567890' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
- '1234567890' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:false
- '1234567890' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:true
- '1234567890' slowIndexOfSubCollection:'234' startingAt:1 ifAbsent:nil caseSensitive:true
- '1234567890' slowIndexOfSubCollection:'345' startingAt:1 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:2 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:3 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:4 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:5 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:6 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:7 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:8 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:9 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:10 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'123' startingAt:11 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:1 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:3 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:5 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:6 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:8 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'231' startingAt:9 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'2310' startingAt:6 ifAbsent:nil caseSensitive:true
- '1231231231' slowIndexOfSubCollection:'2310' startingAt:8 ifAbsent:nil caseSensitive:true
+ 'abcdefg' caseInsensitiveIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil
+ 'abcdefg' caseInsensitiveIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil
+ 'abcdefg' caseInsensitiveIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil
+ 'abcabcg' caseInsensitiveIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil
+
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'Abc' startingAt:1 ifAbsent:nil
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'aBC' startingAt:1 ifAbsent:nil
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'ABC' startingAt:1 ifAbsent:nil
+
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'a' startingAt:1 ifAbsent:nil
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'A' startingAt:1 ifAbsent:nil
+
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil
+ 'ABCDEFG' caseInsensitiveIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil
+ 'ABCABCG' caseInsensitiveIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil
+
+ '1234567890' caseInsensitiveIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil
+ '1234567890' caseInsensitiveIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil
"
+
+ "Created: / 28-03-2017 / 15:33:50 / stefan"
+ "Modified (comment): / 28-03-2017 / 16:35:16 / stefan"
+!
+
+indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
+ "redefined as primitive for maximum speed (BM)"
+
+ |notFound|
+
+ caseSensitive ifFalse:[
+ ^ self caseInsensitiveIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue.
+ ].
+
+%{ /* STACK:4000 */
+#ifndef __SCHTEAM__
+ if (__qIsStringLike(self)
+ && __isStringLike(aSubString)
+ && (__isSmallInteger(startIndex))
+ && (__intVal(startIndex) > 0)
+ ) {
+ unsigned char *y = __stringVal(self);
+ unsigned char *x = __stringVal(aSubString);
+ int m = __stringSize(aSubString);
+ int n = __stringSize(self);
+# define MAX_PATTERN_SIZE 128
+# define XSIZE 256
+# define ASIZE 256
+# define MAX(a,b) (a>b ? a : b)
+
+ if (m == 0) {
+#if 1
+ /* empty string does not match */
+ RETURN(__mkSmallInteger(0));
+#else
+ /* empty string matches */
+ RETURN(startIndex);
+#endif
+ }
+ if (m <= XSIZE) {
+ int i, j;
+ static int lastPatternSize = 0;
+ static char lastPattern[MAX_PATTERN_SIZE+1] = { 0 };
+ static int bmGs[XSIZE+1], bmBc[ASIZE];
+
+# define preBmBc(x, m, bmBc) { \
+ int i; \
+ \
+ for (i = 0; i < ASIZE; ++i) \
+ bmBc[i] = m; \
+ for (i = 0; i < m - 1; ++i) \
+ bmBc[x[i]] = m - i - 1; \
+ }
+
+# define suffixes(x, m, suff) { \
+ int f, g, i; \
+ \
+ suff[m - 1] = m; \
+ g = m - 1; \
+ for (i = m - 2; i >= 0; --i) { \
+ if (i > g && suff[i + m - 1 - f] < i - g) \
+ suff[i] = suff[i + m - 1 - f]; \
+ else { \
+ if (i < g) \
+ g = i; \
+ f = i; \
+ while (g >= 0 && x[g] == x[g + m - 1 - f]) \
+ --g; \
+ suff[i] = f - g; \
+ } \
+ } \
+ }
+
+# define preBmGs(x, m, bmGs) { \
+ int i, j, suff[XSIZE]; \
+ \
+ suffixes(x, m, suff); \
+ \
+ for (i = 0; i < m; ++i) \
+ bmGs[i] = m; \
+ j = 0; \
+ for (i = m - 1; i >= 0; --i) \
+ if (suff[i] == i + 1) \
+ for (; j < m - 1 - i; ++j) \
+ if (bmGs[j] == m) \
+ bmGs[j] = m - 1 - i; \
+ for (i = 0; i <= m - 2; ++i) \
+ bmGs[m - 1 - suff[i]] = m - 1 - i; \
+ }
+
+ /* tables only depend on pattern; so we can cache them in case the same string is searched again */
+ if ((m == lastPatternSize)
+ && (strcmp(lastPattern, x) == 0)) {
+ /* tables are still valid */
+ // printf("valid: \"%s\"\n", lastPattern);
+ } else {
+ /* Preprocessing */
+ // printf("compute: \"%s\"\n", lastPattern);
+ preBmGs(x, m, bmGs);
+ preBmBc(x, m, bmBc);
+ if (m <= MAX_PATTERN_SIZE) {
+ // printf("cache for: \"%s\"\n", lastPattern);
+ strcpy(lastPattern, x);
+ lastPatternSize = m;
+ }
+ }
+
+ /* Searching */
+ j = __intVal(startIndex) - 1;
+ while (j <= n - m) {
+ for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
+ if (i < 0) {
+ RETURN (__mkSmallInteger(j+1));
+ j += bmGs[0];
+ } else {
+ int s1 = bmGs[i];
+ int s2 = bmBc[y[i + j]] - m + 1 + i;
+ j += MAX(s1, s2);
+ }
+ }
+ notFound = true;
+ }
+ }
+#endif /* ! __SCHTEAM__ */
+%}.
+ notFound == true ifTrue:[
+ ^ exceptionValue value.
+ ].
+
+ "/ arrive here aSubstring is a UnicodeString or arguments are invalid
+ ^ super indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:true
+
+ "Modified: / 05-08-2012 / 12:27:31 / cg"
+ "Modified (comment): / 28-03-2017 / 16:31:54 / stefan"
! !
!String methodsFor:'testing'!