--- a/String.st Fri May 06 04:22:40 2016 +0200
+++ b/String.st Fri May 06 04:52:00 2016 +0200
@@ -3978,116 +3978,283 @@
&& (__isSmallInteger(startIndex))
&& (__intVal(startIndex) > 0)
) {
- unsigned char *y = __stringVal(self);
- unsigned char *x = __stringVal(aSubString);
- int m = __stringSize(aSubString);
- int n = __stringSize(self);
+ unsigned char *y = __stringVal(self);
+ unsigned char *x = __stringVal(aSubString);
+ int m = __stringSize(aSubString);
+ int n = __stringSize(self);
# define MAX_PATTERN_SIZE 128
# define XSIZE 256
# define ASIZE 256
# define MAX(a,b) (a>b ? a : b)
- if (m == 0) {
+ if (m == 0) {
#if 1
- /* empty string does not match */
- RETURN(__mkSmallInteger(0));
+ /* empty string does not match */
+ RETURN(__mkSmallInteger(0));
#else
- /* empty string matches */
- RETURN(startIndex);
+ /* empty string matches */
+ RETURN(startIndex);
#endif
- }
- if (m <= XSIZE) {
- int i, j;
- static int lastPatternSize = 0;
- static char lastPattern[MAX_PATTERN_SIZE+1] = { 0 };
- static int bmGs[XSIZE+1], bmBc[ASIZE];
+ }
+ if (m <= XSIZE) {
+ int i, j;
+ static int lastPatternSize = 0;
+ static char lastPattern[MAX_PATTERN_SIZE+1] = { 0 };
+ static int bmGs[XSIZE+1], bmBc[ASIZE];
# define preBmBc(x, m, bmBc) { \
- int i; \
- \
- for (i = 0; i < ASIZE; ++i) \
- bmBc[i] = m; \
- for (i = 0; i < m - 1; ++i) \
- bmBc[x[i]] = m - i - 1; \
- }
+ int i; \
+ \
+ for (i = 0; i < ASIZE; ++i) \
+ bmBc[i] = m; \
+ for (i = 0; i < m - 1; ++i) \
+ bmBc[x[i]] = m - i - 1; \
+ }
# define suffixes(x, m, suff) { \
- int f, g, i; \
- \
- suff[m - 1] = m; \
- g = m - 1; \
- for (i = m - 2; i >= 0; --i) { \
- if (i > g && suff[i + m - 1 - f] < i - g) \
- suff[i] = suff[i + m - 1 - f]; \
- else { \
- if (i < g) \
- g = i; \
- f = i; \
- while (g >= 0 && x[g] == x[g + m - 1 - f]) \
- --g; \
- suff[i] = f - g; \
- } \
- } \
- }
+ int f, g, i; \
+ \
+ suff[m - 1] = m; \
+ g = m - 1; \
+ for (i = m - 2; i >= 0; --i) { \
+ if (i > g && suff[i + m - 1 - f] < i - g) \
+ suff[i] = suff[i + m - 1 - f]; \
+ else { \
+ if (i < g) \
+ g = i; \
+ f = i; \
+ while (g >= 0 && x[g] == x[g + m - 1 - f]) \
+ --g; \
+ suff[i] = f - g; \
+ } \
+ } \
+ }
# define preBmGs(x, m, bmGs) { \
- int i, j, suff[XSIZE]; \
- \
- suffixes(x, m, suff); \
- \
- for (i = 0; i < m; ++i) \
- bmGs[i] = m; \
- j = 0; \
- for (i = m - 1; i >= 0; --i) \
- if (suff[i] == i + 1) \
- for (; j < m - 1 - i; ++j) \
- if (bmGs[j] == m) \
- bmGs[j] = m - 1 - i; \
- for (i = 0; i <= m - 2; ++i) \
- bmGs[m - 1 - suff[i]] = m - 1 - i; \
- }
-
- /* tables only depend on pattern; so we can cache them in case the same string is searched again */
- if ((m == lastPatternSize)
- && (strcmp(lastPattern, x) == 0)) {
- /* tables are still valid */
- // printf("valid: \"%s\"\n", lastPattern);
- } else {
- /* Preprocessing */
- // printf("compute: \"%s\"\n", lastPattern);
- preBmGs(x, m, bmGs);
- preBmBc(x, m, bmBc);
- if (m <= MAX_PATTERN_SIZE) {
- // printf("cache for: \"%s\"\n", lastPattern);
- strcpy(lastPattern, x);
- lastPatternSize = m;
- }
- }
-
- /* Searching */
- j = __intVal(startIndex) - 1;
- while (j <= n - m) {
- for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
- if (i < 0) {
- RETURN (__mkSmallInteger(j+1));
- j += bmGs[0];
- } else {
- int s1 = bmGs[i];
- int s2 = bmBc[y[i + j]] - m + 1 + i;
- j += MAX(s1, s2);
- }
- }
- notFound = true;
- }
+ int i, j, suff[XSIZE]; \
+ \
+ suffixes(x, m, suff); \
+ \
+ for (i = 0; i < m; ++i) \
+ bmGs[i] = m; \
+ j = 0; \
+ for (i = m - 1; i >= 0; --i) \
+ if (suff[i] == i + 1) \
+ for (; j < m - 1 - i; ++j) \
+ if (bmGs[j] == m) \
+ bmGs[j] = m - 1 - i; \
+ for (i = 0; i <= m - 2; ++i) \
+ bmGs[m - 1 - suff[i]] = m - 1 - i; \
+ }
+
+ /* tables only depend on pattern; so we can cache them in case the same string is searched again */
+ if ((m == lastPatternSize)
+ && (strcmp(lastPattern, x) == 0)) {
+ /* tables are still valid */
+ // printf("valid: \"%s\"\n", lastPattern);
+ } else {
+ /* Preprocessing */
+ // printf("compute: \"%s\"\n", lastPattern);
+ preBmGs(x, m, bmGs);
+ preBmBc(x, m, bmBc);
+ if (m <= MAX_PATTERN_SIZE) {
+ // printf("cache for: \"%s\"\n", lastPattern);
+ strcpy(lastPattern, x);
+ lastPatternSize = m;
+ }
+ }
+
+ /* Searching */
+ j = __intVal(startIndex) - 1;
+ while (j <= n - m) {
+ for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
+ if (i < 0) {
+ RETURN (__mkSmallInteger(j+1));
+ j += bmGs[0];
+ } else {
+ int s1 = bmGs[i];
+ int s2 = bmBc[y[i + j]] - m + 1 + i;
+ j += MAX(s1, s2);
+ }
+ }
+ notFound = true;
+ }
}
#endif /* ! __SCHTEAM__ */
%}.
notFound == true ifTrue:[
- ^ exceptionValue value.
+ ^ exceptionValue value.
+ ].
+ ^ self slowIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
+
+ "Modified: / 05-08-2012 / 12:27:31 / cg"
+!
+
+slowIndexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
+ "naive search fallback (non-BM).
+ Use this for short searchStrings (<= 2)
+ or for caseSensitive searches"
+
+ |notFound|
+
+%{
+#ifndef __SCHTEAM__
+ if (__qIsStringLike(self)
+ && __isStringLike(aSubString)
+ && (__isSmallInteger(startIndex))
+ && (__intVal(startIndex) > 0)
+ ) {
+ unsigned char *_pSelf = __stringVal(self);
+ unsigned char *_substring = __stringVal(aSubString);
+ unsigned char *_pSelfI, *_pSelfMax;
+ int _lenSelf = __stringSize(self);
+ int _lenSubstring = __stringSize(aSubString);
+ int _idx0Max = _lenSelf - _lenSubstring;
+ unsigned char _first;
+ unsigned char _ucFirst;
+ unsigned char _lcFirst;
+ unsigned char _oppositeCaseFirst;
+ int i;
+
+ if (_lenSubstring == 0) {
+#if 1
+ /* empty string does not match */
+ notFound = true;
+ goto getOutOfHere;
+#else
+ /* empty string matches */
+ RETURN(startIndex);
+#endif
+ }
+
+ // searched string's length > string
+ if (_idx0Max < 0) {
+ notFound = true;
+ goto getOutOfHere;
+ }
+
+ _first = _lcFirst = _substring[0];
+ if (((_first >= 'A') && (_first <= 'Z'))
+ || ((_first >= 0xC0) && (_first <= 0xDE))) {
+ _ucFirst = _first;
+ _lcFirst = _oppositeCaseFirst = _first - 'A' + 'a';
+ } else {
+ if (((_first >= 'a') && (_first <= 'z'))
+ || ((_first >= 0xE0) && (_first <= 0xFE))) {
+ _lcFirst = _first;
+ _ucFirst = _oppositeCaseFirst = _first - 'a' + 'A';
+ }
+ }
+
+ // idx:
+ // 0123456789
+
+ // 1234567890 - lenSelf = 10
+ // abc - lenSubstring = 3
+ // - idx0Max = 7 (last legal startIndex)
+
+ i = __intVal(startIndex) - 1;
+ _pSelfI = _pSelf + i;
+ _pSelfMax = _pSelf + _idx0Max;
+
+ for (; _pSelfI <= _pSelfMax; _pSelfI++) {
+ int j;
+ unsigned char _selfChar;
+
+ // find the first char
+ _selfChar = _pSelfI[0];
+ if (_selfChar != _first) {
+ if (caseSensitive == true) continue;
+ if (_selfChar != _oppositeCaseFirst) {
+searchNext: ;
+ continue;
+ }
+ }
+
+ // first char matches
+ // compare rest
+ for (j=1; j<_lenSubstring; j++) {
+ char _selfChar, _subChar, _lcSubChar, _ucSubChar;
+
+ _subChar = _substring[j];
+ _selfChar = _pSelfI[j];
+
+ if (_selfChar == _subChar) continue;
+ if (caseSensitive == true) goto searchNext;
+
+ _lcSubChar = _subChar;
+ if (((_lcSubChar >= 'A') && (_lcSubChar <= 'Z'))
+ || ((_lcSubChar >= 0xC0) && (_lcSubChar <= 0xDE))) {
+ _lcSubChar = _subChar - 'A' + 'a';
+ if (_selfChar != _lcSubChar) goto searchNext;
+ } else {
+ if (((_lcSubChar >= 'a') && (_lcSubChar <= 'z'))
+ || ((_lcSubChar >= 0xE0) && (_lcSubChar <= 0xFE))) {
+ _ucSubChar = _subChar - 'a' + 'A';
+ if (_selfChar != _ucSubChar) goto searchNext;
+ } else {
+ goto searchNext;
+ }
+ }
+ }
+ // if we arrive here, we have a match at i
+ RETURN( __mkSmallInteger( _pSelfI - _pSelf + 1 ) );
+ }
+ notFound = true;
+ }
+
+ getOutOfHere: ;
+#endif /* ! __SCHTEAM__ */
+%}.
+ "/ arrive here if either not found, or invalid arguments
+ notFound == true ifTrue:[
+ ^ exceptionValue value.
].
^ super indexOfSubCollection:aSubString startingAt:startIndex ifAbsent:exceptionValue caseSensitive:caseSensitive
- "Modified: / 05-08-2012 / 12:27:31 / cg"
+ "
+ 'abcdefg' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'abcdefg' slowIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'abcdefg' slowIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'abcabcg' slowIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil caseSensitive:false
+
+ 'ABCDEFG' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCDEFG' slowIndexOfSubCollection:'Abc' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCDEFG' slowIndexOfSubCollection:'aBC' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCDEFG' slowIndexOfSubCollection:'ABC' startingAt:1 ifAbsent:nil caseSensitive:false
+
+ 'ABCDEFG' slowIndexOfSubCollection:'a' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCDEFG' slowIndexOfSubCollection:'A' startingAt:1 ifAbsent:nil caseSensitive:false
+
+ 'ABCDEFG' slowIndexOfSubCollection:'bcd' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCDEFG' slowIndexOfSubCollection:'cde' startingAt:1 ifAbsent:nil caseSensitive:false
+ 'ABCABCG' slowIndexOfSubCollection:'abc' startingAt:2 ifAbsent:nil caseSensitive:false
+
+ '1234567890' slowIndexOfSubCollection:'abc' startingAt:1 ifAbsent:nil caseSensitive:false
+ '1234567890' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:false
+ '1234567890' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:true
+ '1234567890' slowIndexOfSubCollection:'234' startingAt:1 ifAbsent:nil caseSensitive:true
+ '1234567890' slowIndexOfSubCollection:'345' startingAt:1 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:1 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:2 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:3 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:4 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:5 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:6 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:7 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:8 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:9 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:10 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'123' startingAt:11 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:1 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:3 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:5 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:6 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:8 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'231' startingAt:9 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'2310' startingAt:6 ifAbsent:nil caseSensitive:true
+ '1231231231' slowIndexOfSubCollection:'2310' startingAt:8 ifAbsent:nil caseSensitive:true
+ "
! !
!String methodsFor:'testing'!