String.st
branchjv
changeset 17910 8d796ca8bd1d
parent 17909 0ab1deab8e9c
child 17911 a99f15c5efa5
equal deleted inserted replaced
17909:0ab1deab8e9c 17910:8d796ca8bd1d
   712 
   712 
   713 identityIndexOf:aCharacter
   713 identityIndexOf:aCharacter
   714     "return the index of the first occurrences of the argument, aCharacter
   714     "return the index of the first occurrences of the argument, aCharacter
   715      in the receiver or 0 if not found - reimplemented here for speed."
   715      in the receiver or 0 if not found - reimplemented here for speed."
   716 
   716 
   717 %{  /* NOCONTEXT */
   717     ^ self indexOf:aCharacter
   718 
       
   719     REGISTER unsigned char *cp;
       
   720     REGISTER unsigned byteValue;
       
   721 #ifdef FAST_MEMCHR
       
   722     unsigned char *ncp;
       
   723 #else
       
   724     REGISTER INT index;
       
   725 #endif
       
   726     REGISTER int lastIdx;
       
   727     OBJ cls;
       
   728 
       
   729     if (__isCharacter(aCharacter)) {
       
   730 	byteValue = __intVal(__characterVal(aCharacter));
       
   731 	if (byteValue <= 0xFF) {
       
   732 	    cp = __stringVal(self);
       
   733 	    lastIdx = __stringSize(self);
       
   734 	    if ((cls = __qClass(self)) != String) {
       
   735 		int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
       
   736 
       
   737 		cp += numInstBytes;
       
   738 		lastIdx -= numInstBytes;
       
   739 	    }
       
   740 
       
   741 #ifdef FAST_MEMCHR
       
   742 	    ncp = (unsigned char *) memchr(cp, byteValue, lastIdx);
       
   743 	    if (ncp) {
       
   744 		RETURN ( __mkSmallInteger(ncp - cp + 1) );
       
   745 	    }
       
   746 #else
       
   747 	    for (index=1; index <= lastIdx; index++) {
       
   748 		if (*cp++ == byteValue) { RETURN ( __mkSmallInteger(index) ); }
       
   749 	   }
       
   750 #endif
       
   751 	}
       
   752 	RETURN ( __mkSmallInteger(0));
       
   753     }
       
   754     /* with identity compares, only characters can be in myself */
       
   755     RETURN ( __mkSmallInteger(0));
       
   756 %}.
       
   757     ^ self primitiveFailed
       
   758 
   718 
   759     "
   719     "
   760      'hello world' identityIndexOf:(Character space)
   720      'hello world' identityIndexOf:(Character space)
   761      'hello world' identityIndexOf:$d
   721      'hello world' identityIndexOf:$d
   762      'hello world' identityIndexOf:1
   722      'hello world' identityIndexOf:1
   763      #[0 0 1 0 0] asString identityIndexOf:(Character value:1)
   723      #[0 0 1 0 0] asString identityIndexOf:(Character value:1)
   764      #[0 0 1 0 0] asString identityIndexOf:(Character value:0)
   724      #[0 0 1 0 0] asString identityIndexOf:(Character value:0)
   765     "
   725     "
   766 !
   726 
   767 
   727     "Modified: / 10-01-2012 / 17:07:12 / cg"
   768 includes:aCharacter
   728 !
   769     "return true if the argument, aCharacter is included in the receiver
   729 
   770      - reimplemented here for speed"
   730 identityIndexOf:aCharacter startingAt:index
   771 
   731     "return the index of the first occurrences of the argument, aCharacter
   772 %{  /* NOCONTEXT */
   732      in the receiver or 0 if not found - reimplemented here for speed."
   773 
   733 
   774     REGISTER unsigned char *cp;
   734     ^ self indexOf:aCharacter startingAt:index
   775     REGISTER int byteValue;
   735 
   776 #ifndef FAST_MEMCHR
   736     "
   777     REGISTER unsigned int index;
   737      'hello world' identityIndexOf:(Character space)
   778 #endif
   738      'hello world' identityIndexOf:$d
   779     REGISTER unsigned int lastIdx;
   739      'hello world' identityIndexOf:1
   780     OBJ cls;
   740      #[0 0 1 0 0] asString identityIndexOf:(Character value:1)
   781 
   741      #[0 0 1 0 0] asString identityIndexOf:(Character value:0)
   782     if (__isCharacter(aCharacter)) {
   742     "
   783 	byteValue = __intVal(__characterVal(aCharacter));
   743 
   784 	if (byteValue <= 0xFF) {
   744     "Created: / 10-01-2012 / 17:10:54 / cg"
   785 	    cp = __stringVal(self);
       
   786 	    lastIdx = __stringSize(self);
       
   787 	    if ((cls = __qClass(self)) != String) {
       
   788 		int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
       
   789 
       
   790 		cp += numInstBytes;
       
   791 		lastIdx -= numInstBytes;
       
   792 	    }
       
   793 
       
   794 #ifdef FAST_MEMCHR
       
   795 	    if (memchr(cp, byteValue, lastIdx) != NULL) {
       
   796 		RETURN ( true );
       
   797 	    }
       
   798 #else
       
   799 	    for (index=0; index < lastIdx; index++) {
       
   800 		if (cp[index] == byteValue) { RETURN ( true ); }
       
   801 	    }
       
   802 #endif
       
   803 	}
       
   804 	RETURN (false);
       
   805     }
       
   806 %}.
       
   807     ^ super includes:aCharacter
       
   808 
       
   809     "
       
   810      'hello world' includes:$d
       
   811      'hello world' includes:$o
       
   812      'hello world' includes:$x
       
   813      'hello world' includes:1
       
   814      'hello world' asTwoByteString includes:$o
       
   815      #[0 0 1 0 0] asString includes:(Character value:1)
       
   816      #[0 0 1 0 0] asString includes:(Character value:0)
       
   817      #[1 2 3 4 5] asString includes:(Character value:0)
       
   818     "
       
   819 !
   745 !
   820 
   746 
   821 includesAny:aCollection
   747 includesAny:aCollection
   822     "return true, if the receiver includes any of the characters in the
   748     "return true, if the receiver includes any of the characters in the
   823      argument, aCollection.
   749      argument, aCollection.
   879      'hello world' includesAny:(Array with:$x with:$y)
   805      'hello world' includesAny:(Array with:$x with:$y)
   880      'hello world' includesAny:(Array with:1 with:2)
   806      'hello world' includesAny:(Array with:1 with:2)
   881     "
   807     "
   882 !
   808 !
   883 
   809 
   884 indexOf:aCharacter
       
   885     "return the index of the first occurrences of the argument, aCharacter
       
   886      in the receiver or 0 if not found - reimplemented here for speed."
       
   887 
       
   888 %{  /* NOCONTEXT */
       
   889 
       
   890     REGISTER unsigned char *cp;
       
   891     REGISTER unsigned byteValue;
       
   892 #ifdef FAST_MEMCHR
       
   893     REGISTER unsigned char *ncp;
       
   894 #else
       
   895     REGISTER INT index;
       
   896 #endif
       
   897     REGISTER int last;
       
   898     OBJ cls;
       
   899 
       
   900     if (__isCharacter(aCharacter)) {
       
   901 	byteValue = __intVal(__characterVal(aCharacter));
       
   902 	if (byteValue <= 0xFF) {
       
   903 	    cp = __stringVal(self);
       
   904 	    last = __stringSize(self);
       
   905 	    if ((cls = __qClass(self)) != String) {
       
   906 		int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
       
   907 
       
   908 		cp += numInstBytes;
       
   909 		last -= numInstBytes;
       
   910 	    }
       
   911 
       
   912 #ifdef FAST_MEMCHR
       
   913 	    ncp = (unsigned char *) memchr(cp, byteValue, last);
       
   914 	    if (ncp) {
       
   915 		RETURN ( __mkSmallInteger(ncp - cp + 1) );
       
   916 	    }
       
   917 #else
       
   918 	    for (index=0; index < last; index++) {
       
   919 		if (cp[index] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); }
       
   920 	    }
       
   921 #endif
       
   922 	}
       
   923 	RETURN ( __mkSmallInteger(0));
       
   924     }
       
   925 %}.
       
   926     ^ super indexOf:aCharacter
       
   927 
       
   928     "
       
   929      'hello world' indexOf:(Character space)
       
   930      'hello world' indexOf:$A
       
   931      'hello world' indexOf:$d
       
   932      #[0 0 1 0 0] asString indexOf:(Character value:1)
       
   933      #[0 0 1 0 0] asString indexOf:(Character value:0)
       
   934     "
       
   935 !
       
   936 
       
   937 indexOf:aCharacter startingAt:start
   810 indexOf:aCharacter startingAt:start
   938     "return the index of the first occurrence of the argument, aCharacter
   811     "return the index of the first occurrence of the argument, aCharacter
   939      in myself starting at start, anInteger or 0 if not found;
   812      in myself starting at start, anInteger or 0 if not found;
   940      - reimplemented here for speed"
   813      - reimplemented here for speed"
   941 
   814 
   942 %{  /* NOCONTEXT */
   815 %{  /* NOCONTEXT */
       
   816 #undef __UNROLL_LOOPS__
       
   817 #undef FAST_MEMCHR
       
   818 #define V2
       
   819 
   943     REGISTER unsigned char *cp;
   820     REGISTER unsigned char *cp;
   944 #ifdef FAST_MEMCHR
   821 #ifdef FAST_MEMCHR
   945     REGISTER unsigned char *ncp;
   822     REGISTER unsigned char *ncp;
   946 #endif
   823 #endif
   947     REGISTER INT index;
   824     REGISTER INT index;
   948     REGISTER unsigned byteValue;
   825     REGISTER unsigned byteValue;
   949     int last;
   826     int last;
   950     OBJ cls;
   827     OBJ cls;
   951 
   828 
   952     if (__isSmallInteger(start)) {
   829     if (__isSmallInteger(start)) {
   953 	index = __intVal(start);
   830         index = __intVal(start);
   954 	if (index > 0) {
   831         if (index > 0) {
   955 	    if (__isCharacter(aCharacter)) {
   832             if (__isCharacter(aCharacter)) {
   956 		byteValue = __intVal(_characterVal(aCharacter));
   833                 byteValue = __intVal(_characterVal(aCharacter));
   957 		if (byteValue <= 0xFF) {
   834                 if (byteValue <= 0xFF) {
   958 		    last = __stringSize(self);
   835                     last = __stringSize(self);
   959 		    cp = __stringVal(self);
   836                     cp = __stringVal(self);
   960 		    if ((cls = __qClass(self)) != String) {
   837                     if ((cls = __qClass(self)) != String) {
   961 			int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
   838                         int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
   962 
   839 
   963 			cp += numInstBytes;
   840                         cp += numInstBytes;
   964 			last += numInstBytes;
   841                         last += numInstBytes;
   965 		    }
   842                     }
   966 		    if (index <= last) {
   843                     if (index <= last) {
   967 #ifdef FAST_MEMCHR
   844 #ifdef FAST_MEMCHR    
   968 			ncp = (unsigned char *) memchr(cp+index-1, byteValue, last+1-index);
   845                         ncp = (unsigned char *) memchr(cp+index-1, byteValue, last+1-index);
   969 			if (ncp) {
   846                         if (ncp) {
   970 			    RETURN ( __mkSmallInteger(ncp - cp + 1) );
   847                             RETURN ( __mkSmallInteger(ncp - cp + 1) );
   971 			}
   848                         }
   972 #else
   849 #else
   973 # ifdef __UNROLL_LOOPS__
   850 # ifdef __UNROLL_LOOPS__
   974 			for (; (index+3) <= last; index += 4) {
   851                         {
   975 			    if (cp[index-1] == byteValue) { RETURN ( __mkSmallInteger(index) ); }
   852                             int last3 = last-3;
   976 			    if (cp[index-1+1] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); }
   853 
   977 			    if (cp[index-1+2] == byteValue) { RETURN ( __mkSmallInteger(index+2) ); }
   854                             for (; index <= last3; index += 4) {
   978 			    if (cp[index-1+3] == byteValue) { RETURN ( __mkSmallInteger(index+3) ); }
   855                                 if (cp[index-1] == byteValue) { RETURN ( __mkSmallInteger(index) ); }
   979 			}
   856                                 if (cp[index-1+1] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); }
       
   857                                 if (cp[index-1+2] == byteValue) { RETURN ( __mkSmallInteger(index+2) ); }
       
   858                                 if (cp[index-1+3] == byteValue) { RETURN ( __mkSmallInteger(index+3) ); }
       
   859                             }
       
   860                         }
   980 # endif
   861 # endif
   981 			for (; index <= last; index++) {
   862 # ifdef V1
   982 			    if (cp[index-1] == byteValue) {
   863                         for (; index <= last; index++) {
   983 				RETURN ( __mkSmallInteger(index) );
   864                             if (cp[index-1] == byteValue) {
   984 			    }
   865                                 RETURN ( __mkSmallInteger(index) );
   985 			}
   866                             }
       
   867                         }
       
   868 # endif
       
   869 # ifdef V2
       
   870                         {
       
   871                             // see bit twiddling hacks
       
   872 #                           define hasZeroByte(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
       
   873 #                           define hasByteM(v,m)   hasZeroByte( (v) ^ m)
       
   874 
       
   875                             // the following loop checks four bytes at once
       
   876                             if (((index-1) & 0x3) == 0) {
       
   877                                 int last4 = last-4;
       
   878                                 int m = (~0UL/255 * (byteValue));
       
   879 
       
   880                                 while (index <= last4) {
       
   881                                     unsigned int v = *(unsigned int *)(cp+index-1);
       
   882 
       
   883                                     if (hasByteM(v,m)) break;
       
   884                                     index += 4;
       
   885                                 }
       
   886                             }
       
   887                             while (index <= last) {
       
   888                                 if (cp[index-1] == byteValue) {
       
   889                                     RETURN ( __mkSmallInteger(index) );
       
   890                                 }
       
   891                                 index++;
       
   892                             }
       
   893                         }
       
   894 # endif
   986 #endif
   895 #endif
   987 		    }
   896                     }
   988 		}
   897                 }
   989 	    }
   898             }
   990 	    RETURN ( __mkSmallInteger(0) );
   899             RETURN ( __mkSmallInteger(0) );
   991 	}
   900         }
   992     }
   901     }
       
   902 #undef V2
   993 %}.
   903 %}.
   994     ^ super indexOf:aCharacter startingAt:start
   904     ^ super indexOf:aCharacter startingAt:start
   995 
   905 
   996     "
   906     "
   997      'hello world' indexOf:$0 startingAt:1
   907      'hello world' indexOf:$0 startingAt:1   
   998      'hello world' indexOf:$l startingAt:1
   908      'hello world' indexOf:$l startingAt:1   
   999      'hello world' indexOf:$l startingAt:5
   909      'hello world' indexOf:$l startingAt:5   
  1000      'hello world' indexOf:$d startingAt:5
   910      'hello world' indexOf:$d startingAt:5   
  1001      #[0 0 1 0 0] asString indexOf:(Character value:1) startingAt:1
   911      #[0 0 1 0 0] asString indexOf:(Character value:1) startingAt:1  
  1002      #[0 0 1 0 0] asString indexOf:(Character value:0) startingAt:3
   912      #[0 0 1 0 0] asString indexOf:(Character value:0) startingAt:3
  1003     "
   913 
       
   914      '1234567890123456a' indexOf:$a      
       
   915      '1234567890123456a' indexOf:$b      
       
   916 
       
   917      |s|
       
   918      s := '12345678901234b'.
       
   919      self assert:(s indexOf:$x) == 0.
       
   920      self assert:(s indexOf:$1) == 1.
       
   921      self assert:(s indexOf:$2) == 2.
       
   922      self assert:(s indexOf:$3) == 3.
       
   923      self assert:(s indexOf:$4) == 4.
       
   924      self assert:(s indexOf:$5) == 5.
       
   925      self assert:(s indexOf:$0) == 10.
       
   926      self assert:(s indexOf:$b) == 15.   
       
   927 
       
   928      |s|
       
   929      s := ''.
       
   930      self assert:(s indexOf:$1) == 0.
       
   931      s := '1'.
       
   932      self assert:(s indexOf:$1) == 1.
       
   933      self assert:(s indexOf:$2) == 0.
       
   934      s := '12'.
       
   935      self assert:(s indexOf:$1) == 1.
       
   936      self assert:(s indexOf:$2) == 2.
       
   937      self assert:(s indexOf:$3) == 0.
       
   938      s := '123'.
       
   939      self assert:(s indexOf:$1) == 1.
       
   940      self assert:(s indexOf:$2) == 2.
       
   941      self assert:(s indexOf:$3) == 3.
       
   942      self assert:(s indexOf:$4) == 0.
       
   943      s := '1234'.
       
   944      self assert:(s indexOf:$1) == 1.
       
   945      self assert:(s indexOf:$2) == 2.
       
   946      self assert:(s indexOf:$3) == 3.
       
   947      self assert:(s indexOf:$4) == 4.
       
   948      self assert:(s indexOf:$5) == 0.
       
   949      s := '12345'.
       
   950      self assert:(s indexOf:$1) == 1.
       
   951      self assert:(s indexOf:$2) == 2.
       
   952      self assert:(s indexOf:$3) == 3.
       
   953      self assert:(s indexOf:$4) == 4.
       
   954      self assert:(s indexOf:$5) == 5.
       
   955      self assert:(s indexOf:$6) == 0.
       
   956      s := '123456'.
       
   957      self assert:(s indexOf:$1) == 1.
       
   958      self assert:(s indexOf:$2) == 2.
       
   959      self assert:(s indexOf:$3) == 3.
       
   960      self assert:(s indexOf:$4) == 4.
       
   961      self assert:(s indexOf:$5) == 5.
       
   962      self assert:(s indexOf:$6) == 6.
       
   963      self assert:(s indexOf:$7) == 0.
       
   964      s := '1234567'.
       
   965      self assert:(s indexOf:$1) == 1.
       
   966      self assert:(s indexOf:$2) == 2.
       
   967      self assert:(s indexOf:$3) == 3.
       
   968      self assert:(s indexOf:$4) == 4.
       
   969      self assert:(s indexOf:$5) == 5.
       
   970      self assert:(s indexOf:$6) == 6.
       
   971      self assert:(s indexOf:$7) == 7.
       
   972      self assert:(s indexOf:$8) == 0.
       
   973      s := '12345678'.
       
   974      self assert:(s indexOf:$1) == 1.
       
   975      self assert:(s indexOf:$2) == 2.
       
   976      self assert:(s indexOf:$3) == 3.
       
   977      self assert:(s indexOf:$4) == 4.
       
   978      self assert:(s indexOf:$5) == 5.
       
   979      self assert:(s indexOf:$6) == 6.
       
   980      self assert:(s indexOf:$7) == 7.
       
   981      self assert:(s indexOf:$8) == 8.
       
   982      self assert:(s indexOf:$9) == 0.
       
   983      s := '123456789'.
       
   984      self assert:(s indexOf:$1) == 1.
       
   985      self assert:(s indexOf:$2) == 2.
       
   986      self assert:(s indexOf:$3) == 3.
       
   987      self assert:(s indexOf:$4) == 4.
       
   988      self assert:(s indexOf:$5) == 5.
       
   989      self assert:(s indexOf:$6) == 6.
       
   990      self assert:(s indexOf:$7) == 7.
       
   991      self assert:(s indexOf:$8) == 8.
       
   992      self assert:(s indexOf:$9) == 9.
       
   993 
       
   994      self assert:(s indexOf:$0) == 0.
       
   995      self assert:(s indexOf:$b) == 0.   
       
   996 
       
   997      |s|
       
   998      s := String new:1024.
       
   999      s atAllPut:$a.
       
  1000      s at:512 put:(Character space).
       
  1001      Time millisecondsToRun:[
       
  1002         1000000 timesRepeat:[ s indexOf:(Character space) ]
       
  1003      ]         
       
  1004 
       
  1005      timing (ms):
       
  1006         v1: 1763 normal   
       
  1007             2340 +unroll   
       
  1008             3308 memsrch !!
       
  1009         v2: 1045
       
  1010     "
       
  1011 
       
  1012     "Modified: / 10-01-2012 / 17:09:34 / cg"
  1004 !
  1013 !
  1005 
  1014 
  1006 indexOfAny:aCollectionOfCharacters startingAt:start
  1015 indexOfAny:aCollectionOfCharacters startingAt:start
  1007     "return the index of the first occurrence of any character in aCollectionOfCharacters,
  1016     "return the index of the first occurrence of any character in aCollectionOfCharacters,
  1008      in myself starting at start, anInteger or 0 if not found;
  1017      in myself starting at start, anInteger or 0 if not found;
  3766 ! !
  3775 ! !
  3767 
  3776 
  3768 !String class methodsFor:'documentation'!
  3777 !String class methodsFor:'documentation'!
  3769 
  3778 
  3770 version
  3779 version
  3771     ^ '$Header: /cvs/stx/stx/libbasic/String.st,v 1.276 2011/12/26 13:18:56 cg Exp $'
  3780     ^ '$Header: /cvs/stx/stx/libbasic/String.st,v 1.280 2012/01/10 16:11:13 cg Exp $'
  3772 !
  3781 !
  3773 
  3782 
  3774 version_CVS
  3783 version_CVS
  3775     ^ 'Header: /cvs/stx/stx/libbasic/String.st,v 1.276 2011/12/26 13:18:56 cg Exp '
  3784     ^ 'Header: /cvs/stx/stx/libbasic/String.st,v 1.280 2012/01/10 16:11:13 cg Exp '
  3776 !
  3785 !
  3777 
  3786 
  3778 version_SVN
  3787 version_SVN
  3779     ^ '$Id: String.st 10754 2012-01-06 08:53:28Z vranyj1 $'
  3788     ^ '$Id: String.st 10758 2012-01-19 10:06:02Z vranyj1 $'
  3780 ! !
  3789 ! !
  3781 
  3790 
  3782 
  3791 
       
  3792