712 |
712 |
713 identityIndexOf:aCharacter |
713 identityIndexOf:aCharacter |
714 "return the index of the first occurrence of the argument, aCharacter |
714 "return the index of the first occurrence of the argument, aCharacter |
715 in the receiver or 0 if not found - reimplemented here for speed." |
715 in the receiver or 0 if not found - reimplemented here for speed." |
716 |
716 |
717 %{ /* NOCONTEXT */ |
717 ^ self indexOf:aCharacter |
718 |
|
719 REGISTER unsigned char *cp; |
|
720 REGISTER unsigned byteValue; |
|
721 #ifdef FAST_MEMCHR |
|
722 unsigned char *ncp; |
|
723 #else |
|
724 REGISTER INT index; |
|
725 #endif |
|
726 REGISTER int lastIdx; |
|
727 OBJ cls; |
|
728 |
|
729 if (__isCharacter(aCharacter)) { |
|
730 byteValue = __intVal(__characterVal(aCharacter)); |
|
731 if (byteValue <= 0xFF) { /* only single-byte character codes are searched for */ |
|
732 cp = __stringVal(self); |
|
733 lastIdx = __stringSize(self); |
|
734 if ((cls = __qClass(self)) != String) { |
|
735 int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)); |
|
736 |
|
737 cp += numInstBytes; /* step over the named instance variables of a class other than String */ |
|
738 lastIdx -= numInstBytes; |
|
739 } |
|
740 |
|
741 #ifdef FAST_MEMCHR |
|
742 ncp = (unsigned char *) memchr(cp, byteValue, lastIdx); |
|
743 if (ncp) { |
|
744 RETURN ( __mkSmallInteger(ncp - cp + 1) ); /* 0-based offset -> 1-based index */ |
|
745 } |
|
746 #else |
|
747 for (index=1; index <= lastIdx; index++) { |
|
748 if (*cp++ == byteValue) { RETURN ( __mkSmallInteger(index) ); } |
|
749 } |
|
750 #endif |
|
751 } |
|
752 RETURN ( __mkSmallInteger(0)); /* character code > 0xFF, or no match */ |
|
753 } |
|
754 /* with identity compares, only characters can be in myself */ |
|
755 RETURN ( __mkSmallInteger(0)); |
|
756 %}. |
|
757 ^ self primitiveFailed |
|
758 |
718 |
759 " |
719 " |
760 'hello world' identityIndexOf:(Character space) |
720 'hello world' identityIndexOf:(Character space) |
761 'hello world' identityIndexOf:$d |
721 'hello world' identityIndexOf:$d |
762 'hello world' identityIndexOf:1 |
722 'hello world' identityIndexOf:1 |
763 #[0 0 1 0 0] asString identityIndexOf:(Character value:1) |
723 #[0 0 1 0 0] asString identityIndexOf:(Character value:1) |
764 #[0 0 1 0 0] asString identityIndexOf:(Character value:0) |
724 #[0 0 1 0 0] asString identityIndexOf:(Character value:0) |
765 " |
725 " |
766 ! |
726 |
767 |
727 "Modified: / 10-01-2012 / 17:07:12 / cg" |
768 includes:aCharacter |
728 ! |
769 "return true if the argument, aCharacter is included in the receiver |
729 |
770 - reimplemented here for speed" |
730 identityIndexOf:aCharacter startingAt:index |
771 |
731 "return the index of the first occurrence of the argument, aCharacter |
772 %{ /* NOCONTEXT */ |
732 in the receiver, searching from index, or 0 if not found - delegates to indexOf:startingAt:." |
773 |
733 |
774 REGISTER unsigned char *cp; |
734 ^ self indexOf:aCharacter startingAt:index |
775 REGISTER int byteValue; |
735 |
776 #ifndef FAST_MEMCHR |
736 " |
777 REGISTER unsigned int index; |
737 'hello world' identityIndexOf:(Character space) startingAt:1 |
778 #endif |
738 'hello world' identityIndexOf:$d startingAt:1 |
779 REGISTER unsigned int lastIdx; |
739 'hello world' identityIndexOf:1 startingAt:1 |
780 OBJ cls; |
740 #[0 0 1 0 0] asString identityIndexOf:(Character value:1) startingAt:1 |
781 |
741 #[0 0 1 0 0] asString identityIndexOf:(Character value:0) startingAt:1 |
782 if (__isCharacter(aCharacter)) { |
742 " |
783 byteValue = __intVal(__characterVal(aCharacter)); |
743 |
784 if (byteValue <= 0xFF) { |
744 "Created: / 10-01-2012 / 17:10:54 / cg" |
785 cp = __stringVal(self); |
|
786 lastIdx = __stringSize(self); |
|
787 if ((cls = __qClass(self)) != String) { |
|
788 int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)); |
|
789 |
|
790 cp += numInstBytes; /* step over the named instance variables of a class other than String */ |
|
791 lastIdx -= numInstBytes; |
|
792 } |
|
793 |
|
794 #ifdef FAST_MEMCHR |
|
795 if (memchr(cp, byteValue, lastIdx) != NULL) { |
|
796 RETURN ( true ); |
|
797 } |
|
798 #else |
|
799 for (index=0; index < lastIdx; index++) { |
|
800 if (cp[index] == byteValue) { RETURN ( true ); } |
|
801 } |
|
802 #endif |
|
803 } |
|
804 RETURN (false); /* character code > 0xFF, or no match */ |
|
805 } |
|
806 %}. |
|
807 ^ super includes:aCharacter |
|
808 |
|
809 " |
|
810 'hello world' includes:$d |
|
811 'hello world' includes:$o |
|
812 'hello world' includes:$x |
|
813 'hello world' includes:1 |
|
814 'hello world' asTwoByteString includes:$o |
|
815 #[0 0 1 0 0] asString includes:(Character value:1) |
|
816 #[0 0 1 0 0] asString includes:(Character value:0) |
|
817 #[1 2 3 4 5] asString includes:(Character value:0) |
|
818 " |
|
819 ! |
745 ! |
820 |
746 |
821 includesAny:aCollection |
747 includesAny:aCollection |
822 "return true, if the receiver includes any of the characters in the |
748 "return true, if the receiver includes any of the characters in the |
823 argument, aCollection. |
749 argument, aCollection. |
879 'hello world' includesAny:(Array with:$x with:$y) |
805 'hello world' includesAny:(Array with:$x with:$y) |
880 'hello world' includesAny:(Array with:1 with:2) |
806 'hello world' includesAny:(Array with:1 with:2) |
881 " |
807 " |
882 ! |
808 ! |
883 |
809 |
884 indexOf:aCharacter |
|
885 "return the index of the first occurrence of the argument, aCharacter |
|
886 in the receiver or 0 if not found - reimplemented here for speed." |
|
887 |
|
888 %{ /* NOCONTEXT */ |
|
889 |
|
890 REGISTER unsigned char *cp; |
|
891 REGISTER unsigned byteValue; |
|
892 #ifdef FAST_MEMCHR |
|
893 REGISTER unsigned char *ncp; |
|
894 #else |
|
895 REGISTER INT index; |
|
896 #endif |
|
897 REGISTER int last; |
|
898 OBJ cls; |
|
899 |
|
900 if (__isCharacter(aCharacter)) { |
|
901 byteValue = __intVal(__characterVal(aCharacter)); |
|
902 if (byteValue <= 0xFF) { /* only single-byte character codes are searched for */ |
|
903 cp = __stringVal(self); |
|
904 last = __stringSize(self); |
|
905 if ((cls = __qClass(self)) != String) { |
|
906 int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)); |
|
907 |
|
908 cp += numInstBytes; /* step over the named instance variables of a class other than String */ |
|
909 last -= numInstBytes; |
|
910 } |
|
911 |
|
912 #ifdef FAST_MEMCHR |
|
913 ncp = (unsigned char *) memchr(cp, byteValue, last); |
|
914 if (ncp) { |
|
915 RETURN ( __mkSmallInteger(ncp - cp + 1) ); /* 0-based offset -> 1-based index */ |
|
916 } |
|
917 #else |
|
918 for (index=0; index < last; index++) { |
|
919 if (cp[index] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); } |
|
920 } |
|
921 #endif |
|
922 } |
|
923 RETURN ( __mkSmallInteger(0)); /* character code > 0xFF, or no match */ |
|
924 } |
|
925 %}. |
|
926 ^ super indexOf:aCharacter |
|
927 |
|
928 " |
|
929 'hello world' indexOf:(Character space) |
|
930 'hello world' indexOf:$A |
|
931 'hello world' indexOf:$d |
|
932 #[0 0 1 0 0] asString indexOf:(Character value:1) |
|
933 #[0 0 1 0 0] asString indexOf:(Character value:0) |
|
934 " |
|
935 ! |
|
936 |
|
937 indexOf:aCharacter startingAt:start |
810 indexOf:aCharacter startingAt:start |
938 "return the index of the first occurrence of the argument, aCharacter |
811 "return the index of the first occurrence of the argument, aCharacter |
939 in myself starting at start, anInteger or 0 if not found; |
812 in myself starting at start, anInteger or 0 if not found; |
940 - reimplemented here for speed" |
813 - reimplemented here for speed" |
941 |
814 |
942 %{ /* NOCONTEXT */ |
815 %{ /* NOCONTEXT */ |
|
816 #undef __UNROLL_LOOPS__ |
|
817 #undef FAST_MEMCHR |
|
818 #define V2 |
|
819 |
943 REGISTER unsigned char *cp; |
820 REGISTER unsigned char *cp; |
944 #ifdef FAST_MEMCHR |
821 #ifdef FAST_MEMCHR |
945 REGISTER unsigned char *ncp; |
822 REGISTER unsigned char *ncp; |
946 #endif |
823 #endif |
947 REGISTER INT index; |
824 REGISTER INT index; |
948 REGISTER unsigned byteValue; |
825 REGISTER unsigned byteValue; |
949 int last; |
826 int last; |
950 OBJ cls; |
827 OBJ cls; |
951 |
828 |
952 if (__isSmallInteger(start)) { |
829 if (__isSmallInteger(start)) { |
953 index = __intVal(start); |
830 index = __intVal(start); |
954 if (index > 0) { |
831 if (index > 0) { |
955 if (__isCharacter(aCharacter)) { |
832 if (__isCharacter(aCharacter)) { |
956 byteValue = __intVal(__characterVal(aCharacter)); |
833 byteValue = __intVal(__characterVal(aCharacter)); |
957 if (byteValue <= 0xFF) { |
834 if (byteValue <= 0xFF) { |
958 last = __stringSize(self); |
835 last = __stringSize(self); |
959 cp = __stringVal(self); |
836 cp = __stringVal(self); |
960 if ((cls = __qClass(self)) != String) { |
837 if ((cls = __qClass(self)) != String) { |
961 int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)); |
838 int numInstBytes = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars)); |
962 |
839 |
963 cp += numInstBytes; /* step over the named instance variables of a class other than String */ |
840 cp += numInstBytes; /* step over the named instance variables of a class other than String */ |
964 last -= numInstBytes; /* the skipped instvar bytes do not count towards the searchable length */ |
841 last -= numInstBytes; /* the skipped instvar bytes do not count towards the searchable length */ |
965 } |
842 } |
966 if (index <= last) { |
843 if (index <= last) { |
967 #ifdef FAST_MEMCHR |
844 #ifdef FAST_MEMCHR |
968 ncp = (unsigned char *) memchr(cp+index-1, byteValue, last+1-index); |
845 ncp = (unsigned char *) memchr(cp+index-1, byteValue, last+1-index); |
969 if (ncp) { |
846 if (ncp) { |
970 RETURN ( __mkSmallInteger(ncp - cp + 1) ); |
847 RETURN ( __mkSmallInteger(ncp - cp + 1) ); |
971 } |
848 } |
972 #else |
849 #else |
973 # ifdef __UNROLL_LOOPS__ |
850 # ifdef __UNROLL_LOOPS__ |
974 for (; (index+3) <= last; index += 4) { |
851 { |
975 if (cp[index-1] == byteValue) { RETURN ( __mkSmallInteger(index) ); } |
852 int last3 = last-3; |
976 if (cp[index-1+1] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); } |
853 |
977 if (cp[index-1+2] == byteValue) { RETURN ( __mkSmallInteger(index+2) ); } |
854 for (; index <= last3; index += 4) { |
978 if (cp[index-1+3] == byteValue) { RETURN ( __mkSmallInteger(index+3) ); } |
855 if (cp[index-1] == byteValue) { RETURN ( __mkSmallInteger(index) ); } |
979 } |
856 if (cp[index-1+1] == byteValue) { RETURN ( __mkSmallInteger(index+1) ); } |
|
857 if (cp[index-1+2] == byteValue) { RETURN ( __mkSmallInteger(index+2) ); } |
|
858 if (cp[index-1+3] == byteValue) { RETURN ( __mkSmallInteger(index+3) ); } |
|
859 } |
|
860 } |
980 # endif |
861 # endif |
981 for (; index <= last; index++) { |
862 # ifdef V1 |
982 if (cp[index-1] == byteValue) { |
863 for (; index <= last; index++) { |
983 RETURN ( __mkSmallInteger(index) ); |
864 if (cp[index-1] == byteValue) { |
984 } |
865 RETURN ( __mkSmallInteger(index) ); |
985 } |
866 } |
|
867 } |
|
868 # endif |
|
869 # ifdef V2 |
|
870 { |
|
871 // see bit twiddling hacks |
|
872 # define hasZeroByte(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL) |
|
873 # define hasByteM(v,m) hasZeroByte( (v) ^ m) |
|
874 |
|
875 // the following loop checks four bytes at once |
|
876 if (((index-1) & 0x3) == 0) { |
|
877 int last4 = last-4; |
|
878 int m = (~0UL/255 * (byteValue)); /* byteValue replicated into every byte of the mask */ |
|
879 |
|
880 while (index <= last4) { |
|
881 unsigned int v = *(unsigned int *)(cp+index-1); |
|
882 |
|
883 if (hasByteM(v,m)) break; /* this word contains the byte; the loop below finds its exact index */ |
|
884 index += 4; |
|
885 } |
|
886 } |
|
887 while (index <= last) { |
|
888 if (cp[index-1] == byteValue) { |
|
889 RETURN ( __mkSmallInteger(index) ); |
|
890 } |
|
891 index++; |
|
892 } |
|
893 } |
|
894 # endif |
986 #endif |
895 #endif |
987 } |
896 } |
988 } |
897 } |
989 } |
898 } |
990 RETURN ( __mkSmallInteger(0) ); |
899 RETURN ( __mkSmallInteger(0) ); |
991 } |
900 } |
992 } |
901 } |
|
902 #undef V2 |
993 %}. |
903 %}. |
994 ^ super indexOf:aCharacter startingAt:start |
904 ^ super indexOf:aCharacter startingAt:start |
995 |
905 |
996 " |
906 " |
997 'hello world' indexOf:$0 startingAt:1 |
907 'hello world' indexOf:$0 startingAt:1 |
998 'hello world' indexOf:$l startingAt:1 |
908 'hello world' indexOf:$l startingAt:1 |
999 'hello world' indexOf:$l startingAt:5 |
909 'hello world' indexOf:$l startingAt:5 |
1000 'hello world' indexOf:$d startingAt:5 |
910 'hello world' indexOf:$d startingAt:5 |
1001 #[0 0 1 0 0] asString indexOf:(Character value:1) startingAt:1 |
911 #[0 0 1 0 0] asString indexOf:(Character value:1) startingAt:1 |
1002 #[0 0 1 0 0] asString indexOf:(Character value:0) startingAt:3 |
912 #[0 0 1 0 0] asString indexOf:(Character value:0) startingAt:3 |
1003 " |
913 |
|
914 '1234567890123456a' indexOf:$a |
|
915 '1234567890123456a' indexOf:$b |
|
916 |
|
917 |s| |
|
918 s := '12345678901234b'. |
|
919 self assert:(s indexOf:$x) == 0. |
|
920 self assert:(s indexOf:$1) == 1. |
|
921 self assert:(s indexOf:$2) == 2. |
|
922 self assert:(s indexOf:$3) == 3. |
|
923 self assert:(s indexOf:$4) == 4. |
|
924 self assert:(s indexOf:$5) == 5. |
|
925 self assert:(s indexOf:$0) == 10. |
|
926 self assert:(s indexOf:$b) == 15. |
|
927 |
|
928 |s| |
|
929 s := ''. |
|
930 self assert:(s indexOf:$1) == 0. |
|
931 s := '1'. |
|
932 self assert:(s indexOf:$1) == 1. |
|
933 self assert:(s indexOf:$2) == 0. |
|
934 s := '12'. |
|
935 self assert:(s indexOf:$1) == 1. |
|
936 self assert:(s indexOf:$2) == 2. |
|
937 self assert:(s indexOf:$3) == 0. |
|
938 s := '123'. |
|
939 self assert:(s indexOf:$1) == 1. |
|
940 self assert:(s indexOf:$2) == 2. |
|
941 self assert:(s indexOf:$3) == 3. |
|
942 self assert:(s indexOf:$4) == 0. |
|
943 s := '1234'. |
|
944 self assert:(s indexOf:$1) == 1. |
|
945 self assert:(s indexOf:$2) == 2. |
|
946 self assert:(s indexOf:$3) == 3. |
|
947 self assert:(s indexOf:$4) == 4. |
|
948 self assert:(s indexOf:$5) == 0. |
|
949 s := '12345'. |
|
950 self assert:(s indexOf:$1) == 1. |
|
951 self assert:(s indexOf:$2) == 2. |
|
952 self assert:(s indexOf:$3) == 3. |
|
953 self assert:(s indexOf:$4) == 4. |
|
954 self assert:(s indexOf:$5) == 5. |
|
955 self assert:(s indexOf:$6) == 0. |
|
956 s := '123456'. |
|
957 self assert:(s indexOf:$1) == 1. |
|
958 self assert:(s indexOf:$2) == 2. |
|
959 self assert:(s indexOf:$3) == 3. |
|
960 self assert:(s indexOf:$4) == 4. |
|
961 self assert:(s indexOf:$5) == 5. |
|
962 self assert:(s indexOf:$6) == 6. |
|
963 self assert:(s indexOf:$7) == 0. |
|
964 s := '1234567'. |
|
965 self assert:(s indexOf:$1) == 1. |
|
966 self assert:(s indexOf:$2) == 2. |
|
967 self assert:(s indexOf:$3) == 3. |
|
968 self assert:(s indexOf:$4) == 4. |
|
969 self assert:(s indexOf:$5) == 5. |
|
970 self assert:(s indexOf:$6) == 6. |
|
971 self assert:(s indexOf:$7) == 7. |
|
972 self assert:(s indexOf:$8) == 0. |
|
973 s := '12345678'. |
|
974 self assert:(s indexOf:$1) == 1. |
|
975 self assert:(s indexOf:$2) == 2. |
|
976 self assert:(s indexOf:$3) == 3. |
|
977 self assert:(s indexOf:$4) == 4. |
|
978 self assert:(s indexOf:$5) == 5. |
|
979 self assert:(s indexOf:$6) == 6. |
|
980 self assert:(s indexOf:$7) == 7. |
|
981 self assert:(s indexOf:$8) == 8. |
|
982 self assert:(s indexOf:$9) == 0. |
|
983 s := '123456789'. |
|
984 self assert:(s indexOf:$1) == 1. |
|
985 self assert:(s indexOf:$2) == 2. |
|
986 self assert:(s indexOf:$3) == 3. |
|
987 self assert:(s indexOf:$4) == 4. |
|
988 self assert:(s indexOf:$5) == 5. |
|
989 self assert:(s indexOf:$6) == 6. |
|
990 self assert:(s indexOf:$7) == 7. |
|
991 self assert:(s indexOf:$8) == 8. |
|
992 self assert:(s indexOf:$9) == 9. |
|
993 |
|
994 self assert:(s indexOf:$0) == 0. |
|
995 self assert:(s indexOf:$b) == 0. |
|
996 |
|
997 |s| |
|
998 s := String new:1024. |
|
999 s atAllPut:$a. |
|
1000 s at:512 put:(Character space). |
|
1001 Time millisecondsToRun:[ |
|
1002 1000000 timesRepeat:[ s indexOf:(Character space) ] |
|
1003 ] |
|
1004 |
|
1005 timing (ms): |
|
1006 v1: 1763 normal |
|
1007 2340 +unroll |
|
1008 3308 memsrch !! |
|
1009 v2: 1045 |
|
1010 " |
|
1011 |
|
1012 "Modified: / 10-01-2012 / 17:09:34 / cg" |
1004 ! |
1013 ! |
1005 |
1014 |
1006 indexOfAny:aCollectionOfCharacters startingAt:start |
1015 indexOfAny:aCollectionOfCharacters startingAt:start |
1007 "return the index of the first occurrence of any character in aCollectionOfCharacters, |
1016 "return the index of the first occurrence of any character in aCollectionOfCharacters, |
1008 in myself starting at start, anInteger or 0 if not found; |
1017 in myself starting at start, anInteger or 0 if not found; |