--- a/SmallInteger.st Tue Mar 19 23:01:52 2019 +0100
+++ b/SmallInteger.st Wed Mar 20 12:49:59 2019 +0100
@@ -1054,7 +1054,6 @@
! !
-
!SmallInteger methodsFor:'bit operators'!
bitAnd:anInteger
@@ -1129,9 +1128,10 @@
#else
unsigned int _cnt;
- // popcnt is slower on some cpus;
- // ALGO3 is good on all
-# if 0 && defined(__GNUC__) && (defined(__x86__) || defined(__x86_64__))
+ // popcnt is actually slower on some CPUs,
+ // and almost equal to ALGO3 on modern Intel hardware.
+ // So ALGO3 is good for all.
+# if 0 && (defined(__GNUC__) || defined(clang)) && (defined(__x86__) || defined(__x86_64__))
# define ALGORITHM_4
# else
# define ALGORITHM_3
@@ -1228,19 +1228,13 @@
# elif defined( ALGORITHM_5 )
// using the builtin_popcnt intrinsic
+ unsigned INT _v;
+ _v = ((INT)self) - TAG_INT; // remove the tag
# if __POINTER_SIZE__ == 8
- unsigned INT _v;
-
- _v = ((INT)self) - TAG_INT; // remove the tag
_cnt = __builtin_popcountll(_v);
-
# else // not POINTER_SIZE 8
-
- unsigned INT _v;
- _v = (INT)self - TAG_INT; // remove the tag
_cnt = __builtin_popcountl(_v);
-
# endif
# else
@@ -1254,11 +1248,13 @@
^ super bitCount.
"
+ 16rAA bitCount
+
TimeDuration toRun:[
1 to:10000000 do:[:n |
n bitCount
].
- ]
+ ]
AL1: 967ms 958ms 971ms 930ms
AL2: 900ms 872ms 877ms 870ms
@@ -1267,6 +1263,10 @@
AL4: 858ms 852ms 846ms 810ms
AL5: 830ms 843ms 835ms 845ms
+ Mac PB2012/2.6GHz i7
+ AL3: 855ms 885ms 859ms 878ms 844ms
+ AL5: 877ms 877ms 846ms 890ms 853ms
+
1 to:1000000 do:[:n |
self assert:(n bitCount = ((n printStringRadix:2) occurrencesOf:$1))
].
@@ -1303,6 +1303,7 @@
"
"Modified: / 09-01-2012 / 19:12:41 / cg"
+ "Modified: / 20-03-2019 / 12:49:47 / Claus Gittinger"
!
bitDeinterleave:n