SmallInteger.st
changeset 23942 d453cd166c2b
parent 23616 af0ae1b23128
child 23961 7e0c1ebaf951
--- a/SmallInteger.st	Tue Mar 19 23:01:52 2019 +0100
+++ b/SmallInteger.st	Wed Mar 20 12:49:59 2019 +0100
@@ -1054,7 +1054,6 @@
 ! !
 
 
-
 !SmallInteger methodsFor:'bit operators'!
 
 bitAnd:anInteger
@@ -1129,9 +1128,10 @@
 #else
     unsigned int _cnt;
 
-    // popcnt is slower on some cpus;
-    // ALGO3 is good on all
-# if 0 && defined(__GNUC__) && (defined(__x86__) || defined(__x86_64__))
+    // popcnt is actually slower on some CPUs,
+    // and almost equal to ALGO3 on modern Intel hardware.
+    // So ALGO3 is good for all.
+# if 0 && (defined(__GNUC__) || defined(clang)) && (defined(__x86__) || defined(__x86_64__))
 #  define ALGORITHM_4
 # else
 #  define ALGORITHM_3
@@ -1228,19 +1228,13 @@
 # elif defined( ALGORITHM_5 )
 
      // using the builtin_popcnt intrinsic
+    unsigned INT _v;
+    _v = ((INT)self) - TAG_INT;   // remove the tag
 
 #  if __POINTER_SIZE__ == 8
-    unsigned INT _v;
-
-    _v = ((INT)self) - TAG_INT;   // remove the tag
     _cnt = __builtin_popcountll(_v);
-
 #  else // not POINTER_SIZE 8
-
-    unsigned INT _v;
-    _v = (INT)self - TAG_INT;     // remove the tag
     _cnt = __builtin_popcountl(_v);
-
 #  endif
 
 # else
@@ -1254,11 +1248,13 @@
     ^ super bitCount.
 
     "
+     16rAA bitCount
+     
      TimeDuration toRun:[
         1 to:10000000 do:[:n |
             n bitCount
         ].
-     ]
+     ] 
 
      AL1: 967ms 958ms 971ms 930ms
      AL2: 900ms 872ms 877ms 870ms
@@ -1267,6 +1263,10 @@
      AL4: 858ms 852ms 846ms 810ms
      AL5: 830ms 843ms 835ms 845ms
 
+     Mac PB2012/2.6GHz i7
+     AL3: 855ms 885ms 859ms 878ms 844ms
+     AL5: 877ms 877ms 846ms 890ms 853ms
+     
      1 to:1000000 do:[:n |
         self assert:(n bitCount = ((n printStringRadix:2) occurrencesOf:$1))
      ].
@@ -1303,6 +1303,7 @@
     "
 
     "Modified: / 09-01-2012 / 19:12:41 / cg"
+    "Modified: / 20-03-2019 / 12:49:47 / Claus Gittinger"
 !
 
 bitDeinterleave:n