Improved performance of perform:, perform:with: and perform:with:with:. There is still room for improvement jv
authorJan Vrany <jan.vrany@fit.cvut.cz>
Thu, 22 Jul 2010 13:32:34 +0100
branchjv
changeset 17788 c4f89c6602a5
parent 17787 c56ba6d58661
child 17789 338675f49dd6
Improved performance of perform:, perform:with: and perform:with:with:. There is still room for improvement
NamespaceAwareLookup.st
Object.st
stx_libbasic.st
--- a/NamespaceAwareLookup.st	Wed Jul 21 19:58:43 2010 +0100
+++ b/NamespaceAwareLookup.st	Thu Jul 22 13:32:34 2010 +0100
@@ -75,11 +75,11 @@
             ifNil:[nil]                
             ifNotNil:[sendingContext method nameSpace].
 
-    " "
+    " 
     Transcript 
             show: 'sel='; show: selector; show: ' ns='; show: sendingNs printString; 
             show: ' method=', sendingContext method printString; cr.
-    " "
+    " 
 
     queue := Queue with: (Array with: sendingNs).
     seen := Set new.
@@ -167,7 +167,7 @@
 !NamespaceAwareLookup class methodsFor:'documentation'!
 
 version_SVN
-    ^ '$Id: NamespaceAwareLookup.st 10552 2010-07-21 18:58:43Z vranyj1 $'
+    ^ '$Id: NamespaceAwareLookup.st 10553 2010-07-22 12:32:34Z vranyj1 $'
 ! !
 
 NamespaceAwareLookup initialize!
--- a/Object.st	Wed Jul 21 19:58:43 2010 +0100
+++ b/Object.st	Thu Jul 22 13:32:34 2010 +0100
@@ -5075,56 +5075,56 @@
 
     if (InterruptPending == nil) {
         struct inlineCache *pIlc;
-        static struct inlineCache ilc_0 = __ILCPERF0(@line);
-        static struct inlineCache ilc_1 = __ILCPERF0(@line);
-        static struct inlineCache ilc_2 = __ILCPERF0(@line);
-        static struct inlineCache ilc_3 = __ILCPERF0(@line);
-        static struct inlineCache ilc_4 = __ILCPERF0(@line);
-        static OBJ last_0 = nil;
-        static OBJ last_1 = nil;
-        static OBJ last_2 = nil;
-        static OBJ last_3 = nil;
-        static OBJ last_4 = nil;
+    /* JV @ 2010-07-22: To improve performance, 256 ILCs are used instead
+       of the default 4. This significantly reduces ILC misses. The code
+       could be further optimized by remembering the last selector index and
+       trying that index first. Another option is to use some kind of
+       hashing on the lower bits of the sel variable. However, more detailed
+       statistical data should be gathered before doing further optimizations.
+    */
+
+#define SEL_AND_ILC_INIT_1(l)   { nil , __ILCPERF0(l) }
+#define SEL_AND_ILC_INIT_2(l)   SEL_AND_ILC_INIT_1(l)   , SEL_AND_ILC_INIT_1(l)
+#define SEL_AND_ILC_INIT_4(l)   SEL_AND_ILC_INIT_2(l)   , SEL_AND_ILC_INIT_2(l)
+#define SEL_AND_ILC_INIT_8(l)   SEL_AND_ILC_INIT_4(l)   , SEL_AND_ILC_INIT_4(l)
+#define SEL_AND_ILC_INIT_16(l)  SEL_AND_ILC_INIT_8(l)   , SEL_AND_ILC_INIT_8(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_64(l)  SEL_AND_ILC_INIT_32(l)  , SEL_AND_ILC_INIT_32(l)
+#define SEL_AND_ILC_INIT_128(l) SEL_AND_ILC_INIT_64(l)  , SEL_AND_ILC_INIT_64(l)
+#define SEL_AND_ILC_INIT_256(l) SEL_AND_ILC_INIT_128(l) , SEL_AND_ILC_INIT_128(l)
+#define nilcs 256
+
+        static struct { OBJ sel; struct inlineCache ilc; } sel_and_ilc[nilcs] = { SEL_AND_ILC_INIT_256(29) };
+
+#undef SEL_AND_ILC_INIT_1
+#undef SEL_AND_ILC_INIT_2
+#undef SEL_AND_ILC_INIT_4
+#undef SEL_AND_ILC_INIT_8
+#undef SEL_AND_ILC_INIT_16
+#undef SEL_AND_ILC_INIT_32
+#undef SEL_AND_ILC_INIT_64
+#undef SEL_AND_ILC_INIT_128
+#undef SEL_AND_ILC_INIT_256
+
         static flip = 0;
-
-        if (sel == last_0) {
-            pIlc = &ilc_0;
-        } else if (sel == last_1) {
-            pIlc = &ilc_1;
-        } else if (sel == last_2) {
-            pIlc = &ilc_2;
-        } else if (sel == last_3) {
-            pIlc = &ilc_3;
-        } else if (sel == last_4) {
-            pIlc = &ilc_4;
-        } else {
-            if (flip == 0) {
-                pIlc = &ilc_0;
-                flip = 1;
-                last_0 = sel;
-            } else if (flip == 1) {
-                pIlc = &ilc_1;
-                flip = 2;
-                last_1 = sel;
-            } else if (flip == 2) {
-                pIlc = &ilc_2;
-                flip = 3;
-                last_2 = sel;
-            } else if (flip == 3) {
-                pIlc = &ilc_3;
-                flip = 4;
-                last_3 = sel;
-            } else {
-                pIlc = &ilc_4;
-                flip = 0;
-                last_4 = sel;
-            }
-            pIlc->ilc_func = __SEND0ADDR__;
-            if (pIlc->ilc_poly) {
-                __flushPolyCache(pIlc->ilc_poly);
-                pIlc->ilc_poly = 0;
-            }
+        int i;
+        for (i = 0; i < nilcs; i++) {
+           if (sel == sel_and_ilc[i].sel) {
+                pIlc = &sel_and_ilc[i].ilc;
+                goto perform0_send_and_return;
+           }       
         }
+        printf("Object >> #perform: #%s --> no PIC found\n", __symbolVal(aSelector));
+        pIlc = &sel_and_ilc[flip].ilc;
+        sel_and_ilc[flip].sel = sel;
+        flip = (flip + 1) % nilcs;
+        pIlc->ilc_func = __SEND0ADDR__;
+        if (pIlc->ilc_poly) {
+             __flushPolyCache(pIlc->ilc_poly);
+            pIlc->ilc_poly = 0;
+        }
+perform0_send_and_return:
         RETURN ( (*(pIlc->ilc_func))(self, sel, nil, pIlc) );
     } else {
         static struct inlineCache ilc0 = __DUMMYILCSELF0(@line+1);
@@ -5343,56 +5343,51 @@
 
     if (InterruptPending == nil) {
         struct inlineCache *pIlc;
-        static struct inlineCache ilc_0 = __ILCPERF1(@line);
-        static struct inlineCache ilc_1 = __ILCPERF1(@line);
-        static struct inlineCache ilc_2 = __ILCPERF1(@line);
-        static struct inlineCache ilc_3 = __ILCPERF1(@line);
-        static struct inlineCache ilc_4 = __ILCPERF1(@line);
-        static OBJ last_0 = nil;
-        static OBJ last_1 = nil;
-        static OBJ last_2 = nil;
-        static OBJ last_3 = nil;
-        static OBJ last_4 = nil;
+    /* JV @ 2010-07-22: To improve performance, 256 ILCs are used instead
+       of the default 4. For details, see the comment in perform: */
+
+#define SEL_AND_ILC_INIT_1(l)   { nil , __ILCPERF1(l) }
+#define SEL_AND_ILC_INIT_2(l)   SEL_AND_ILC_INIT_1(l)   , SEL_AND_ILC_INIT_1(l)
+#define SEL_AND_ILC_INIT_4(l)   SEL_AND_ILC_INIT_2(l)   , SEL_AND_ILC_INIT_2(l)
+#define SEL_AND_ILC_INIT_8(l)   SEL_AND_ILC_INIT_4(l)   , SEL_AND_ILC_INIT_4(l)
+#define SEL_AND_ILC_INIT_16(l)  SEL_AND_ILC_INIT_8(l)   , SEL_AND_ILC_INIT_8(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_64(l)  SEL_AND_ILC_INIT_32(l)  , SEL_AND_ILC_INIT_32(l)
+#define SEL_AND_ILC_INIT_128(l) SEL_AND_ILC_INIT_64(l)  , SEL_AND_ILC_INIT_64(l)
+#define SEL_AND_ILC_INIT_256(l) SEL_AND_ILC_INIT_128(l) , SEL_AND_ILC_INIT_128(l)
+#define nilcs 256
+
+        static struct { OBJ sel; struct inlineCache ilc; } sel_and_ilc[nilcs] = { SEL_AND_ILC_INIT_256(29) };
+
+#undef SEL_AND_ILC_INIT_1
+#undef SEL_AND_ILC_INIT_2
+#undef SEL_AND_ILC_INIT_4
+#undef SEL_AND_ILC_INIT_8
+#undef SEL_AND_ILC_INIT_16
+#undef SEL_AND_ILC_INIT_32
+#undef SEL_AND_ILC_INIT_64
+#undef SEL_AND_ILC_INIT_128
+#undef SEL_AND_ILC_INIT_256
+
         static flip = 0;
-
-        if (sel == last_0) {
-            pIlc = &ilc_0;
-        } else if (sel == last_1) {
-            pIlc = &ilc_1;
-        } else if (sel == last_2) {
-            pIlc = &ilc_2;
-        } else if (sel == last_3) {
-            pIlc = &ilc_3;
-        } else if (sel == last_4) {
-            pIlc = &ilc_4;
-        } else {
-            if (flip == 0) {
-                pIlc = &ilc_0;
-                flip = 1;
-                last_0 = sel;
-            } else if (flip == 1) {
-                pIlc = &ilc_1;
-                flip = 2;
-                last_1 = sel;
-            } else if (flip == 2) {
-                pIlc = &ilc_2;
-                flip = 3;
-                last_2 = sel;
-            } else if (flip == 3) {
-                pIlc = &ilc_3;
-                flip = 4;
-                last_3 = sel;
-            } else {
-                pIlc = &ilc_4;
-                flip = 0;
-                last_4 = sel;
-            }
-            pIlc->ilc_func = __SEND1ADDR__;
-            if (pIlc->ilc_poly) {
-                __flushPolyCache(pIlc->ilc_poly);
-                pIlc->ilc_poly = 0;
-            }
+        int i;
+        for (i = 0; i < nilcs; i++) {
+           if (sel == sel_and_ilc[i].sel) {
+                pIlc = &sel_and_ilc[i].ilc;
+                goto perform1_send_and_return;
+           }       
         }
+        printf("Object >> #perform: #%s with: arg --> no PIC found\n", __symbolVal(aSelector));
+        pIlc = &sel_and_ilc[flip].ilc;
+        sel_and_ilc[flip].sel = sel;
+        flip = (flip + 1) % nilcs;
+        pIlc->ilc_func = __SEND1ADDR__;
+        if (pIlc->ilc_poly) {
+             __flushPolyCache(pIlc->ilc_poly);
+            pIlc->ilc_poly = 0;
+        }
+perform1_send_and_return:
         RETURN ( (*(pIlc->ilc_func))(self, sel, nil, pIlc, arg) );
     } else {
         static struct inlineCache ilc1 = __DUMMYILCSELF1(@line+1);
@@ -5407,35 +5402,56 @@
     "send the two-arg-message aSelector to the receiver"
 
 %{
+    REGISTER OBJ sel = aSelector;
     struct inlineCache *pIlc;
 
     if (InterruptPending == nil) {
-        static struct inlineCache ilc_0 = __ILCPERF2(@line);
-        static struct inlineCache ilc_1 = __ILCPERF2(@line);
-        static OBJ last_0 = nil;
-        static OBJ last_1 = nil;
+
+    /* JV @ 2010-07-22: To improve performance, 256 ILCs are used instead
+       of the default 4. For details, see the comment in perform: */
+
+#define SEL_AND_ILC_INIT_1(l)   { nil , __ILCPERF2(l) }
+#define SEL_AND_ILC_INIT_2(l)   SEL_AND_ILC_INIT_1(l)   , SEL_AND_ILC_INIT_1(l)
+#define SEL_AND_ILC_INIT_4(l)   SEL_AND_ILC_INIT_2(l)   , SEL_AND_ILC_INIT_2(l)
+#define SEL_AND_ILC_INIT_8(l)   SEL_AND_ILC_INIT_4(l)   , SEL_AND_ILC_INIT_4(l)
+#define SEL_AND_ILC_INIT_16(l)  SEL_AND_ILC_INIT_8(l)   , SEL_AND_ILC_INIT_8(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_32(l)  SEL_AND_ILC_INIT_16(l)  , SEL_AND_ILC_INIT_16(l)
+#define SEL_AND_ILC_INIT_64(l)  SEL_AND_ILC_INIT_32(l)  , SEL_AND_ILC_INIT_32(l)
+#define SEL_AND_ILC_INIT_128(l) SEL_AND_ILC_INIT_64(l)  , SEL_AND_ILC_INIT_64(l)
+#define SEL_AND_ILC_INIT_256(l) SEL_AND_ILC_INIT_128(l) , SEL_AND_ILC_INIT_128(l)
+#define nilcs 256
+
+        static struct { OBJ sel; struct inlineCache ilc; } sel_and_ilc[nilcs] = { SEL_AND_ILC_INIT_256(29) };
+
+#undef SEL_AND_ILC_INIT_1
+#undef SEL_AND_ILC_INIT_2
+#undef SEL_AND_ILC_INIT_4
+#undef SEL_AND_ILC_INIT_8
+#undef SEL_AND_ILC_INIT_16
+#undef SEL_AND_ILC_INIT_32
+#undef SEL_AND_ILC_INIT_64
+#undef SEL_AND_ILC_INIT_128
+#undef SEL_AND_ILC_INIT_256
+
         static flip = 0;
-
-        if (aSelector == last_0) {
-            pIlc = &ilc_0;
-        } else if (aSelector == last_1) {
-            pIlc = &ilc_1;
-        } else {
-            if (flip == 0) {
-                pIlc = &ilc_0;
-                flip = 1;
-                last_0 = aSelector;
-            } else {
-                pIlc = &ilc_1;
-                flip = 0;
-                last_1 = aSelector;
-            }
-            pIlc->ilc_func = __SEND2ADDR__;
-            if (pIlc->ilc_poly) {
-                __flushPolyCache(pIlc->ilc_poly);
-                pIlc->ilc_poly = 0;
-            }
+        int i;
+        for (i = 0; i < nilcs; i++) {
+           if (sel == sel_and_ilc[i].sel) {
+                pIlc = &sel_and_ilc[i].ilc;
+                goto perform2_send_and_return;
+           }       
         }
+        printf("Object >> #perform: #%s with: with: --> no PIC found\n", __symbolVal(aSelector));
+        pIlc = &sel_and_ilc[flip].ilc;
+        sel_and_ilc[flip].sel = sel;
+        flip = (flip + 1) % nilcs;
+        pIlc->ilc_func = __SEND2ADDR__;
+        if (pIlc->ilc_poly) {
+             __flushPolyCache(pIlc->ilc_poly);
+            pIlc->ilc_poly = 0;
+        }
+perform2_send_and_return:
         RETURN ( (*(pIlc->ilc_func))(self, aSelector, nil, pIlc, arg1, arg2) );
     } else {
         static struct inlineCache ilc2 = __DUMMYILCSELF2(@line+1);
@@ -5471,6 +5487,7 @@
                 }
                 pIlc->ilc_func = __SEND3ADDR__;
                 if (pIlc->ilc_poly) {
+printf("Object >> #perform:with:3 flushing PIC\n");
                     __flushPolyCache(pIlc->ilc_poly);
                     pIlc->ilc_poly = 0;
                 }
@@ -5515,6 +5532,7 @@
                 }
                 pIlc->ilc_func = __SEND4ADDR__;
                 if (pIlc->ilc_poly) {
+printf("Object >> #perform:with:4 flushing PIC\n");
                     __flushPolyCache(pIlc->ilc_poly);
                     pIlc->ilc_poly = 0;
                 }
@@ -5561,6 +5579,7 @@
                 }
                 pIlc->ilc_func = __SEND5ADDR__;
                 if (pIlc->ilc_poly) {
+printf("Object >> #perform:with:5 flushing PIC\n");
                     __flushPolyCache(pIlc->ilc_poly);
                     pIlc->ilc_poly = 0;
                 }
@@ -5608,6 +5627,7 @@
                 }
                 pIlc->ilc_func = __SEND6ADDR__;
                 if (pIlc->ilc_poly) {
+printf("Object >> #perform:with:6 flushing PIC\n");
                     __flushPolyCache(pIlc->ilc_poly);
                     pIlc->ilc_poly = 0;
                 }
@@ -9315,7 +9335,7 @@
 !Object class methodsFor:'documentation'!
 
 version
-    ^ '$Id: Object.st 10551 2010-07-21 15:52:22Z vranyj1 $'
+    ^ '$Id: Object.st 10553 2010-07-22 12:32:34Z vranyj1 $'
 !
 
 version_CVS
@@ -9323,7 +9343,7 @@
 !
 
 version_SVN
-    ^ '$Id: Object.st 10551 2010-07-21 15:52:22Z vranyj1 $'
+    ^ '$Id: Object.st 10553 2010-07-22 12:32:34Z vranyj1 $'
 ! !
 
 Object initialize!
--- a/stx_libbasic.st	Wed Jul 21 19:58:43 2010 +0100
+++ b/stx_libbasic.st	Thu Jul 22 13:32:34 2010 +0100
@@ -534,13 +534,13 @@
     "Return a SVN revision number of myself.
      This number is updated after a commit"
 
-    ^ "$SVN-Revision:"'10551M'"$"
+    ^ "$SVN-Revision:"'10551:10552M'"$"
 ! !
 
 !stx_libbasic class methodsFor:'documentation'!
 
 version
-    ^ '$Id: stx_libbasic.st 10552 2010-07-21 18:58:43Z vranyj1 $'
+    ^ '$Id: stx_libbasic.st 10553 2010-07-22 12:32:34Z vranyj1 $'
 !
 
 version_CVS
@@ -548,5 +548,5 @@
 !
 
 version_SVN
-    ^ '$Id: stx_libbasic.st 10552 2010-07-21 18:58:43Z vranyj1 $'
+    ^ '$Id: stx_libbasic.st 10553 2010-07-22 12:32:34Z vranyj1 $'
 ! !