large + small
author Claus Gittinger <cg@exept.de>
Sun, 06 Jun 1999 20:53:07 +0200
changeset 4281 2acfd4c8b638
parent 4280 f100fa94428f
child 4282 fc1a9c630c92
large + small
LargeInt.st
LargeInteger.st
--- a/LargeInt.st	Sun Jun 06 15:19:19 1999 +0200
+++ b/LargeInt.st	Sun Jun 06 20:53:07 1999 +0200
@@ -2166,8 +2166,7 @@
         unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
         INT __ptrDelta = __dst - __src;
         unsigned char *__srcLast = __src + __len - 1;
-        unsigned char *__dstLast = __dst + __intVal(rsltLen) - 1;
-        unsigned char *__srcLast4;
+        int __rsltLen = __intVal(rsltLen);
 
         if (__carry < 0) {
             __carry = -__carry;
@@ -2175,98 +2174,208 @@
 
 #if defined(__LSBFIRST) || defined(i386) || defined(alpha)
 # if defined(i386) && defined(__GNUC__)
+#  if 0 /* NOTICE - the code below is 20% slower ... - why */
         /*
          * add long-wise
          */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned int __sum;
-
-            asm ("addl %%edx,%%eax      \n
-                  movl $0,%%edx         \n
-                  adcl $0,%%edx"    
-                    : "=d"  ((unsigned long)(__carry)),
-                      "=a"  ((unsigned long)(__sum))
-                    : "0"   ((unsigned long)(__carry)),
-                      "1"   ((unsigned long)(((unsigned *)__src)[0])) );
-
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
-            __src += 4;  
-
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
+        asm("  jecxz nothingToDo     
+               movl  %%eax, %%esi      /* __src input */
+               movl  %%ebx, %%edi      /* __dst input */
+
+               /* the first 4-byte int */
+               lodsl                   /* fetch */
+               addl  %%edx, %%eax      /* add */
+               stosl                   /* store */
+               leal  -1(%%ecx),%%ecx   /* do not clobber carry */
+               jecxz doneLoop          /* any more ? */
+               /* remaining 4-byte ints */
+               jmp   addLoop
+
+               .align 8
+             addLoop:
+               movl  0(%%esi), %%ebx   /* fetch  */
+               jnc   copyLoop2
+               movl  $0, %%eax 
+               leal  4(%%esi), %%esi   
+               adcl  %%ebx, %%eax      /* & add carry from prev int */
+               leal  8(%%edi), %%edi   
+               movl  %%eax, -8(%%edi)  /* store */
+               leal  -1(%%ecx),%%ecx   /* do not clobber carry */
+               jecxz doneLoop          /* any more ? */
+
+               movl  0(%%esi), %%ebx   /* fetch  */
+               movl  $0, %%eax 
+               leal  4(%%esi), %%esi   
+               adcl  %%ebx, %%eax      /* & add carry from prev int */
+               movl  %%eax, -4(%%edi)  /* store */
+
+               loop  addLoop
+               jmp   doneLoop
+
+               .align 8
+             copyLoop:
+               movl  0(%%esi), %%ebx   
+             copyLoop2:
+               add   $4, %%esi   
+               add   $4, %%edi   
+               movl  %%ebx, -4(%%edi)   
+               loop  copyLoop
+
+             doneLoop:
+               movl  $0, %%edx         /* do not clobber carry (xorl clears it) */
+               adcl  $0, %%edx
+               movl  %%esi, %%eax      /* __src output */
+             nothingToDo:           
+
+            " : "=d"  ((unsigned long)(__carry)),
+                "=a"  (__src)
+              : "1"   (__src),
+                "b"   (__dst),
+                "c"   (__len / 4),
+                "0"   (__carry)
+              : "esi", "edi");
+
+#  else
+        {
+            unsigned char *__srcLast4;
+            unsigned char *__srcLast8;
+
+            __srcLast8 = __srcLast - 3 - 4;
+            while (__src <= __srcLast8) {
+                unsigned int __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[0])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[1])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[1] = __sum;
+
+                __src += 8;  
+
+                if (__carry == 0) {
+                    while (__src <= __srcLast8) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+                        __src += 8;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
+            }
+
+            __srcLast4 = __srcLast - 3;
+            if (__src <= __srcLast4) {
+                unsigned int __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[0])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+                __src += 4;  
+
+                if (__carry == 0) {
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                goto doneSource;
             }
         }
+#  endif
 # else 
 #  if defined(i386) && defined(WIN32)
-        /*
-         * add long-wise
-         */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned int __sum;
-
-            __sum = ((unsigned int *)__src)[0];    
-            asm {
-                  mov eax, __sum
-                  add eax, __carry
-                  mov edx, 0
-                  adc edx, 0
-                  mov __sum, eax
-                  mov __carry, edx
+        {
+            unsigned char *__srcLast4;
+
+            /*
+             * add long-wise
+             */
+            __srcLast4 = __srcLast - 3;
+            while (__src <= __srcLast4) {
+                unsigned int __sum;
+
+                __sum = ((unsigned int *)__src)[0];    
+                asm {
+                      mov eax, __sum
+                      add eax, __carry
+                      mov edx, 0
+                      adc edx, 0
+                      mov __sum, eax
+                      mov __carry, edx
+                    }
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+                __src += 4;  
+                if (__carry == 0) {
+                    while (__src <= __srcLast4) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        __src += 4;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
-            __src += 4;  
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
-                }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
-                }
-                goto doneSource;
             }
         }
 #  else 
 #   ifdef alpha64
-        /*
-         * add long-wise
-         */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned INT __sum;
-
-            __sum = ((unsigned int *)__src)[0] + __carry;
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
-            __src += 4;  
-            __carry = __sum >> 32;
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
+        {
+            unsigned char *__srcLast4;
+
+            /*
+             * add long-wise
+             */
+            __srcLast4 = __srcLast - 3;
+            while (__src <= __srcLast4) {
+                unsigned INT __sum;
+
+                __sum = ((unsigned int *)__src)[0] + __carry;
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+                __src += 4;  
+                __carry = __sum >> 32;
+                if (__carry == 0) {
+                    while (__src <= __srcLast4) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        __src += 4;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
-                }
-                goto doneSource;
             }
         }
 #   endif /* alpha64 */
@@ -2298,14 +2407,15 @@
 #endif /* __LSBFIRST */
 
     doneSource: ;
-        __dst = __src + __ptrDelta;
-        while (__dst <= __dstLast) {
-            __dst[0] = __carry /* & 0xFF */;
-            __carry >>= 8;
-            __dst++;
+        /*
+         * now, at most one other byte is to be stored ...
+         */
+        if (__len < __rsltLen) {
+            __src[__ptrDelta] = __carry /* & 0xFF */;
+            __src++;
         }
 
-        if (__dst[-1]) {      /* lastDigit */
+        if (__src[__ptrDelta-1]) {      /* lastDigit */
             RETURN (result);
         }
         ok = true;
@@ -3888,5 +3998,5 @@
 !LargeInteger class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/Attic/LargeInt.st,v 1.124 1999-06-05 13:04:37 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/Attic/LargeInt.st,v 1.125 1999-06-06 18:53:07 cg Exp $'
 ! !
--- a/LargeInteger.st	Sun Jun 06 15:19:19 1999 +0200
+++ b/LargeInteger.st	Sun Jun 06 20:53:07 1999 +0200
@@ -2166,8 +2166,7 @@
         unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
         INT __ptrDelta = __dst - __src;
         unsigned char *__srcLast = __src + __len - 1;
-        unsigned char *__dstLast = __dst + __intVal(rsltLen) - 1;
-        unsigned char *__srcLast4;
+        int __rsltLen = __intVal(rsltLen);
 
         if (__carry < 0) {
             __carry = -__carry;
@@ -2175,98 +2174,208 @@
 
 #if defined(__LSBFIRST) || defined(i386) || defined(alpha)
 # if defined(i386) && defined(__GNUC__)
+#  if 0 /* NOTICE - the code below is 20% slower ... - why */
         /*
          * add long-wise
          */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned int __sum;
-
-            asm ("addl %%edx,%%eax      \n
-                  movl $0,%%edx         \n
-                  adcl $0,%%edx"    
-                    : "=d"  ((unsigned long)(__carry)),
-                      "=a"  ((unsigned long)(__sum))
-                    : "0"   ((unsigned long)(__carry)),
-                      "1"   ((unsigned long)(((unsigned *)__src)[0])) );
-
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
-            __src += 4;  
-
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
+        asm("  jecxz nothingToDo     
+               movl  %%eax, %%esi      /* __src input */
+               movl  %%ebx, %%edi      /* __dst input */
+
+               /* the first 4-byte int */
+               lodsl                   /* fetch */
+               addl  %%edx, %%eax      /* add */
+               stosl                   /* store */
+               leal  -1(%%ecx),%%ecx   /* do not clobber carry */
+               jecxz doneLoop          /* any more ? */
+               /* remaining 4-byte ints */
+               jmp   addLoop
+
+               .align 8
+             addLoop:
+               movl  0(%%esi), %%ebx   /* fetch  */
+               jnc   copyLoop2
+               movl  $0, %%eax 
+               leal  4(%%esi), %%esi   
+               adcl  %%ebx, %%eax      /* & add carry from prev int */
+               leal  8(%%edi), %%edi   
+               movl  %%eax, -8(%%edi)  /* store */
+               leal  -1(%%ecx),%%ecx   /* do not clobber carry */
+               jecxz doneLoop          /* any more ? */
+
+               movl  0(%%esi), %%ebx   /* fetch  */
+               movl  $0, %%eax 
+               leal  4(%%esi), %%esi   
+               adcl  %%ebx, %%eax      /* & add carry from prev int */
+               movl  %%eax, -4(%%edi)  /* store */
+
+               loop  addLoop
+               jmp   doneLoop
+
+               .align 8
+             copyLoop:
+               movl  0(%%esi), %%ebx   
+             copyLoop2:
+               add   $4, %%esi   
+               add   $4, %%edi   
+               movl  %%ebx, -4(%%edi)   
+               loop  copyLoop
+
+             doneLoop:
+               movl  $0, %%edx         /* do not clobber carry (xorl clears it) */
+               adcl  $0, %%edx
+               movl  %%esi, %%eax      /* __src output */
+             nothingToDo:           
+
+            " : "=d"  ((unsigned long)(__carry)),
+                "=a"  (__src)
+              : "1"   (__src),
+                "b"   (__dst),
+                "c"   (__len / 4),
+                "0"   (__carry)
+              : "esi", "edi");
+
+#  else
+        {
+            unsigned char *__srcLast4;
+            unsigned char *__srcLast8;
+
+            __srcLast8 = __srcLast - 3 - 4;
+            while (__src <= __srcLast8) {
+                unsigned int __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[0])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[1])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[1] = __sum;
+
+                __src += 8;  
+
+                if (__carry == 0) {
+                    while (__src <= __srcLast8) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+                        __src += 8;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
+            }
+
+            __srcLast4 = __srcLast - 3;
+            if (__src <= __srcLast4) {
+                unsigned int __sum;
+
+                asm ("addl %%edx,%%eax      \n
+                      movl $0,%%edx         \n
+                      adcl $0,%%edx"    
+                        : "=d"  ((unsigned long)(__carry)),
+                          "=a"  ((unsigned long)(__sum))
+                        : "0"   ((unsigned long)(__carry)),
+                          "1"   ((unsigned long)(((unsigned *)__src)[0])) );
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+                __src += 4;  
+
+                if (__carry == 0) {
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                goto doneSource;
             }
         }
+#  endif
 # else 
 #  if defined(i386) && defined(WIN32)
-        /*
-         * add long-wise
-         */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned int __sum;
-
-            __sum = ((unsigned int *)__src)[0];    
-            asm {
-                  mov eax, __sum
-                  add eax, __carry
-                  mov edx, 0
-                  adc edx, 0
-                  mov __sum, eax
-                  mov __carry, edx
+        {
+            unsigned char *__srcLast4;
+
+            /*
+             * add long-wise
+             */
+            __srcLast4 = __srcLast - 3;
+            while (__src <= __srcLast4) {
+                unsigned int __sum;
+
+                __sum = ((unsigned int *)__src)[0];    
+                asm {
+                      mov eax, __sum
+                      add eax, __carry
+                      mov edx, 0
+                      adc edx, 0
+                      mov __sum, eax
+                      mov __carry, edx
+                    }
+
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+                __src += 4;  
+                if (__carry == 0) {
+                    while (__src <= __srcLast4) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        __src += 4;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
-            __src += 4;  
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
-                }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
-                }
-                goto doneSource;
             }
         }
 #  else 
 #   ifdef alpha64
-        /*
-         * add long-wise
-         */
-        __srcLast4 = __srcLast - 3;
-        while (__src <= __srcLast4) {
-            unsigned INT __sum;
-
-            __sum = ((unsigned int *)__src)[0] + __carry;
-            ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
-            __src += 4;  
-            __carry = __sum >> 32;
-            if (__carry == 0) {
-                while (__src <= __srcLast4) {
-                    /* copy over words */
-                    ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
-                    __src += 4;
+        {
+            unsigned char *__srcLast4;
+
+            /*
+             * add long-wise
+             */
+            __srcLast4 = __srcLast - 3;
+            while (__src <= __srcLast4) {
+                unsigned INT __sum;
+
+                __sum = ((unsigned int *)__src)[0] + __carry;
+                ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+                __src += 4;  
+                __carry = __sum >> 32;
+                if (__carry == 0) {
+                    while (__src <= __srcLast4) {
+                        /* copy over words */
+                        ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+                        __src += 4;
+                    }
+                    while (__src <= __srcLast) {
+                        /* copy over bytes */
+                        __src[__ptrDelta] = __src[0];
+                        __src ++;
+                    }
+                    goto doneSource;
                 }
-                while (__src <= __srcLast) {
-                    /* copy over bytes */
-                    __src[__ptrDelta] = __src[0];
-                    __src ++;
-                }
-                goto doneSource;
             }
         }
 #   endif /* alpha64 */
@@ -2298,14 +2407,15 @@
 #endif /* __LSBFIRST */
 
     doneSource: ;
-        __dst = __src + __ptrDelta;
-        while (__dst <= __dstLast) {
-            __dst[0] = __carry /* & 0xFF */;
-            __carry >>= 8;
-            __dst++;
+        /*
+         * now, at most one other byte is to be stored ...
+         */
+        if (__len < __rsltLen) {
+            __src[__ptrDelta] = __carry /* & 0xFF */;
+            __src++;
         }
 
-        if (__dst[-1]) {      /* lastDigit */
+        if (__src[__ptrDelta-1]) {      /* lastDigit */
             RETURN (result);
         }
         ok = true;
@@ -3888,5 +3998,5 @@
 !LargeInteger class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/LargeInteger.st,v 1.124 1999-06-05 13:04:37 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/LargeInteger.st,v 1.125 1999-06-06 18:53:07 cg Exp $'
 ! !