--- a/LargeInt.st Sun Jun 06 15:19:19 1999 +0200
+++ b/LargeInt.st Sun Jun 06 20:53:07 1999 +0200
@@ -2166,8 +2166,7 @@
unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
INT __ptrDelta = __dst - __src;
unsigned char *__srcLast = __src + __len - 1;
- unsigned char *__dstLast = __dst + __intVal(rsltLen) - 1;
- unsigned char *__srcLast4;
+ int __rsltLen = __intVal(rsltLen);
if (__carry < 0) {
__carry = -__carry;
@@ -2175,98 +2174,208 @@
#if defined(__LSBFIRST) || defined(i386) || defined(alpha)
# if defined(i386) && defined(__GNUC__)
+# if 0 /* NOTICE: disabled - the asm loop below is about 20% slower; the reason is not yet understood */
/*
* add long-wise
*/
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- asm ("addl %%edx,%%eax \n
- movl $0,%%edx \n
- adcl $0,%%edx"
- : "=d" ((unsigned long)(__carry)),
- "=a" ((unsigned long)(__sum))
- : "0" ((unsigned long)(__carry)),
- "1" ((unsigned long)(((unsigned *)__src)[0])) );
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
-
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
+ asm(" jecxz nothingToDo
+ movl %%eax, %%esi /* __src input */
+ movl %%ebx, %%edi /* __dst input */
+
+ /* the first 4-byte int */
+ lodsl /* fetch */
+ addl %%edx, %%eax /* add */
+ stosl /* store */
+ leal -1(%%ecx),%%ecx /* do not clobber carry */
+ jecxz doneLoop /* any more ? */
+ /* remaining 4-byte ints */
+ jmp addLoop
+
+ .align 8
+ addLoop:
+ movl 0(%%esi), %%ebx /* fetch */
+ jnc copyLoop2
+ movl $0, %%eax
+ leal 4(%%esi), %%esi
+ adcl %%ebx, %%eax /* & add carry from prev int */
+ leal 8(%%edi), %%edi
+ movl %%eax, -8(%%edi) /* store */
+ leal -1(%%ecx),%%ecx /* do not clobber carry */
+ jecxz doneLoop /* any more ? */
+
+ movl 0(%%esi), %%ebx /* fetch */
+ movl $0, %%eax
+ leal 4(%%esi), %%esi
+ adcl %%ebx, %%eax /* & add carry from prev int */
+ movl %%eax, -4(%%edi) /* store */
+
+ loop addLoop
+ jmp doneLoop
+
+ .align 8
+ copyLoop:
+ movl 0(%%esi), %%ebx
+ copyLoop2:
+ add $4, %%esi
+ add $4, %%edi
+ movl %%ebx, -4(%%edi)
+ loop copyLoop
+
+ doneLoop:
+ movl $0, %%edx /* do not clobber carry (xorl clears it) */
+ adcl $0, %%edx
+ movl %%esi, %%eax /* __src output */
+ nothingToDo:
+
+ " : "=d" ((unsigned long)(__carry)),
+ "=a" (__src)
+ : "1" (__src),
+ "b" (__dst),
+ "c" (__len / 4),
+ "0" (__carry)
+ : "esi", "edi");
+
+# else
+ {
+ unsigned char *__srcLast4;
+ unsigned char *__srcLast8;
+
+ __srcLast8 = __srcLast - 3 - 4;
+ while (__src <= __srcLast8) {
+ unsigned int __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[0])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[1])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[1] = __sum;
+
+ __src += 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast8) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+ __src += 8;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
+ }
+
+ __srcLast4 = __srcLast - 3;
+ if (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[0])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- goto doneSource;
}
}
+# endif
# else
# if defined(i386) && defined(WIN32)
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- __sum = ((unsigned int *)__src)[0];
- asm {
- mov eax, __sum
- add eax, __carry
- mov edx, 0
- adc edx, 0
- mov __sum, eax
- mov __carry, edx
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ __sum = ((unsigned int *)__src)[0];
+ asm {
+ mov eax, __sum
+ add eax, __carry
+ mov edx, 0
+ adc edx, 0
+ mov __sum, eax
+ mov __carry, edx
+ }
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
}
}
# else
# ifdef alpha64
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned INT __sum;
-
- __sum = ((unsigned int *)__src)[0] + __carry;
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
- __src += 4;
- __carry = __sum >> 32;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned INT __sum;
+
+ __sum = ((unsigned int *)__src)[0] + __carry;
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+ __src += 4;
+ __carry = __sum >> 32;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
}
}
# endif /* alpha64 */
@@ -2298,14 +2407,15 @@
#endif /* __LSBFIRST */
doneSource: ;
- __dst = __src + __ptrDelta;
- while (__dst <= __dstLast) {
- __dst[0] = __carry /* & 0xFF */;
- __carry >>= 8;
- __dst++;
+ /*
+ * now, at most one more byte (the final carry) remains to be stored ...
+ */
+ if (__len < __rsltLen) {
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
}
- if (__dst[-1]) { /* lastDigit */
+ if (__src[__ptrDelta-1]) { /* lastDigit */
RETURN (result);
}
ok = true;
@@ -3888,5 +3998,5 @@
!LargeInteger class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/Attic/LargeInt.st,v 1.124 1999-06-05 13:04:37 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/Attic/LargeInt.st,v 1.125 1999-06-06 18:53:07 cg Exp $'
! !
--- a/LargeInteger.st Sun Jun 06 15:19:19 1999 +0200
+++ b/LargeInteger.st Sun Jun 06 20:53:07 1999 +0200
@@ -2166,8 +2166,7 @@
unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
INT __ptrDelta = __dst - __src;
unsigned char *__srcLast = __src + __len - 1;
- unsigned char *__dstLast = __dst + __intVal(rsltLen) - 1;
- unsigned char *__srcLast4;
+ int __rsltLen = __intVal(rsltLen);
if (__carry < 0) {
__carry = -__carry;
@@ -2175,98 +2174,208 @@
#if defined(__LSBFIRST) || defined(i386) || defined(alpha)
# if defined(i386) && defined(__GNUC__)
+# if 0 /* NOTICE: disabled - the asm loop below is about 20% slower; the reason is not yet understood */
/*
* add long-wise
*/
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- asm ("addl %%edx,%%eax \n
- movl $0,%%edx \n
- adcl $0,%%edx"
- : "=d" ((unsigned long)(__carry)),
- "=a" ((unsigned long)(__sum))
- : "0" ((unsigned long)(__carry)),
- "1" ((unsigned long)(((unsigned *)__src)[0])) );
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
-
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
+ asm(" jecxz nothingToDo
+ movl %%eax, %%esi /* __src input */
+ movl %%ebx, %%edi /* __dst input */
+
+ /* the first 4-byte int */
+ lodsl /* fetch */
+ addl %%edx, %%eax /* add */
+ stosl /* store */
+ leal -1(%%ecx),%%ecx /* do not clobber carry */
+ jecxz doneLoop /* any more ? */
+ /* remaining 4-byte ints */
+ jmp addLoop
+
+ .align 8
+ addLoop:
+ movl 0(%%esi), %%ebx /* fetch */
+ jnc copyLoop2
+ movl $0, %%eax
+ leal 4(%%esi), %%esi
+ adcl %%ebx, %%eax /* & add carry from prev int */
+ leal 8(%%edi), %%edi
+ movl %%eax, -8(%%edi) /* store */
+ leal -1(%%ecx),%%ecx /* do not clobber carry */
+ jecxz doneLoop /* any more ? */
+
+ movl 0(%%esi), %%ebx /* fetch */
+ movl $0, %%eax
+ leal 4(%%esi), %%esi
+ adcl %%ebx, %%eax /* & add carry from prev int */
+ movl %%eax, -4(%%edi) /* store */
+
+ loop addLoop
+ jmp doneLoop
+
+ .align 8
+ copyLoop:
+ movl 0(%%esi), %%ebx
+ copyLoop2:
+ add $4, %%esi
+ add $4, %%edi
+ movl %%ebx, -4(%%edi)
+ loop copyLoop
+
+ doneLoop:
+ movl $0, %%edx /* do not clobber carry (xorl clears it) */
+ adcl $0, %%edx
+ movl %%esi, %%eax /* __src output */
+ nothingToDo:
+
+ " : "=d" ((unsigned long)(__carry)),
+ "=a" (__src)
+ : "1" (__src),
+ "b" (__dst),
+ "c" (__len / 4),
+ "0" (__carry)
+ : "esi", "edi");
+
+# else
+ {
+ unsigned char *__srcLast4;
+ unsigned char *__srcLast8;
+
+ __srcLast8 = __srcLast - 3 - 4;
+ while (__src <= __srcLast8) {
+ unsigned int __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[0])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[1])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[1] = __sum;
+
+ __src += 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast8) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+ __src += 8;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
+ }
+
+ __srcLast4 = __srcLast - 3;
+ if (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ asm ("addl %%edx,%%eax \n
+ movl $0,%%edx \n
+ adcl $0,%%edx"
+ : "=d" ((unsigned long)(__carry)),
+ "=a" ((unsigned long)(__sum))
+ : "0" ((unsigned long)(__carry)),
+ "1" ((unsigned long)(((unsigned *)__src)[0])) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- goto doneSource;
}
}
+# endif
# else
# if defined(i386) && defined(WIN32)
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- __sum = ((unsigned int *)__src)[0];
- asm {
- mov eax, __sum
- add eax, __carry
- mov edx, 0
- adc edx, 0
- mov __sum, eax
- mov __carry, edx
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ __sum = ((unsigned int *)__src)[0];
+ asm {
+ mov eax, __sum
+ add eax, __carry
+ mov edx, 0
+ adc edx, 0
+ mov __sum, eax
+ mov __carry, edx
+ }
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
}
}
# else
# ifdef alpha64
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned INT __sum;
-
- __sum = ((unsigned int *)__src)[0] + __carry;
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
- __src += 4;
- __carry = __sum >> 32;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned INT __sum;
+
+ __sum = ((unsigned int *)__src)[0] + __carry;
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+ __src += 4;
+ __carry = __sum >> 32;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
}
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
}
}
# endif /* alpha64 */
@@ -2298,14 +2407,15 @@
#endif /* __LSBFIRST */
doneSource: ;
- __dst = __src + __ptrDelta;
- while (__dst <= __dstLast) {
- __dst[0] = __carry /* & 0xFF */;
- __carry >>= 8;
- __dst++;
+ /*
+ * now, at most one more byte (the final carry) remains to be stored ...
+ */
+ if (__len < __rsltLen) {
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
}
- if (__dst[-1]) { /* lastDigit */
+ if (__src[__ptrDelta-1]) { /* lastDigit */
RETURN (result);
}
ok = true;
@@ -3888,5 +3998,5 @@
!LargeInteger class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/LargeInteger.st,v 1.124 1999-06-05 13:04:37 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/LargeInteger.st,v 1.125 1999-06-06 18:53:07 cg Exp $'
! !