--- a/LargeInteger.st Thu Sep 22 15:11:47 2016 +0200
+++ b/LargeInteger.st Thu Sep 22 15:12:19 2016 +0200
@@ -315,8 +315,6 @@
"Modified: / 8.5.1998 / 21:40:41 / cg"
! !
-
-
!LargeInteger class methodsFor:'queries'!
isBuiltInClass
@@ -499,14 +497,14 @@
The result is truncated toward negative infinity
and will be negative, if the operands signs differ.
The following is always true:
- (receiver // aNumber) * aNumber + (receiver \\ aNumber) = receiver
+ (receiver // aNumber) * aNumber + (receiver \\ aNumber) = receiver
Be careful with negative results: 9 // 4 -> 2, while -9 // 4 -> -3.
Especially surprising (because of truncation toward negative infinity):
- -1 // 10 -> -1 (because -(1/10) is truncated towards next smaller integer, which is -1.
- -10 // 3 -> -4 (because -(10/3) is truncated towards next smaller integer, which is -4.
-
- See #quo: which truncates toward zero and returns -2 in the above case
+ -1 // 10 -> -1 (because -(1/10) is truncated towards next smaller integer, which is -1.
+ -10 // 3 -> -4 (because -(10/3) is truncated towards next smaller integer, which is -4.
+
+ See #quo: which truncates toward zero and returns -2 in the above case
and #rem: which is the corresponding remainder."
|nrClass divMod quo|
@@ -518,19 +516,19 @@
Use a special method for this case ...
"
((nrClass == SmallInteger) or:[ nrClass == self class]) ifFalse:[
- ^ self retry:#// coercing:aNumber
+ ^ self retry:#// coercing:aNumber
].
divMod := self absDivMod:aNumber.
quo := divMod at:1.
(sign == aNumber sign) ifFalse:[
- "/ adjust for truncation if negative and there is a remainder ...
- "/ be careful: there is one special case to care for here:
- "/ if quo is maxInt+1, the negation can be represented as a smallInt.
- quo := quo setSign:-1.
- (divMod at:2) == 0 ifFalse:[
- ^ quo - 1
- ].
+ "/ adjust for truncation if negative and there is a remainder ...
+ "/ be careful: there is one special case to care for here:
+ "/ if quo is maxInt+1, the negation can be represented as a smallInt.
+ quo := quo setSign:-1.
+ (divMod at:2) == 0 ifFalse:[
+ ^ quo - 1
+ ].
"/ quo digitLength == SmallInteger maxBytes ifTrue:[
"/ ^ quo compressed
"/ ].
@@ -799,7 +797,7 @@
truncates toward negative infinity).
The result's sign is negative if the receiver has a sign different from the arg's sign.
The following is always true:
- (receiver quo: aNumber) * aNumber + (receiver rem: aNumber) = receiver
+ (receiver quo: aNumber) * aNumber + (receiver rem: aNumber) = receiver
For positive results, this is the same as #//,
for negative results, the remainder is ignored.
I.e.: '9 // 4 = 2' and '-9 // 4 = -3'
@@ -809,12 +807,12 @@
nrClass := aNumber class.
((nrClass == SmallInteger) or:[ nrClass == self class] ) ifFalse:[
- ^ self retry:#quo: coercing:aNumber
+ ^ self retry:#quo: coercing:aNumber
].
quo := (self absDivMod:aNumber) at:1.
(sign == aNumber sign) ifTrue:[
- ^ quo
+ ^ quo
].
^ quo setSign:-1
@@ -1676,7 +1674,7 @@
!LargeInteger methodsFor:'coercing & converting'!
asLargeInteger
- "return a LargeInteger with same value as myself - thats me"
+ "return a LargeInteger with same value as myself - that's me"
^ self
!
@@ -2338,60 +2336,60 @@
num := anInteger abs.
SmallInteger maxBytes == 8 ifTrue:[
- (num > 16rFFFFFFFFFF) ifTrue:[
- "if num is too big (so that multiplying by a byte could create a Large)"
- ^ anInteger retry:#* coercing:self
- ].
+ (num > 16rFFFFFFFFFF) ifTrue:[
+ "if num is too big (so that multiplying by a byte could create a Large)"
+ ^ anInteger retry:#* coercing:self
+ ].
] ifFalse:[
- (num > 16r3FFFFF) ifTrue:[
- "if num is too big (so that multiplying by a byte could create a Large)"
- ^ anInteger retry:#* coercing:self
- ].
+ (num > 16r3FFFFF) ifTrue:[
+ "if num is too big (so that multiplying by a byte could create a Large)"
+ ^ anInteger retry:#* coercing:self
+ ].
].
len := digitByteArray size.
val := num.
val <= 16rFF ifTrue:[
- lResult := len + 1.
+ lResult := len + 1.
] ifFalse:[
- val <= 16rFFFF ifTrue:[
- lResult := len + 2
- ] ifFalse:[
- val <= 16rFFFFFF ifTrue:[
- lResult := len + 4.
- ] ifFalse:[
- val <= 16rFFFFFFFF ifTrue:[
- lResult := len + 6.
- ] ifFalse:[
- val <= 16rFFFFFFFFFF ifTrue:[
- lResult := len + 8.
- ] ifFalse:[
- val <= 16rFFFFFFFFFF ifTrue:[
- lResult := len + 10.
- ] ifFalse:[
- val <= 16rFFFFFFFFFFFF ifTrue:[
- lResult := len + 12.
- ] ifFalse:[
- lResult := len + 14.
- ]
- ]
- ]
- ]
- ]
- ]
+ val <= 16rFFFF ifTrue:[
+ lResult := len + 2
+ ] ifFalse:[
+ val <= 16rFFFFFF ifTrue:[
+ lResult := len + 4.
+ ] ifFalse:[
+ val <= 16rFFFFFFFF ifTrue:[
+ lResult := len + 6.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFF ifTrue:[
+ lResult := len + 8.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFF ifTrue:[
+ lResult := len + 10.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFFFF ifTrue:[
+ lResult := len + 12.
+ ] ifFalse:[
+ lResult := len + 14.
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
].
resultDigitByteArray := ByteArray uninitializedNew:lResult.
result := self class basicNew setDigits:resultDigitByteArray.
anInteger < 0 ifTrue:[
- sign > 0 ifTrue:[
- result setSign:-1
- ].
+ sign > 0 ifTrue:[
+ result setSign:-1
+ ].
] ifFalse:[
- sign < 0 ifTrue:[
- result setSign:sign
- ]
+ sign < 0 ifTrue:[
+ result setSign:sign
+ ]
].
ok := false.
@@ -2401,194 +2399,252 @@
if (__isSmallInteger(len)
&& __isByteArray(__digitByteArray)
&& __isByteArray(resultDigitByteArray)) {
- INT _l = __intVal(len);
- INT _v = __intVal(val);
- unsigned INT _carry = 0;
- unsigned INT _prod;
- unsigned char *digitP = __ByteArrayInstPtr(__digitByteArray)->ba_element;
- unsigned char *resultP = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
-
- /*
- * skipping zeros does not help much (a few percent) on
- * a P5 or other CPUS with a fast multiplier.
- * It may make more of a difference on CPUs with slower 0-multiply.
- * Late news: it actually hurts modern x86_64 cpus.
- * So only reenable for specific CPUs after concrete benchmarks.
- */
+ INT _l = __intVal(len);
+ INT _v = __intVal(val);
+ unsigned INT _carry = 0;
+ unsigned INT _prod;
+ unsigned char *digitP = __ByteArrayInstPtr(__digitByteArray)->ba_element;
+ unsigned char *resultP = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
+
+ /*
+ * skipping zeros does not help much (a few percent) on
+ * a P5 or other CPUS with a fast multiplier.
+ * It may make more of a difference on CPUs with slower 0-multiply.
+ * Late news: it actually hurts modern x86_64 cpus.
+ * So only reenable for specific CPUs after concrete benchmarks.
+ */
#if 0
- while ((_l >= sizeof(INT)) && (((unsigned INT *)digitP)[0] == 0)) {
- ((unsigned INT *)resultP)[0] = 0;
- digitP += sizeof(INT);
- resultP += sizeof(INT);
- _l -= sizeof(INT);
- }
+ while ((_l >= sizeof(INT)) && (((unsigned INT *)digitP)[0] == 0)) {
+ ((unsigned INT *)resultP)[0] = 0;
+ digitP += sizeof(INT);
+ resultP += sizeof(INT);
+ _l -= sizeof(INT);
+ }
#endif
#if defined(__LSBFIRST__)
# if defined (__GNUC__) && defined(__i386__) && (__POINTER_SIZE__ == 4)
- /*
- * can do it long-word-wise;
- * 32*32 -> 64 multiplication
- */
- while (_l > 3) {
- unsigned __pHi, __pLow;
- unsigned __digit;
-
- /*
- * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
- * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
- */
- __digit = ((unsigned long *)digitP)[0];
- asm ("mull %3 \n\
- addl %4,%%eax \n\
- adcl $0,%%edx"
- : "=a" (__pLow),
- "=d" (__pHi)
- : "0" (__digit),
- "1" (ASM_ULONGCAST(_v)),
- "rm" (ASM_ULONGCAST(_carry)) );
-
- ((unsigned long *)resultP)[0] = __pLow;
- _carry = __pHi;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
+ /*
+ * can do it long-word-wise;
+ * 32*32 -> 64 multiplication
+ */
+ while (_l > 3) {
+ unsigned __pHi, __pLow;
+ unsigned __digit;
+
+ /*
+ * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
+ * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
+ */
+ __digit = ((unsigned long *)digitP)[0];
+ asm ("mull %3 \n\
+ addl %4,%%eax \n\
+ adcl $0,%%edx"
+ : "=a" (__pLow),
+ "=d" (__pHi)
+ : "0" (__digit),
+ "1" (ASM_ULONGCAST(_v)),
+ "rm" (ASM_ULONGCAST(_carry)) );
+
+ ((unsigned long *)resultP)[0] = __pLow;
+ _carry = __pHi;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
# else /* not GNU-i386 */
# if defined(__win32__) && defined(__BORLANDC__) && defined(__x86__) && (__POINTER_SIZE__ == 4)
- /*
- * can do it long-word-wise;
- * 32*32 -> 64 multiplication
- */
- while (_l > 3) {
- unsigned __pLow;
- unsigned digit;
-
- /*
- * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
- * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
- */
+ /*
+ * can do it long-word-wise;
+ * 32*32 -> 64 multiplication
+ */
+ while (_l > 3) {
+ unsigned __pLow;
+ unsigned digit;
+
+ /*
+ * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
+ * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
+ */
/*
- digit = ((unsigned long *)digitP)[0];
- edx::eax = (digit * _v);
- edx::eax += _carry;
- ((unsigned long *)resultP)[0] = eax; -- pLow
- _carry = edx; -- pHigh
- digitP += 4;
- resultP += 4;
+ digit = ((unsigned long *)digitP)[0];
+ edx::eax = (digit * _v);
+ edx::eax += _carry;
+ ((unsigned long *)resultP)[0] = eax; -- pLow
+ _carry = edx; -- pHigh
+ digitP += 4;
+ resultP += 4;
*/
- digit = ((unsigned long *)digitP)[0];
- asm {
- mov eax, digit
- mov edx, _v
- mul edx
- add eax, _carry
- adc edx, 0
- mov __pLow, eax
- mov _carry, edx
- }
-
- ((unsigned long *)resultP)[0] = __pLow;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
+ digit = ((unsigned long *)digitP)[0];
+ asm {
+ mov eax, digit
+ mov edx, _v
+ mul edx
+ add eax, _carry
+ adc edx, 0
+ mov __pLow, eax
+ mov _carry, edx
+ }
+
+ ((unsigned long *)resultP)[0] = __pLow;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
# else /* not WIN32-i386 */
+# if defined(INT128)
+ if (_v <= 0xFFFFFFFFFFFFFFFFL) {
+ /* have 128bit ints; can do it int64-wise
+ *
+ */
+ while (_l >= 8) {
+ UINT64 __t1;
+ UINT128 _prod128a;
+
+ __t1 = ((UINT64 *)digitP)[0];
+ _prod128a = (INT128)_v;
+ _prod128a *= __t1;
+ _prod128a += _carry;
+ ((UINT64 *)resultP)[0] = _prod128a;
+ _carry = _prod128a >> 64;
+
+ digitP += (8);
+ resultP += (8);
+ _l -= (8);
+ }
+ while (_l >= 4) {
+ unsigned __t;
+ UINT128 _prod128;
+
+ __t = ((unsigned *)digitP)[0];
+ _prod128 = (INT128)_v;
+ _prod128 *= __t;
+ _prod128 += _carry;
+ ((unsigned *)resultP)[0] = _prod128 /* & 0xFFFFFFFFL */;
+ _carry = _prod128 >> 32;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
+ if (_l >= 2) {
+ unsigned short __t;
+ UINT128 _prod128;
+
+ __t = ((unsigned short *)digitP)[0];
+ _prod128 = (INT128)_v;
+ _prod128 *= __t;
+ _prod128 += _carry;
+ ((unsigned short *)resultP)[0] = _prod128 /* & 0xFFFF */;
+ _carry = _prod128 >> 16;
+ digitP += 2;
+ resultP += 2;
+ _l -= 2;
+ }
+ if (_l > 0) {
+ UINT128 _prod128;
+ _prod128 = *digitP++ * _v + _carry;
+ *resultP++ = _prod128 /* & 0xFF */;
+ _carry = _prod128 >> 8;
+ _l--;
+ }
+ }
+# endif
+
# if defined(INT64)
- if (_v <= 0xFFFFFFFFL) {
- /* have 64bit ints; can do it int-wise
- *
- * max: 0xFFFFFFFF * 0xFFFFFFFF -> 0xFFFFFFFE.0001
- * + maxCarry (0xFFFFFFFF) -> 0xFFFFFFFF.0000
- */
- while (_l >= (4+4+4+4)) {
- unsigned __t1, __t2, __t3, __t4;
- UINT64 _prod64a, _prod64b, _prod64c, _prod64d;
-
- __t1 = ((unsigned *)digitP)[0];
- _prod64a = (INT64)_v;
- _prod64a *= __t1;
- _prod64a += _carry;
- ((unsigned *)resultP)[0] = _prod64a /* & 0xFFFFFFFFL */;
- _carry = _prod64a >> 32;
-
- __t2 = ((unsigned *)digitP)[1];
- _prod64b = (INT64)_v;
- _prod64b *= __t2;
- _prod64b += _carry;
- ((unsigned *)resultP)[1] = _prod64b /* & 0xFFFFFFFFL */;
- _carry = _prod64b >> 32;
-
- __t3 = ((unsigned *)digitP)[2];
- _prod64c = (INT64)_v;
- _prod64c *= __t3;
- _prod64c += _carry;
- ((unsigned *)resultP)[2] = _prod64c /* & 0xFFFFFFFFL */;
- _carry = _prod64c >> 32;
-
- __t4 = ((unsigned *)digitP)[3];
- _prod64d = (INT64)_v;
- _prod64d *= __t4;
- _prod64d += _carry;
- ((unsigned *)resultP)[3] = _prod64d /* & 0xFFFFFFFFL */;
- _carry = _prod64d >> 32;
-
- digitP += (4+4+4+4);
- resultP += (4+4+4+4);
- _l -= (4+4+4+4);
- }
- while (_l >= 4) {
- unsigned __t;
- UINT64 _prod64;
-
- __t = ((unsigned *)digitP)[0];
- _prod64 = (INT64)_v;
- _prod64 *= __t;
- _prod64 += _carry;
- ((unsigned *)resultP)[0] = _prod64 /* & 0xFFFFFFFFL */;
- _carry = _prod64 >> 32;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
- if (_l >= 2) {
- unsigned short __t;
- UINT64 _prod64;
-
- __t = ((unsigned short *)digitP)[0];
- _prod64 = (INT64)_v;
- _prod64 *= __t;
- _prod64 += _carry;
- ((unsigned short *)resultP)[0] = _prod64 /* & 0xFFFF */;
- _carry = _prod64 >> 16;
- digitP += 2;
- resultP += 2;
- _l -= 2;
- }
- if (_l > 0) {
- UINT64 _prod64;
- _prod64 = *digitP++ * _v + _carry;
- *resultP++ = _prod64 /* & 0xFF */;
- _carry = _prod64 >> 8;
- _l--;
- }
- }
+ if (_v <= 0xFFFFFFFFL) {
+ /* have 64bit ints; can do it int-wise
+ *
+ * max: 0xFFFFFFFF * 0xFFFFFFFF -> 0xFFFFFFFE.0001
+ * + maxCarry (0xFFFFFFFF) -> 0xFFFFFFFF.0000
+ */
+ while (_l >= (4+4+4+4)) {
+ unsigned __t1, __t2, __t3, __t4;
+ UINT64 _prod64a, _prod64b, _prod64c, _prod64d;
+
+ __t1 = ((unsigned *)digitP)[0];
+ _prod64a = (INT64)_v;
+ _prod64a *= __t1;
+ _prod64a += _carry;
+ ((unsigned *)resultP)[0] = _prod64a /* & 0xFFFFFFFFL */;
+ _carry = _prod64a >> 32;
+
+ __t2 = ((unsigned *)digitP)[1];
+ _prod64b = (INT64)_v;
+ _prod64b *= __t2;
+ _prod64b += _carry;
+ ((unsigned *)resultP)[1] = _prod64b /* & 0xFFFFFFFFL */;
+ _carry = _prod64b >> 32;
+
+ __t3 = ((unsigned *)digitP)[2];
+ _prod64c = (INT64)_v;
+ _prod64c *= __t3;
+ _prod64c += _carry;
+ ((unsigned *)resultP)[2] = _prod64c /* & 0xFFFFFFFFL */;
+ _carry = _prod64c >> 32;
+
+ __t4 = ((unsigned *)digitP)[3];
+ _prod64d = (INT64)_v;
+ _prod64d *= __t4;
+ _prod64d += _carry;
+ ((unsigned *)resultP)[3] = _prod64d /* & 0xFFFFFFFFL */;
+ _carry = _prod64d >> 32;
+
+ digitP += (4+4+4+4);
+ resultP += (4+4+4+4);
+ _l -= (4+4+4+4);
+ }
+ while (_l >= 4) {
+ unsigned __t;
+ UINT64 _prod64;
+
+ __t = ((unsigned *)digitP)[0];
+ _prod64 = (INT64)_v;
+ _prod64 *= __t;
+ _prod64 += _carry;
+ ((unsigned *)resultP)[0] = _prod64 /* & 0xFFFFFFFFL */;
+ _carry = _prod64 >> 32;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
+ if (_l >= 2) {
+ unsigned short __t;
+ UINT64 _prod64;
+
+ __t = ((unsigned short *)digitP)[0];
+ _prod64 = (INT64)_v;
+ _prod64 *= __t;
+ _prod64 += _carry;
+ ((unsigned short *)resultP)[0] = _prod64 /* & 0xFFFF */;
+ _carry = _prod64 >> 16;
+ digitP += 2;
+ resultP += 2;
+ _l -= 2;
+ }
+ if (_l > 0) {
+ UINT64 _prod64;
+ _prod64 = *digitP++ * _v + _carry;
+ *resultP++ = _prod64 /* & 0xFF */;
+ _carry = _prod64 >> 8;
+ _l--;
+ }
+ }
# else /* no INT64 type */
- if (_v <= 0xFFFF) {
- /* can do it short-wise
- *
- * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
- * + maxCarry (0xFFFF) -> 0xFFFF.0000
- */
- while (_l > 1) {
- _prod = ((unsigned short *)digitP)[0] * _v + _carry;
- ((unsigned short *)resultP)[0] = _prod /* & 0xFFFF */;
- _carry = _prod >> 16;
- digitP += 2;
- resultP += 2;
- _l -= 2;
- }
- }
+ if (_v <= 0xFFFF) {
+ /* can do it short-wise
+ *
+ * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
+ * + maxCarry (0xFFFF) -> 0xFFFF.0000
+ */
+ while (_l > 1) {
+ _prod = ((unsigned short *)digitP)[0] * _v + _carry;
+ ((unsigned short *)resultP)[0] = _prod /* & 0xFFFF */;
+ _carry = _prod >> 16;
+ digitP += 2;
+ resultP += 2;
+ _l -= 2;
+ }
+ }
# endif /* no INT64 */
# endif /* not WIN32-i386 */
# endif /* not GNU-i386 */
@@ -2599,80 +2655,80 @@
/* no, STORE_WORD_WISE makes it slower */
# endif
- if (_v <= 0xFFFF) {
- /* can do it short-wise
- *
- * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
- * + maxCarry (0xFFFF) -> 0xFFFF.0000
- */
- while (_l > 1) {
- unsigned int t;
+ if (_v <= 0xFFFF) {
+ /* can do it short-wise
+ *
+ * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
+ * + maxCarry (0xFFFF) -> 0xFFFF.0000
+ */
+ while (_l > 1) {
+ unsigned int t;
#if defined(LOAD_WORD_WISE)
- /* better fetch short-wise */
- t = ((unsigned short *)digitP)[0];
- digitP += 2;
- t = ((t >> 8) | (t << 8)) & 0xFFFF;
+ /* better fetch short-wise */
+ t = ((unsigned short *)digitP)[0];
+ digitP += 2;
+ t = ((t >> 8) | (t << 8)) & 0xFFFF;
#else
- t = (digitP[1]<<8) + digitP[0];
- digitP += 2;
+ t = (digitP[1]<<8) + digitP[0];
+ digitP += 2;
#endif
- _prod = t * _v + _carry;
- _carry = _prod >> 16;
+ _prod = t * _v + _carry;
+ _carry = _prod >> 16;
#if defined(STORE_WORD_WISE)
- /* better store short-wise */
- _prod = ((_prod >> 8) | (_prod << 8)) & 0xFFFF;
- ((unsigned short *)resultP)[0] = _prod;
+ /* better store short-wise */
+ _prod = ((_prod >> 8) | (_prod << 8)) & 0xFFFF;
+ ((unsigned short *)resultP)[0] = _prod;
#else
- resultP[0] = _prod /* & 0xFF */;
- resultP[1] = (_prod>>8) /* & 0xFF */;
+ resultP[0] = _prod /* & 0xFF */;
+ resultP[1] = (_prod>>8) /* & 0xFF */;
#endif
- resultP += 2;
- _l -= 2;
- }
- }
+ resultP += 2;
+ _l -= 2;
+ }
+ }
#endif /* LSB_FIRST */
- /*
- * rest is done byte-wise
- */
- while (_l > 0) {
- _prod = *digitP++ * _v + _carry;
- *resultP++ = _prod /* & 0xFF */;
- _carry = _prod >> 8;
- _l--;
- }
-
- _l = __intVal(lResult) - __intVal(len);
-
- /*
- * remaining carry
- */
- while (_carry) {
- *resultP++ = _carry /* & 0xFF */;
- _carry >>= 8;
- _l--;
- }
-
- /*
- * remaining zeros
- */
- while (_l--) {
- *resultP++ = 0;
- }
-
- /*
- * need compress ?
- */
- if (resultP[-1]) {
- /*
- * no
- */
- RETURN(result);
- }
-
- ok = true;
+ /*
+ * rest is done byte-wise
+ */
+ while (_l > 0) {
+ _prod = *digitP++ * _v + _carry;
+ *resultP++ = _prod /* & 0xFF */;
+ _carry = _prod >> 8;
+ _l--;
+ }
+
+ _l = __intVal(lResult) - __intVal(len);
+
+ /*
+ * remaining carry
+ */
+ while (_carry) {
+ *resultP++ = _carry /* & 0xFF */;
+ _carry >>= 8;
+ _l--;
+ }
+
+ /*
+ * remaining zeros
+ */
+ while (_l--) {
+ *resultP++ = 0;
+ }
+
+ /*
+ * need compress ?
+ */
+ if (resultP[-1]) {
+ /*
+ * no
+ */
+ RETURN(result);
+ }
+
+ ok = true;
}
%}.
"
@@ -2680,21 +2736,21 @@
(could make it a primitive-failure as well)
"
ok ifFalse:[
- carry := 0.
- 1 to:len do:[:i |
- prod := (digitByteArray basicAt:i) * val + carry.
- resultDigitByteArray basicAt:i put:(prod bitAnd:16rFF).
- carry := prod bitShift:-8.
- ].
- [carry ~~ 0] whileTrue:[
- len := len + 1.
- resultDigitByteArray basicAt:len put:(carry bitAnd:16rFF).
- carry := carry bitShift:-8
- ].
- [len < lResult] whileTrue:[
- len := len + 1.
- resultDigitByteArray basicAt:len put:0
- ]
+ carry := 0.
+ 1 to:len do:[:i |
+ prod := (digitByteArray basicAt:i) * val + carry.
+ resultDigitByteArray basicAt:i put:(prod bitAnd:16rFF).
+ carry := prod bitShift:-8.
+ ].
+ [carry ~~ 0] whileTrue:[
+ len := len + 1.
+ resultDigitByteArray basicAt:len put:(carry bitAnd:16rFF).
+ carry := carry bitShift:-8
+ ].
+ [len < lResult] whileTrue:[
+ len := len + 1.
+ resultDigitByteArray basicAt:len put:0
+ ]
].
^ result compressed
!
@@ -3506,7 +3562,7 @@
((aSmallInteger < (SmallInteger minVal + 255))
or:[aSmallInteger > (SmallInteger maxVal - 255)]) ifTrue:[
- ^ self absPlus:(self class value:aSmallInteger) sign:newSign.
+ ^ self absPlus:(self class value:aSmallInteger) sign:newSign.
].
len := rsltLen := digitByteArray size.
@@ -3515,18 +3571,18 @@
"/ if it is 255 (since the other number is definitely smaller)
"/
(digitByteArray at:len) == 16rFF ifTrue:[
- rsltLen := len + 1.
+ rsltLen := len + 1.
] ifFalse:[
- "/ or the argument has something in the high byte ..
+ "/ or the argument has something in the high byte ..
%{
#if __POINTER_SIZE__ == 8
- if (__intVal(aSmallInteger) & 0xFF00000000000000L) {
- rsltLen = __mkSmallInteger(__intVal(len) + 1);
- }
+ if (__intVal(aSmallInteger) & 0xFF00000000000000L) {
+ rsltLen = __mkSmallInteger(__intVal(len) + 1);
+ }
#else
- if (__intVal(aSmallInteger) & 0xFF000000) {
- rsltLen = __mkSmallInteger(__intVal(len) + 1);
- }
+ if (__intVal(aSmallInteger) & 0xFF000000) {
+ rsltLen = __mkSmallInteger(__intVal(len) + 1);
+ }
#endif
%}
].
@@ -3538,309 +3594,309 @@
if (__isByteArray(__INST(digitByteArray))
&& __isByteArray(resultDigitByteArray)
&& __isSmallInteger(aSmallInteger)) {
- /* carry is NOT unsigned (see negation below) */
- INT __carry = __intVal(aSmallInteger);
- int __index = 1;
- int __len = __intVal(len);
- unsigned char *__src = (unsigned char *)(__ByteArrayInstPtr(__INST(digitByteArray))->ba_element);
- unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
- INT __ptrDelta = __dst - __src;
- unsigned char *__srcLast = __src + __len - 1;
- int __rsltLen = __intVal(rsltLen);
-
- if (__carry < 0) {
- __carry = -__carry;
- }
+ /* carry is NOT unsigned (see negation below) */
+ INT __carry = __intVal(aSmallInteger);
+ int __index = 1;
+ int __len = __intVal(len);
+ unsigned char *__src = (unsigned char *)(__ByteArrayInstPtr(__INST(digitByteArray))->ba_element);
+ unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
+ INT __ptrDelta = __dst - __src;
+ unsigned char *__srcLast = __src + __len - 1;
+ int __rsltLen = __intVal(rsltLen);
+
+ if (__carry < 0) {
+ __carry = -__carry;
+ }
#if defined(__LSBFIRST__)
# if defined(__i386__) && defined(__GNUC__) && (__POINTER_SIZE__ == 4)
# if 0 /* NOTICE - the code below is 20% slower ... - why */
- /*
- * add long-wise
- */
- asm(" jecxz nothingToDo \n\
- movl %%eax, %%esi /* __src input */ \n\
- movl %%ebx, %%edi /* __dst input */ \n\
- \n\
- /* the first 4-byte int */ \n\
- lodsl /* fetch */ \n\
- addl %%edx, %%eax /* add */ \n\
- stosl /* store */ \n\
- leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
- jecxz doneLoop /* any more ? */ \n\
- /* remaining 4-byte ints */ \n\
- jmp addLoop \n\
- \n\
- .align 8 \n\
- addLoop: \n\
- movl 0(%%esi), %%ebx /* fetch */ \n\
- jnc copyLoop2 \n\
- movl $0, %%eax \n\
- leal 4(%%esi), %%esi \n\
- adcl %%ebx, %%eax /* & add carry from prev int */\n\
- leal 8(%%edi), %%edi \n\
- movl %%eax, -8(%%edi) /* store */ \n\
- leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
- jecxz doneLoop /* any more ? */ \n\
- \n\
- movl 0(%%esi), %%ebx /* fetch */ \n\
- movl $0, %%eax \n\
- leal 4(%%esi), %%esi \
- adcl %%ebx, %%eax /* & add carry from prev int */\n\
- movl %%eax, -4(%%edi) /* store */ \n\
- \n\
- loop addLoop \n\
- jmp doneLoop \n\
- \n\
- .align 8 \n\
- copyLoop: \n\
- movl 0(%%esi), %%ebx \n\
- copyLoop2: \n\
- add $4, %%esi \n\
- add $4, %%edi \n\
- movl %%ebx, -4(%%edi) \n\
- loop copyLoop \n\
- \n\
- doneLoop: \n\
- movl $0, %%edx /* do not clobber carry (xorl clears it) */ \n\
- adcl $0, %%edx \n\
- movl %%esi, %%eax /* __src output */ \n\
- nothingToDo: \n\
- " : "=d" (ASM_ULONGCAST(__carry)),
- "=a" (__src)
- : "1" (__src),
- "b" (__dst),
- "c" (__len / 4),
- "0" (__carry)
- : "esi", "edi");
+ /*
+ * add long-wise
+ */
+ asm(" jecxz nothingToDo \n\
+ movl %%eax, %%esi /* __src input */ \n\
+ movl %%ebx, %%edi /* __dst input */ \n\
+ \n\
+ /* the first 4-byte int */ \n\
+ lodsl /* fetch */ \n\
+ addl %%edx, %%eax /* add */ \n\
+ stosl /* store */ \n\
+ leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
+ jecxz doneLoop /* any more ? */ \n\
+ /* remaining 4-byte ints */ \n\
+ jmp addLoop \n\
+ \n\
+ .align 8 \n\
+ addLoop: \n\
+ movl 0(%%esi), %%ebx /* fetch */ \n\
+ jnc copyLoop2 \n\
+ movl $0, %%eax \n\
+ leal 4(%%esi), %%esi \n\
+ adcl %%ebx, %%eax /* & add carry from prev int */\n\
+ leal 8(%%edi), %%edi \n\
+ movl %%eax, -8(%%edi) /* store */ \n\
+ leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
+ jecxz doneLoop /* any more ? */ \n\
+ \n\
+ movl 0(%%esi), %%ebx /* fetch */ \n\
+ movl $0, %%eax \n\
+ leal 4(%%esi), %%esi \
+ adcl %%ebx, %%eax /* & add carry from prev int */\n\
+ movl %%eax, -4(%%edi) /* store */ \n\
+ \n\
+ loop addLoop \n\
+ jmp doneLoop \n\
+ \n\
+ .align 8 \n\
+ copyLoop: \n\
+ movl 0(%%esi), %%ebx \n\
+ copyLoop2: \n\
+ add $4, %%esi \n\
+ add $4, %%edi \n\
+ movl %%ebx, -4(%%edi) \n\
+ loop copyLoop \n\
+ \n\
+ doneLoop: \n\
+ movl $0, %%edx /* do not clobber carry (xorl clears it) */ \n\
+ adcl $0, %%edx \n\
+ movl %%esi, %%eax /* __src output */ \n\
+ nothingToDo: \n\
+ " : "=d" (ASM_ULONGCAST(__carry)),
+ "=a" (__src)
+ : "1" (__src),
+ "b" (__dst),
+ "c" (__len / 4),
+ "0" (__carry)
+ : "esi", "edi");
# else
- {
- unsigned char *__srcLastX;
-
- __srcLastX = __srcLast - 3 - 4;
- while (__src <= __srcLastX) {
- unsigned int __sum, __sum2;
- unsigned __digit1, __digit2;
-
- __digit1 = ((unsigned *)__src)[0];
- __digit2 = ((unsigned *)__src)[1];
- asm ("addl %%edx,%%ecx \n\
- adcl $0, %%eax \n\
- movl $0, %%edx \n\
- adcl $0, %%edx"
- : "=d" (ASM_ULONGCAST(__carry)),
- "=c" (ASM_ULONGCAST(__sum)),
- "=a" (ASM_ULONGCAST(__sum2))
- : "0" (ASM_ULONGCAST(__carry)),
- "1" (__digit1),
- "2" (__digit2));
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- ((unsigned int *)(__src + __ptrDelta))[1] = __sum2;
-
- __src += 8;
-
- if (__carry == 0) {
- while (__src <= __srcLastX) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
- __src += 8;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
-
- __srcLastX = __srcLastX + 4;
- if (__src <= __srcLastX) {
- unsigned int __sum, __digit;
-
- __digit = ((unsigned *)__src)[0];
-
- asm ("addl %%eax,%%edx \n\
- movl $0,%%eax \n\
- adcl $0,%%eax"
- : "=a" (ASM_ULONGCAST(__carry)),
- "=d" (ASM_ULONGCAST(__sum))
- : "0" (ASM_ULONGCAST(__carry)),
- "1" (__digit) );
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
-
- if (__carry == 0) {
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLastX;
+
+ __srcLastX = __srcLast - 3 - 4;
+ while (__src <= __srcLastX) {
+ unsigned int __sum, __sum2;
+ unsigned __digit1, __digit2;
+
+ __digit1 = ((unsigned *)__src)[0];
+ __digit2 = ((unsigned *)__src)[1];
+ asm ("addl %%edx,%%ecx \n\
+ adcl $0, %%eax \n\
+ movl $0, %%edx \n\
+ adcl $0, %%edx"
+ : "=d" (ASM_ULONGCAST(__carry)),
+ "=c" (ASM_ULONGCAST(__sum)),
+ "=a" (ASM_ULONGCAST(__sum2))
+ : "0" (ASM_ULONGCAST(__carry)),
+ "1" (__digit1),
+ "2" (__digit2));
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ ((unsigned int *)(__src + __ptrDelta))[1] = __sum2;
+
+ __src += 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLastX) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+ __src += 8;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+
+ __srcLastX = __srcLastX + 4;
+ if (__src <= __srcLastX) {
+ unsigned int __sum, __digit;
+
+ __digit = ((unsigned *)__src)[0];
+
+ asm ("addl %%eax,%%edx \n\
+ movl $0,%%eax \n\
+ adcl $0,%%eax"
+ : "=a" (ASM_ULONGCAST(__carry)),
+ "=d" (ASM_ULONGCAST(__sum))
+ : "0" (ASM_ULONGCAST(__carry)),
+ "1" (__digit) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# endif
# else /* not i386-GNUC */
# if defined(__win32__) && defined(__BORLANDC__) && defined(__x86__) && (__POINTER_SIZE__ == 4)
- {
- unsigned char *__srcLast4;
-
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- __sum = ((unsigned int *)__src)[0];
- asm {
- mov eax, __sum
- add eax, __carry
- mov edx, 0
- adc edx, 0
- mov __sum, eax
- mov __carry, edx
- }
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ __sum = ((unsigned int *)__src)[0];
+ asm {
+ mov eax, __sum
+ add eax, __carry
+ mov edx, 0
+ adc edx, 0
+ mov __sum, eax
+ mov __carry, edx
+ }
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# else /* not i386-WIN32 */
# if defined(__LSBFIRST__) && (__POINTER_SIZE__ == 8)
- {
- unsigned char *__srcLast4;
-
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned INT __sum;
-
- __sum = (INT)(((unsigned int *)__src)[0]);
- __sum += __carry;
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
- __src += 4;
- __carry = __sum >> 32;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned INT __sum;
+
+ __sum = (INT)(((unsigned int *)__src)[0]);
+ __sum += __carry;
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+ __src += 4;
+ __carry = __sum >> 32;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# endif /* LSB+64bit */
# endif /* __i386__ & WIN32 */
# endif /* __i386__ & GNUC */
- /*
- * add short-wise
- */
- while (__src < __srcLast) {
- __carry += ((unsigned short *)__src)[0];
- ((unsigned short *)(__src + __ptrDelta))[0] = __carry /* & 0xFFFF */;
- __carry >>= 16;
- __src += 2;
- }
- /*
- * last (odd) byte
- */
- if (__src <= __srcLast) {
- __carry += __src[0];
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __carry >>= 8;
- __src++;
- }
+ /*
+ * add short-wise
+ */
+ while (__src < __srcLast) {
+ __carry += ((unsigned short *)__src)[0];
+ ((unsigned short *)(__src + __ptrDelta))[0] = __carry /* & 0xFFFF */;
+ __carry >>= 16;
+ __src += 2;
+ }
+ /*
+ * last (odd) byte
+ */
+ if (__src <= __srcLast) {
+ __carry += __src[0];
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __carry >>= 8;
+ __src++;
+ }
#else /* not __LSBFIRST__ */
- /*
- * add byte-wise
- */
- while (__src <= __srcLast) {
- __carry += __src[0];
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __src++;
- __carry >>= 8;
-
- if (__carry == 0) {
- while (__src <= __srcLast) {
- /* copy over rest */
- __src[__ptrDelta] = __src[0];
- __src++;
- }
- goto doneSource;
- }
- }
+ /*
+ * add byte-wise
+ */
+ while (__src <= __srcLast) {
+ __carry += __src[0];
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
+ __carry >>= 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over rest */
+ __src[__ptrDelta] = __src[0];
+ __src++;
+ }
+ goto doneSource;
+ }
+ }
#endif /* __LSBFIRST__ */
doneSource: ;
- /*
- * now, at most one other byte is to be stored ...
- */
- if (__len < __rsltLen) {
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __src++;
- }
-
- if (__src[__ptrDelta-1] != 0) { /* lastDigit */
- RETURN (result);
- }
- // must compress
- ok = true;
+ /*
+ * now, at most one other byte is to be stored ...
+ */
+ if (__len < __rsltLen) {
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
+ }
+
+ if (__src[__ptrDelta-1] != 0) { /* lastDigit */
+ RETURN (result);
+ }
+ // must compress
+ ok = true;
}
%}.
ok ~~ true ifTrue:[
- index := 1.
- carry := aSmallInteger abs.
-
- [carry ~~ 0] whileTrue:[
- (index <= len) ifTrue:[
- carry := (digitByteArray basicAt:index) + carry.
- ].
- resultDigitByteArray basicAt:index put:(lastDigit := carry bitAnd:16rFF).
- carry := carry bitShift:-8.
- index := index + 1
- ].
-
- (index <= rsltLen) ifTrue:[
- [index <= len] whileTrue:[
- resultDigitByteArray basicAt:index put:(digitByteArray basicAt:index).
- index := index + 1
- ].
- lastDigit := 0.
- ].
-
- (lastDigit ~~ 0 and:[rsltLen > SmallInteger maxBytes]) ifTrue:[
- ^ result
- ].
+ index := 1.
+ carry := aSmallInteger abs.
+
+ [carry ~~ 0] whileTrue:[
+ (index <= len) ifTrue:[
+ carry := (digitByteArray basicAt:index) + carry.
+ ].
+ resultDigitByteArray basicAt:index put:(lastDigit := carry bitAnd:16rFF).
+ carry := carry bitShift:-8.
+ index := index + 1
+ ].
+
+ (index <= rsltLen) ifTrue:[
+ [index <= len] whileTrue:[
+ resultDigitByteArray basicAt:index put:(digitByteArray basicAt:index).
+ index := index + 1
+ ].
+ lastDigit := 0.
+ ].
+
+ (lastDigit ~~ 0 and:[rsltLen > SmallInteger maxBytes]) ifTrue:[
+ ^ result
+ ].
].
^ result compressed
@@ -4422,7 +4478,7 @@
len2 := otherDigitByteArray size.
lenRslt := len1 + len2.
UseKarazuba ~~ false ifTrue:[
- lenRslt > 400 ifTrue:[ ^ self absMulKarazuba:aLargeInteger ].
+ lenRslt > 400 ifTrue:[ ^ self absMulKarazuba:aLargeInteger ].
].
result := self class basicNew numberOfDigits:lenRslt.
@@ -4432,198 +4488,198 @@
if (__isByteArray(__INST(digitByteArray))
&& __isByteArray(otherDigitByteArray)
&& __isByteArray(resultDigitByteArray)) {
- unsigned char *myBytes = __ByteArrayInstPtr(__INST(digitByteArray))->ba_element;
- unsigned char *otherBytes = __ByteArrayInstPtr(otherDigitByteArray)->ba_element;
- unsigned char *resultBytes = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
- unsigned char *_p1, *_pResult0, *_p1Last, *_p2Last;
- unsigned INT _v;
- int _len1 = __intVal(len1);
- int _len2 = __intVal(len2);
-
- _p1Last = myBytes + _len1 - 1; /* the last byte */
- _p2Last = otherBytes + _len2 - 1; /* the last byte */
- _pResult0 = resultBytes;
-
- /*
- * aaa...aaa f1[0] * f2
- * + bbb...bbb f1[1] * f2
- * + ccc...ccc f1[2] * f2
- * + ...
- * + xxx...xxx f1[high] * f2
- *
- * start short-wise
- * bounds: (16rFFFF * 16rFFFF) + 16rFFFF -> FFFF0000
- */
- _p1 = myBytes;
+ unsigned char *myBytes = __ByteArrayInstPtr(__INST(digitByteArray))->ba_element;
+ unsigned char *otherBytes = __ByteArrayInstPtr(otherDigitByteArray)->ba_element;
+ unsigned char *resultBytes = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
+ unsigned char *_p1, *_pResult0, *_p1Last, *_p2Last;
+ unsigned INT _v;
+ int _len1 = __intVal(len1);
+ int _len2 = __intVal(len2);
+
+ _p1Last = myBytes + _len1 - 1; /* the last byte */
+ _p2Last = otherBytes + _len2 - 1; /* the last byte */
+ _pResult0 = resultBytes;
+
+ /*
+ * aaa...aaa f1[0] * f2
+ * + bbb...bbb f1[1] * f2
+ * + ccc...ccc f1[2] * f2
+ * + ...
+ * + xxx...xxx f1[high] * f2
+ *
+ * start short-wise
+ * bounds: (16rFFFF * 16rFFFF) + 16rFFFF -> FFFF0000
+ */
+ _p1 = myBytes;
#if defined(__LSBFIRST__) && (__POINTER_SIZE__ == 8)
- /* loop over ints of f1 */
- for (; _p1 <= (_p1Last-3); _p1 += 4, _pResult0 += 4) {
- unsigned char *_pResult = _pResult0;
- unsigned char *_p2;
- unsigned INT word1 = ((unsigned int *)_p1)[0];
-
- _v = 0;
-
- /* loop over ints of f2 */
- for (_p2 = otherBytes; _p2 <= (_p2Last-3); _p2 += 4) {
- _v = word1 * (unsigned INT)(((unsigned int *)_p2)[0])
- + _v + (unsigned INT)((unsigned int *)_pResult)[0];
- ((unsigned int *)_pResult)[0] = _v /* & 0xFFFFFFFF */;
- _v >>= 32; /* now _v contains the carry*/
- _pResult += 4;
- }
-
- /* possible odd up to 3 highBytes of f2 */
- for ( ; _p2 <= _p2Last; _p2++) {
- _v = word1 * (unsigned INT)(_p2[0])
- + _v + (unsigned INT)(_pResult[0]);
-
- ((unsigned char *)_pResult)[0] = _v /* & 0xFFFFFFFF */;
- _v >>= 8; /* now _v contains the carry*/
- _pResult++;
- }
- /* distribute leftover carry byte-wise */
- for ( ; _v; _v >>= 8, _pResult++) {
- _v += _pResult[0];
- _pResult[0] = _v /* & 0xFF */;
- }
- }
+ /* loop over ints of f1 */
+ for (; _p1 <= (_p1Last-3); _p1 += 4, _pResult0 += 4) {
+ unsigned char *_pResult = _pResult0;
+ unsigned char *_p2;
+ unsigned INT word1 = ((unsigned int *)_p1)[0];
+
+ _v = 0;
+
+ /* loop over ints of f2 */
+ for (_p2 = otherBytes; _p2 <= (_p2Last-3); _p2 += 4) {
+ _v = word1 * (unsigned INT)(((unsigned int *)_p2)[0])
+ + _v + (unsigned INT)((unsigned int *)_pResult)[0];
+ ((unsigned int *)_pResult)[0] = _v /* & 0xFFFFFFFF */;
+ _v >>= 32; /* now _v contains the carry*/
+ _pResult += 4;
+ }
+
+ /* possible odd up to 3 highBytes of f2 */
+ for ( ; _p2 <= _p2Last; _p2++) {
+ _v = word1 * (unsigned INT)(_p2[0])
+ + _v + (unsigned INT)(_pResult[0]);
+
+ ((unsigned char *)_pResult)[0] = _v /* & 0xFFFFFFFF */;
+ _v >>= 8; /* now _v contains the carry*/
+ _pResult++;
+ }
+ /* distribute leftover carry byte-wise */
+ for ( ; _v; _v >>= 8, _pResult++) {
+ _v += _pResult[0];
+ _pResult[0] = _v /* & 0xFF */;
+ }
+ }
#endif /* 64bit */
- /* possible odd high short of f1 (or shortLoop, if not 64bit) */
-
- for (; _p1 < _p1Last; _p1 += 2, _pResult0 += 2) {
- unsigned char *_pResult = _pResult0;
- unsigned char *_p2 = otherBytes;
- unsigned int short1 = ((unsigned short *)_p1)[0];
+ /* possible odd high short of f1 (or shortLoop, if not 64bit) */
+
+ for (; _p1 < _p1Last; _p1 += 2, _pResult0 += 2) {
+ unsigned char *_pResult = _pResult0;
+ unsigned char *_p2 = otherBytes;
+ unsigned int short1 = ((unsigned short *)_p1)[0];
#if !defined(__LSBFIRST__)
- short1 = ((short1 >> 8) & 0xFF) | ((short1 & 0xFF) << 8);
+ short1 = ((short1 >> 8) & 0xFF) | ((short1 & 0xFF) << 8);
#endif
- _v = 0;
-
- /* loop over shorts of f2 */
- for ( ; _p2 < _p2Last; _p2 += 2, _pResult += 2) {
+ _v = 0;
+
+ /* loop over shorts of f2 */
+ for ( ; _p2 < _p2Last; _p2 += 2, _pResult += 2) {
#if !defined(__LSBFIRST__)
- unsigned int _short2 = ((unsigned short *)_p2)[0];
- unsigned int _short3 = ((unsigned short *)_pResult)[0];
-
- _short2 = ((_short2 >> 8) /* & 0xFF */) | ((_short2 & 0xFF) << 8);
- _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
- _v = (short1 * _short2) + _short3 + _v;
- _pResult[0] = _v;
- _pResult[1] = _v >> 8;
+ unsigned int _short2 = ((unsigned short *)_p2)[0];
+ unsigned int _short3 = ((unsigned short *)_pResult)[0];
+
+ _short2 = ((_short2 >> 8) /* & 0xFF */) | ((_short2 & 0xFF) << 8);
+ _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
+ _v = (short1 * _short2) + _short3 + _v;
+ _pResult[0] = _v;
+ _pResult[1] = _v >> 8;
#else /* __LSBFIRST__ */
- _v = (short1 * ((unsigned short *)_p2)[0]) + _v + ((unsigned short *)_pResult)[0];
- ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
+ _v = (short1 * ((unsigned short *)_p2)[0]) + _v + ((unsigned short *)_pResult)[0];
+ ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
#endif /* __LSBFIRST__ */
- _v >>= 16; /* now _v contains the carry*/
- }
-
- /* possible odd highByte of f2 */
- for ( ; _p2 <= _p2Last; _p2++, _pResult += 2) {
+ _v >>= 16; /* now _v contains the carry*/
+ }
+
+ /* possible odd highByte of f2 */
+ for ( ; _p2 <= _p2Last; _p2++, _pResult += 2) {
#if !defined(__LSBFIRST__)
- unsigned int _short3 = ((unsigned short *)_pResult)[0];
-
- _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
- _v = (short1 * _p2[0]) + _v + _short3;
- _pResult[0] = _v;
- _pResult[1] = _v >> 8;
+ unsigned int _short3 = ((unsigned short *)_pResult)[0];
+
+ _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
+ _v = (short1 * _p2[0]) + _v + _short3;
+ _pResult[0] = _v;
+ _pResult[1] = _v >> 8;
#else /* __LSBFIRST__ */
- _v = (short1 * _p2[0]) + _v + ((unsigned short *)_pResult)[0];
- ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
+ _v = (short1 * _p2[0]) + _v + ((unsigned short *)_pResult)[0];
+ ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
#endif /* __LSBFIRST__ */
- _v >>= 16; /* now _v contains the carry*/
- }
- /* distribute leftover carry byte-wise */
- for ( ; _v; _v >>= 8, _pResult++) {
- _v += _pResult[0];
- _pResult[0] = _v /* & 0xFF */;
- }
- }
-
- /* possible odd highByte of f1 (or byteLoop, if above is ever disabled) */
- for (; _p1 <= _p1Last; _p1++, _pResult0++) {
- unsigned char *_pResult = _pResult0;
- unsigned char *_p2 = otherBytes;
- unsigned int byte1 = _p1[0];
-
- _v = 0;
-
- /* loop over shorts of f2 */
- for ( ; _p2 < _p2Last; _p2 += 2, _pResult += 2) {
+ _v >>= 16; /* now _v contains the carry*/
+ }
+ /* distribute leftover carry byte-wise */
+ for ( ; _v; _v >>= 8, _pResult++) {
+ _v += _pResult[0];
+ _pResult[0] = _v /* & 0xFF */;
+ }
+ }
+
+ /* possible odd highByte of f1 (or byteLoop, if above is ever disabled) */
+ for (; _p1 <= _p1Last; _p1++, _pResult0++) {
+ unsigned char *_pResult = _pResult0;
+ unsigned char *_p2 = otherBytes;
+ unsigned int byte1 = _p1[0];
+
+ _v = 0;
+
+ /* loop over shorts of f2 */
+ for ( ; _p2 < _p2Last; _p2 += 2, _pResult += 2) {
#if !defined(__LSBFIRST__)
- unsigned int _short2 = ((unsigned short *)_p2)[0];
- unsigned int _short3 = ((unsigned short *)_pResult)[0];
-
- _short2 = ((_short2 >> 8) /* & 0xFF */) | ((_short2 & 0xFF) << 8);
- _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
- _v = (byte1 * _short2) + _v +_short3;
- _pResult[0] = _v;
- _pResult[1] = _v >> 8;
+ unsigned int _short2 = ((unsigned short *)_p2)[0];
+ unsigned int _short3 = ((unsigned short *)_pResult)[0];
+
+ _short2 = ((_short2 >> 8) /* & 0xFF */) | ((_short2 & 0xFF) << 8);
+ _short3 = ((_short3 >> 8) /* & 0xFF */) | ((_short3 & 0xFF) << 8);
+ _v = (byte1 * _short2) + _v +_short3;
+ _pResult[0] = _v;
+ _pResult[1] = _v >> 8;
#else /* __LSBFIRST__ */
- _v = (byte1 * ((unsigned short *)_p2)[0]) + _v + ((unsigned short *)_pResult)[0];
- ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
+ _v = (byte1 * ((unsigned short *)_p2)[0]) + _v + ((unsigned short *)_pResult)[0];
+ ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */;
#endif /* __LSBFIRST__ */
- _v >>= 16; /* now _v contains the carry*/
- }
-
- /* possible odd highByte of f2 (or byteLoop, if not __LSBFIRST__) */
- for ( ; _p2 <= _p2Last; _p2++, _pResult++) {
- _v = (byte1 * _p2[0]) + _v + _pResult[0];
- _pResult[0] = _v /* & 0xFF */;
- _v >>= 8; /* now _v contains the carry*/
- }
- /* distribute leftover carry byte-wise */
- for ( ; _v; _v >>= 8, _pResult++) {
- _v += _pResult[0];
- _pResult[0] = _v /* & 0xFF */;
- }
- }
- ok = true;
+ _v >>= 16; /* now _v contains the carry*/
+ }
+
+ /* possible odd highByte of f2 (or byteLoop, if not __LSBFIRST__) */
+ for ( ; _p2 <= _p2Last; _p2++, _pResult++) {
+ _v = (byte1 * _p2[0]) + _v + _pResult[0];
+ _pResult[0] = _v /* & 0xFF */;
+ _v >>= 8; /* now _v contains the carry*/
+ }
+ /* distribute leftover carry byte-wise */
+ for ( ; _v; _v >>= 8, _pResult++) {
+ _v += _pResult[0];
+ _pResult[0] = _v /* & 0xFF */;
+ }
+ }
+ ok = true;
}
%}.
ok ifFalse:[
- 1 to:len1 do:[:index1 |
- 1 to:len2 do:[:index2 |
- dstIndex := index1 + index2 - 1.
- prod := (digitByteArray basicAt:index1) * (otherDigitByteArray basicAt:index2).
- prod := prod + (resultDigitByteArray basicAt:dstIndex).
- resultDigitByteArray basicAt:dstIndex put:(prod bitAnd:16rFF).
- carry := prod bitShift:-8.
- carry ~~ 0 ifTrue:[
- idx := dstIndex + 1.
- [carry ~~ 0] whileTrue:[
- v := (resultDigitByteArray basicAt:idx) + carry.
- resultDigitByteArray basicAt:idx put:(v bitAnd:255).
- carry := v bitShift:-8.
- idx := idx + 1
- ]
- ]
- ]
- ].
+ 1 to:len1 do:[:index1 |
+ 1 to:len2 do:[:index2 |
+ dstIndex := index1 + index2 - 1.
+ prod := (digitByteArray basicAt:index1) * (otherDigitByteArray basicAt:index2).
+ prod := prod + (resultDigitByteArray basicAt:dstIndex).
+ resultDigitByteArray basicAt:dstIndex put:(prod bitAnd:16rFF).
+ carry := prod bitShift:-8.
+ carry ~~ 0 ifTrue:[
+ idx := dstIndex + 1.
+ [carry ~~ 0] whileTrue:[
+ v := (resultDigitByteArray basicAt:idx) + carry.
+ resultDigitByteArray basicAt:idx put:(v bitAnd:255).
+ carry := v bitShift:-8.
+ idx := idx + 1
+ ]
+ ]
+ ]
+ ].
].
^ result compressed
!
absMulKarazuba:aLargeInteger
"return a LargeInteger representing abs(self) * abs(theArgument) using the karazuba algorithm.
- a = (2^n * p) + q
- b = (2^n * r) + s
- a * b = ((2^n * p) + q) * ((2^n * r) + s)
- = 2^(n+n)*p*r + 2^n*p*s + 2^n*q*r + q*s
- = 2^(n+n)*p*r + (p*r + q*s - (q-p)*(s-r))*2^n + q*s
-
- this is faster for sufficient large n1,n2
- because regular multiplication is O(n1*n2) and karazuma multiplies much smaller numbers
- (half number of bits) but does more multiplications (on smaller numbers) and req's more
- additions and subtractions (on smaller numbers).
- The break-even for when to use regular multiplication has been determined heuristically
- and is somewhere around 1600 bits (digitLength of 200).
- (see test in absMul:)
-
- To disable karazuba, set UseKarazuba to false.
+ a = (2^n * p) + q
+ b = (2^n * r) + s
+ a * b = ((2^n * p) + q) * ((2^n * r) + s)
+ = 2^(n+n)*p*r + 2^n*p*s + 2^n*q*r + q*s
+ = 2^(n+n)*p*r + (p*r + q*s - (q-p)*(s-r))*2^n + q*s
+
+ this is faster for sufficient large n1,n2
+ because regular multiplication is O(n1*n2) and karazuma multiplies much smaller numbers
+ (half number of bits) but does more multiplications (on smaller numbers) and req's more
+ additions and subtractions (on smaller numbers).
+ The break-even for when to use regular multiplication has been determined heuristically
+ and is somewhere around 1600 bits (digitLength of 200).
+ (see test in absMul:)
+
+ To disable karazuba, set UseKarazuba to false.
"
"/ compute half-sized pi and qi...
@@ -4657,12 +4713,12 @@
"
#(100 500 1000 2500 5000 10000 25000 50000 100000 250000 500000 1000000) do:[:exp |
- |nr r1 r2|
- nr := (3 raisedTo:exp) asInteger.
- Transcript show:exp; show:' nbytes: '; showCR:nr digitLength;
- show:' normal: '; show:(Time microsecondsToRun:[ UseKarazuba := false. r1 := nr * nr ]); showCR:'us';
- show:' karazuba: '; show:(Time microsecondsToRun:[ UseKarazuba := true. r2 := nr absMulKarazuba: nr]); showCR:'us'.
- self assert:(r1 = r2).
+ |nr r1 r2|
+ nr := (3 raisedTo:exp) asInteger.
+ Transcript show:exp; show:' nbytes: '; showCR:nr digitLength;
+ show:' normal: '; show:(Time microsecondsToRun:[ UseKarazuba := false. r1 := nr * nr ]); showCR:'us';
+ show:' karazuba: '; show:(Time microsecondsToRun:[ UseKarazuba := true. r2 := nr absMulKarazuba: nr]); showCR:'us'.
+ self assert:(r1 = r2).
]
"
!