--- a/LargeInteger.st Tue May 24 20:37:52 2016 +0200
+++ b/LargeInteger.st Tue May 24 21:42:54 2016 +0200
@@ -2334,60 +2334,60 @@
num := anInteger abs.
SmallInteger maxBytes == 8 ifTrue:[
- (num > 16rFFFFFFFFFF) ifTrue:[
- "if num is too big (so that multiplying by a byte could create a Large)"
- ^ anInteger retry:#* coercing:self
- ].
+ (num > 16rFFFFFFFFFF) ifTrue:[
+ "if num is too big (so that multiplying by a byte could create a Large)"
+ ^ anInteger retry:#* coercing:self
+ ].
] ifFalse:[
- (num > 16r3FFFFF) ifTrue:[
- "if num is too big (so that multiplying by a byte could create a Large)"
- ^ anInteger retry:#* coercing:self
- ].
+ (num > 16r3FFFFF) ifTrue:[
+ "if num is too big (so that multiplying by a byte could create a Large)"
+ ^ anInteger retry:#* coercing:self
+ ].
].
len := digitByteArray size.
val := num.
val <= 16rFF ifTrue:[
- lResult := len + 1.
+ lResult := len + 1.
] ifFalse:[
- val <= 16rFFFF ifTrue:[
- lResult := len + 2
- ] ifFalse:[
- val <= 16rFFFFFF ifTrue:[
- lResult := len + 4.
- ] ifFalse:[
- val <= 16rFFFFFFFF ifTrue:[
- lResult := len + 6.
- ] ifFalse:[
- val <= 16rFFFFFFFFFF ifTrue:[
- lResult := len + 8.
- ] ifFalse:[
- val <= 16rFFFFFFFFFF ifTrue:[
- lResult := len + 10.
- ] ifFalse:[
- val <= 16rFFFFFFFFFFFF ifTrue:[
- lResult := len + 12.
- ] ifFalse:[
- lResult := len + 14.
- ]
- ]
- ]
- ]
- ]
- ]
+ val <= 16rFFFF ifTrue:[
+ lResult := len + 2
+ ] ifFalse:[
+ val <= 16rFFFFFF ifTrue:[
+ lResult := len + 4.
+ ] ifFalse:[
+ val <= 16rFFFFFFFF ifTrue:[
+ lResult := len + 6.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFF ifTrue:[
+ lResult := len + 8.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFF ifTrue:[
+ lResult := len + 10.
+ ] ifFalse:[
+ val <= 16rFFFFFFFFFFFF ifTrue:[
+ lResult := len + 12.
+ ] ifFalse:[
+ lResult := len + 14.
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
].
resultDigitByteArray := ByteArray uninitializedNew:lResult.
result := self class basicNew setDigits:resultDigitByteArray.
anInteger < 0 ifTrue:[
- sign > 0 ifTrue:[
- result setSign:-1
- ].
+ sign > 0 ifTrue:[
+ result setSign:-1
+ ].
] ifFalse:[
- sign < 0 ifTrue:[
- result setSign:sign
- ]
+ sign < 0 ifTrue:[
+ result setSign:sign
+ ]
].
ok := false.
@@ -2397,194 +2397,194 @@
if (__isSmallInteger(len)
&& __isByteArray(__digitByteArray)
&& __isByteArray(resultDigitByteArray)) {
- INT _l = __intVal(len);
- INT _v = __intVal(val);
- unsigned INT _carry = 0;
- unsigned INT _prod;
- unsigned char *digitP = __ByteArrayInstPtr(__digitByteArray)->ba_element;
- unsigned char *resultP = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
-
- /*
- * skipping zeros does not help much (a few percent) on
- * a P5 or other CPUS with a fast multiplier.
- * It may make more of a difference on CPUs with slower 0-multiply.
- * Late news: it actually hurts modern x86_64 cpus.
- * So only reenable for specific CPUs after concrete benchmarks.
- */
+ INT _l = __intVal(len);
+ INT _v = __intVal(val);
+ unsigned INT _carry = 0;
+ unsigned INT _prod;
+ unsigned char *digitP = __ByteArrayInstPtr(__digitByteArray)->ba_element;
+ unsigned char *resultP = __ByteArrayInstPtr(resultDigitByteArray)->ba_element;
+
+ /*
+ * skipping zeros does not help much (a few percent) on
+ * a P5 or other CPUS with a fast multiplier.
+ * It may make more of a difference on CPUs with slower 0-multiply.
+ * Late news: it actually hurts modern x86_64 cpus.
+ * So only reenable for specific CPUs after concrete benchmarks.
+ */
#if 0
- while ((_l >= sizeof(INT)) && (((unsigned INT *)digitP)[0] == 0)) {
- ((unsigned INT *)resultP)[0] = 0;
- digitP += sizeof(INT);
- resultP += sizeof(INT);
- _l -= sizeof(INT);
- }
+ while ((_l >= sizeof(INT)) && (((unsigned INT *)digitP)[0] == 0)) {
+ ((unsigned INT *)resultP)[0] = 0;
+ digitP += sizeof(INT);
+ resultP += sizeof(INT);
+ _l -= sizeof(INT);
+ }
#endif
#if defined(__LSBFIRST__)
# if defined (__GNUC__) && defined(__i386__) && (__POINTER_SIZE__ == 4)
- /*
- * can do it long-word-wise;
- * 32*32 -> 64 multiplication
- */
- while (_l > 3) {
- unsigned __pHi, __pLow;
- unsigned __digit;
-
- /*
- * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
- * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
- */
- __digit = ((unsigned long *)digitP)[0];
- asm ("mull %3 \n\
- addl %4,%%eax \n\
- adcl $0,%%edx"
- : "=a" (__pLow),
- "=d" (__pHi)
- : "0" (__digit),
- "1" ((unsigned long)(_v)),
- "rm" ((unsigned long)(_carry)) );
-
- ((unsigned long *)resultP)[0] = __pLow;
- _carry = __pHi;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
+ /*
+ * can do it long-word-wise;
+ * 32*32 -> 64 multiplication
+ */
+ while (_l > 3) {
+ unsigned __pHi, __pLow;
+ unsigned __digit;
+
+ /*
+ * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
+ * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
+ */
+ __digit = ((unsigned long *)digitP)[0];
+ asm ("mull %3 \n\
+ addl %4,%%eax \n\
+ adcl $0,%%edx"
+ : "=a" (__pLow),
+ "=d" (__pHi)
+ : "0" (__digit),
+ "1" (ASM_ULONGCAST(_v)),
+ "rm" (ASM_ULONGCAST(_carry)) );
+
+ ((unsigned long *)resultP)[0] = __pLow;
+ _carry = __pHi;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
# else /* not GNU-i386 */
# if defined(__win32__) && defined(__BORLANDC__) && defined(__x86__) && (__POINTER_SIZE__ == 4)
- /*
- * can do it long-word-wise;
- * 32*32 -> 64 multiplication
- */
- while (_l > 3) {
- unsigned __pLow;
- unsigned digit;
-
- /*
- * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
- * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
- */
+ /*
+ * can do it long-word-wise;
+ * 32*32 -> 64 multiplication
+ */
+ while (_l > 3) {
+ unsigned __pLow;
+ unsigned digit;
+
+ /*
+ * max: 0xFFFF.FFFF * 0xFFFF.FFFF -> 0xFFFF.FFFE.0000.0001
+ * + maxCarry (0xFFFF.FFFF) -> 0xFFFF.FFFF.0000.0000
+ */
/*
- digit = ((unsigned long *)digitP)[0];
- edx::eax = (digit * _v);
- edx::eax += _carry;
- ((unsigned long *)resultP)[0] = eax; -- pLow
- _carry = edx; -- pHigh
- digitP += 4;
- resultP += 4;
+ digit = ((unsigned long *)digitP)[0];
+ edx::eax = (digit * _v);
+ edx::eax += _carry;
+ ((unsigned long *)resultP)[0] = eax; -- pLow
+ _carry = edx; -- pHigh
+ digitP += 4;
+ resultP += 4;
*/
- digit = ((unsigned long *)digitP)[0];
- asm {
- mov eax, digit
- mov edx, _v
- mul edx
- add eax, _carry
- adc edx, 0
- mov __pLow, eax
- mov _carry, edx
- }
-
- ((unsigned long *)resultP)[0] = __pLow;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
+ digit = ((unsigned long *)digitP)[0];
+ asm {
+ mov eax, digit
+ mov edx, _v
+ mul edx
+ add eax, _carry
+ adc edx, 0
+ mov __pLow, eax
+ mov _carry, edx
+ }
+
+ ((unsigned long *)resultP)[0] = __pLow;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
# else /* not WIN32-i386 */
# if defined(INT64)
- if (_v <= 0xFFFFFFFFL) {
- /* have 64bit ints; can do it int-wise
- *
- * max: 0xFFFFFFFF * 0xFFFFFFFF -> 0xFFFFFFFE.0001
- * + maxCarry (0xFFFFFFFF) -> 0xFFFFFFFF.0000
- */
- while (_l >= (4+4+4+4)) {
- unsigned __t1, __t2, __t3, __t4;
- UINT64 _prod64a, _prod64b, _prod64c, _prod64d;
-
- __t1 = ((unsigned *)digitP)[0];
- _prod64a = (INT64)_v;
- _prod64a *= __t1;
- _prod64a += _carry;
- ((unsigned *)resultP)[0] = _prod64a /* & 0xFFFFFFFFL */;
- _carry = _prod64a >> 32;
-
- __t2 = ((unsigned *)digitP)[1];
- _prod64b = (INT64)_v;
- _prod64b *= __t2;
- _prod64b += _carry;
- ((unsigned *)resultP)[1] = _prod64b /* & 0xFFFFFFFFL */;
- _carry = _prod64b >> 32;
-
- __t3 = ((unsigned *)digitP)[2];
- _prod64c = (INT64)_v;
- _prod64c *= __t3;
- _prod64c += _carry;
- ((unsigned *)resultP)[2] = _prod64c /* & 0xFFFFFFFFL */;
- _carry = _prod64c >> 32;
-
- __t4 = ((unsigned *)digitP)[3];
- _prod64d = (INT64)_v;
- _prod64d *= __t4;
- _prod64d += _carry;
- ((unsigned *)resultP)[3] = _prod64d /* & 0xFFFFFFFFL */;
- _carry = _prod64d >> 32;
-
- digitP += (4+4+4+4);
- resultP += (4+4+4+4);
- _l -= (4+4+4+4);
- }
- while (_l >= 4) {
- unsigned __t;
- UINT64 _prod64;
-
- __t = ((unsigned *)digitP)[0];
- _prod64 = (INT64)_v;
- _prod64 *= __t;
- _prod64 += _carry;
- ((unsigned *)resultP)[0] = _prod64 /* & 0xFFFFFFFFL */;
- _carry = _prod64 >> 32;
- digitP += 4;
- resultP += 4;
- _l -= 4;
- }
- if (_l >= 2) {
- unsigned short __t;
- UINT64 _prod64;
-
- __t = ((unsigned short *)digitP)[0];
- _prod64 = (INT64)_v;
- _prod64 *= __t;
- _prod64 += _carry;
- ((unsigned short *)resultP)[0] = _prod64 /* & 0xFFFF */;
- _carry = _prod64 >> 16;
- digitP += 2;
- resultP += 2;
- _l -= 2;
- }
- if (_l > 0) {
- UINT64 _prod64;
- _prod64 = *digitP++ * _v + _carry;
- *resultP++ = _prod64 /* & 0xFF */;
- _carry = _prod64 >> 8;
- _l--;
- }
- }
+ if (_v <= 0xFFFFFFFFL) {
+ /* have 64bit ints; can do it int-wise
+ *
+ * max: 0xFFFFFFFF * 0xFFFFFFFF -> 0xFFFFFFFE.0001
+ * + maxCarry (0xFFFFFFFF) -> 0xFFFFFFFF.0000
+ */
+ while (_l >= (4+4+4+4)) {
+ unsigned __t1, __t2, __t3, __t4;
+ UINT64 _prod64a, _prod64b, _prod64c, _prod64d;
+
+ __t1 = ((unsigned *)digitP)[0];
+ _prod64a = (INT64)_v;
+ _prod64a *= __t1;
+ _prod64a += _carry;
+ ((unsigned *)resultP)[0] = _prod64a /* & 0xFFFFFFFFL */;
+ _carry = _prod64a >> 32;
+
+ __t2 = ((unsigned *)digitP)[1];
+ _prod64b = (INT64)_v;
+ _prod64b *= __t2;
+ _prod64b += _carry;
+ ((unsigned *)resultP)[1] = _prod64b /* & 0xFFFFFFFFL */;
+ _carry = _prod64b >> 32;
+
+ __t3 = ((unsigned *)digitP)[2];
+ _prod64c = (INT64)_v;
+ _prod64c *= __t3;
+ _prod64c += _carry;
+ ((unsigned *)resultP)[2] = _prod64c /* & 0xFFFFFFFFL */;
+ _carry = _prod64c >> 32;
+
+ __t4 = ((unsigned *)digitP)[3];
+ _prod64d = (INT64)_v;
+ _prod64d *= __t4;
+ _prod64d += _carry;
+ ((unsigned *)resultP)[3] = _prod64d /* & 0xFFFFFFFFL */;
+ _carry = _prod64d >> 32;
+
+ digitP += (4+4+4+4);
+ resultP += (4+4+4+4);
+ _l -= (4+4+4+4);
+ }
+ while (_l >= 4) {
+ unsigned __t;
+ UINT64 _prod64;
+
+ __t = ((unsigned *)digitP)[0];
+ _prod64 = (INT64)_v;
+ _prod64 *= __t;
+ _prod64 += _carry;
+ ((unsigned *)resultP)[0] = _prod64 /* & 0xFFFFFFFFL */;
+ _carry = _prod64 >> 32;
+ digitP += 4;
+ resultP += 4;
+ _l -= 4;
+ }
+ if (_l >= 2) {
+ unsigned short __t;
+ UINT64 _prod64;
+
+ __t = ((unsigned short *)digitP)[0];
+ _prod64 = (INT64)_v;
+ _prod64 *= __t;
+ _prod64 += _carry;
+ ((unsigned short *)resultP)[0] = _prod64 /* & 0xFFFF */;
+ _carry = _prod64 >> 16;
+ digitP += 2;
+ resultP += 2;
+ _l -= 2;
+ }
+ if (_l > 0) {
+ UINT64 _prod64;
+ _prod64 = *digitP++ * _v + _carry;
+ *resultP++ = _prod64 /* & 0xFF */;
+ _carry = _prod64 >> 8;
+ _l--;
+ }
+ }
# else /* no INT64 type */
- if (_v <= 0xFFFF) {
- /* can do it short-wise
- *
- * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
- * + maxCarry (0xFFFF) -> 0xFFFF.0000
- */
- while (_l > 1) {
- _prod = ((unsigned short *)digitP)[0] * _v + _carry;
- ((unsigned short *)resultP)[0] = _prod /* & 0xFFFF */;
- _carry = _prod >> 16;
- digitP += 2;
- resultP += 2;
- _l -= 2;
- }
- }
+ if (_v <= 0xFFFF) {
+ /* can do it short-wise
+ *
+ * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
+ * + maxCarry (0xFFFF) -> 0xFFFF.0000
+ */
+ while (_l > 1) {
+ _prod = ((unsigned short *)digitP)[0] * _v + _carry;
+ ((unsigned short *)resultP)[0] = _prod /* & 0xFFFF */;
+ _carry = _prod >> 16;
+ digitP += 2;
+ resultP += 2;
+ _l -= 2;
+ }
+ }
# endif /* no INT64 */
# endif /* not WIN32-i386 */
# endif /* not GNU-i386 */
@@ -2595,80 +2595,80 @@
/* no, STORE_WORD_WISE makes it slower */
# endif
- if (_v <= 0xFFFF) {
- /* can do it short-wise
- *
- * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
- * + maxCarry (0xFFFF) -> 0xFFFF.0000
- */
- while (_l > 1) {
- unsigned int t;
+ if (_v <= 0xFFFF) {
+ /* can do it short-wise
+ *
+ * max: 0xFFFF * 0xFFFF -> 0xFFFE.0001
+ * + maxCarry (0xFFFF) -> 0xFFFF.0000
+ */
+ while (_l > 1) {
+ unsigned int t;
#if defined(LOAD_WORD_WISE)
- /* better fetch short-wise */
- t = ((unsigned short *)digitP)[0];
- digitP += 2;
- t = ((t >> 8) | (t << 8)) & 0xFFFF;
+ /* better fetch short-wise */
+ t = ((unsigned short *)digitP)[0];
+ digitP += 2;
+ t = ((t >> 8) | (t << 8)) & 0xFFFF;
#else
- t = (digitP[1]<<8) + digitP[0];
- digitP += 2;
+ t = (digitP[1]<<8) + digitP[0];
+ digitP += 2;
#endif
- _prod = t * _v + _carry;
- _carry = _prod >> 16;
+ _prod = t * _v + _carry;
+ _carry = _prod >> 16;
#if defined(STORE_WORD_WISE)
- /* better store short-wise */
- _prod = ((_prod >> 8) | (_prod << 8)) & 0xFFFF;
- ((unsigned short *)resultP)[0] = _prod;
+ /* better store short-wise */
+ _prod = ((_prod >> 8) | (_prod << 8)) & 0xFFFF;
+ ((unsigned short *)resultP)[0] = _prod;
#else
- resultP[0] = _prod /* & 0xFF */;
- resultP[1] = (_prod>>8) /* & 0xFF */;
+ resultP[0] = _prod /* & 0xFF */;
+ resultP[1] = (_prod>>8) /* & 0xFF */;
#endif
- resultP += 2;
- _l -= 2;
- }
- }
+ resultP += 2;
+ _l -= 2;
+ }
+ }
#endif /* LSB_FIRST */
- /*
- * rest is done byte-wise
- */
- while (_l > 0) {
- _prod = *digitP++ * _v + _carry;
- *resultP++ = _prod /* & 0xFF */;
- _carry = _prod >> 8;
- _l--;
- }
-
- _l = __intVal(lResult) - __intVal(len);
-
- /*
- * remaining carry
- */
- while (_carry) {
- *resultP++ = _carry /* & 0xFF */;
- _carry >>= 8;
- _l--;
- }
-
- /*
- * remaining zeros
- */
- while (_l--) {
- *resultP++ = 0;
- }
-
- /*
- * need compress ?
- */
- if (resultP[-1]) {
- /*
- * no
- */
- RETURN(result);
- }
-
- ok = true;
+ /*
+ * rest is done byte-wise
+ */
+ while (_l > 0) {
+ _prod = *digitP++ * _v + _carry;
+ *resultP++ = _prod /* & 0xFF */;
+ _carry = _prod >> 8;
+ _l--;
+ }
+
+ _l = __intVal(lResult) - __intVal(len);
+
+ /*
+ * remaining carry
+ */
+ while (_carry) {
+ *resultP++ = _carry /* & 0xFF */;
+ _carry >>= 8;
+ _l--;
+ }
+
+ /*
+ * remaining zeros
+ */
+ while (_l--) {
+ *resultP++ = 0;
+ }
+
+ /*
+ * need compress ?
+ */
+ if (resultP[-1]) {
+ /*
+ * no
+ */
+ RETURN(result);
+ }
+
+ ok = true;
}
%}.
"
@@ -2676,21 +2676,21 @@
(could make it a primitive-failure as well)
"
ok ifFalse:[
- carry := 0.
- 1 to:len do:[:i |
- prod := (digitByteArray basicAt:i) * val + carry.
- resultDigitByteArray basicAt:i put:(prod bitAnd:16rFF).
- carry := prod bitShift:-8.
- ].
- [carry ~~ 0] whileTrue:[
- len := len + 1.
- resultDigitByteArray basicAt:len put:(carry bitAnd:16rFF).
- carry := carry bitShift:-8
- ].
- [len < lResult] whileTrue:[
- len := len + 1.
- resultDigitByteArray basicAt:len put:0
- ]
+ carry := 0.
+ 1 to:len do:[:i |
+ prod := (digitByteArray basicAt:i) * val + carry.
+ resultDigitByteArray basicAt:i put:(prod bitAnd:16rFF).
+ carry := prod bitShift:-8.
+ ].
+ [carry ~~ 0] whileTrue:[
+ len := len + 1.
+ resultDigitByteArray basicAt:len put:(carry bitAnd:16rFF).
+ carry := carry bitShift:-8
+ ].
+ [len < lResult] whileTrue:[
+ len := len + 1.
+ resultDigitByteArray basicAt:len put:0
+ ]
].
^ result compressed
!
@@ -3502,7 +3502,7 @@
((aSmallInteger < (SmallInteger minVal + 255))
or:[aSmallInteger > (SmallInteger maxVal - 255)]) ifTrue:[
- ^ self absPlus:(self class value:aSmallInteger) sign:newSign.
+ ^ self absPlus:(self class value:aSmallInteger) sign:newSign.
].
len := rsltLen := digitByteArray size.
@@ -3511,18 +3511,18 @@
"/ if it is 255 (since the other number is definitely smaller)
"/
(digitByteArray at:len) == 16rFF ifTrue:[
- rsltLen := len + 1.
+ rsltLen := len + 1.
] ifFalse:[
- "/ or the argument has something in the high byte ..
+ "/ or the argument has something in the high byte ..
%{
#if __POINTER_SIZE__ == 8
- if (__intVal(aSmallInteger) & 0xFF00000000000000L) {
- rsltLen = __mkSmallInteger(__intVal(len) + 1);
- }
+ if (__intVal(aSmallInteger) & 0xFF00000000000000L) {
+ rsltLen = __mkSmallInteger(__intVal(len) + 1);
+ }
#else
- if (__intVal(aSmallInteger) & 0xFF000000) {
- rsltLen = __mkSmallInteger(__intVal(len) + 1);
- }
+ if (__intVal(aSmallInteger) & 0xFF000000) {
+ rsltLen = __mkSmallInteger(__intVal(len) + 1);
+ }
#endif
%}
].
@@ -3534,309 +3534,309 @@
if (__isByteArray(__INST(digitByteArray))
&& __isByteArray(resultDigitByteArray)
&& __isSmallInteger(aSmallInteger)) {
- /* carry is NOT unsigned (see negation below) */
- INT __carry = __intVal(aSmallInteger);
- int __index = 1;
- int __len = __intVal(len);
- unsigned char *__src = (unsigned char *)(__ByteArrayInstPtr(__INST(digitByteArray))->ba_element);
- unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
- INT __ptrDelta = __dst - __src;
- unsigned char *__srcLast = __src + __len - 1;
- int __rsltLen = __intVal(rsltLen);
-
- if (__carry < 0) {
- __carry = -__carry;
- }
+ /* carry is NOT unsigned (see negation below) */
+ INT __carry = __intVal(aSmallInteger);
+ int __index = 1;
+ int __len = __intVal(len);
+ unsigned char *__src = (unsigned char *)(__ByteArrayInstPtr(__INST(digitByteArray))->ba_element);
+ unsigned char *__dst = (unsigned char *)(__ByteArrayInstPtr(resultDigitByteArray)->ba_element);
+ INT __ptrDelta = __dst - __src;
+ unsigned char *__srcLast = __src + __len - 1;
+ int __rsltLen = __intVal(rsltLen);
+
+ if (__carry < 0) {
+ __carry = -__carry;
+ }
#if defined(__LSBFIRST__)
# if defined(__i386__) && defined(__GNUC__) && (__POINTER_SIZE__ == 4)
# if 0 /* NOTICE - the code below is 20% slower ... - why */
- /*
- * add long-wise
- */
- asm(" jecxz nothingToDo \n\
- movl %%eax, %%esi /* __src input */ \n\
- movl %%ebx, %%edi /* __dst input */ \n\
- \n\
- /* the first 4-byte int */ \n\
- lodsl /* fetch */ \n\
- addl %%edx, %%eax /* add */ \n\
- stosl /* store */ \n\
- leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
- jecxz doneLoop /* any more ? */ \n\
- /* remaining 4-byte ints */ \n\
- jmp addLoop \n\
- \n\
- .align 8 \n\
- addLoop: \n\
- movl 0(%%esi), %%ebx /* fetch */ \n\
- jnc copyLoop2 \n\
- movl $0, %%eax \n\
- leal 4(%%esi), %%esi \n\
- adcl %%ebx, %%eax /* & add carry from prev int */\n\
- leal 8(%%edi), %%edi \n\
- movl %%eax, -8(%%edi) /* store */ \n\
- leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
- jecxz doneLoop /* any more ? */ \n\
- \n\
- movl 0(%%esi), %%ebx /* fetch */ \n\
- movl $0, %%eax \n\
- leal 4(%%esi), %%esi \
- adcl %%ebx, %%eax /* & add carry from prev int */\n\
- movl %%eax, -4(%%edi) /* store */ \n\
- \n\
- loop addLoop \n\
- jmp doneLoop \n\
- \n\
- .align 8 \n\
- copyLoop: \n\
- movl 0(%%esi), %%ebx \n\
- copyLoop2: \n\
- add $4, %%esi \n\
- add $4, %%edi \n\
- movl %%ebx, -4(%%edi) \n\
- loop copyLoop \n\
- \n\
- doneLoop: \n\
- movl $0, %%edx /* do not clobber carry (xorl clears it) */ \n\
- adcl $0, %%edx \n\
- movl %%esi, %%eax /* __src output */ \n\
- nothingToDo: \n\
- " : "=d" ((unsigned long)(__carry)),
- "=a" (__src)
- : "1" (__src),
- "b" (__dst),
- "c" (__len / 4),
- "0" (__carry)
- : "esi", "edi");
+ /*
+ * add long-wise
+ */
+ asm(" jecxz nothingToDo \n\
+ movl %%eax, %%esi /* __src input */ \n\
+ movl %%ebx, %%edi /* __dst input */ \n\
+ \n\
+ /* the first 4-byte int */ \n\
+ lodsl /* fetch */ \n\
+ addl %%edx, %%eax /* add */ \n\
+ stosl /* store */ \n\
+ leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
+ jecxz doneLoop /* any more ? */ \n\
+ /* remaining 4-byte ints */ \n\
+ jmp addLoop \n\
+ \n\
+ .align 8 \n\
+ addLoop: \n\
+ movl 0(%%esi), %%ebx /* fetch */ \n\
+ jnc copyLoop2 \n\
+ movl $0, %%eax \n\
+ leal 4(%%esi), %%esi \n\
+ adcl %%ebx, %%eax /* & add carry from prev int */\n\
+ leal 8(%%edi), %%edi \n\
+ movl %%eax, -8(%%edi) /* store */ \n\
+ leal -1(%%ecx),%%ecx /* do not clobber carry */ \n\
+ jecxz doneLoop /* any more ? */ \n\
+ \n\
+ movl 0(%%esi), %%ebx /* fetch */ \n\
+ movl $0, %%eax \n\
+ leal 4(%%esi), %%esi \
+ adcl %%ebx, %%eax /* & add carry from prev int */\n\
+ movl %%eax, -4(%%edi) /* store */ \n\
+ \n\
+ loop addLoop \n\
+ jmp doneLoop \n\
+ \n\
+ .align 8 \n\
+ copyLoop: \n\
+ movl 0(%%esi), %%ebx \n\
+ copyLoop2: \n\
+ add $4, %%esi \n\
+ add $4, %%edi \n\
+ movl %%ebx, -4(%%edi) \n\
+ loop copyLoop \n\
+ \n\
+ doneLoop: \n\
+ movl $0, %%edx /* do not clobber carry (xorl clears it) */ \n\
+ adcl $0, %%edx \n\
+ movl %%esi, %%eax /* __src output */ \n\
+ nothingToDo: \n\
+ " : "=d" (ASM_ULONGCAST(__carry)),
+ "=a" (__src)
+ : "1" (__src),
+ "b" (__dst),
+ "c" (__len / 4),
+ "0" (__carry)
+ : "esi", "edi");
# else
- {
- unsigned char *__srcLastX;
-
- __srcLastX = __srcLast - 3 - 4;
- while (__src <= __srcLastX) {
- unsigned int __sum, __sum2;
- unsigned __digit1, __digit2;
-
- __digit1 = ((unsigned *)__src)[0];
- __digit2 = ((unsigned *)__src)[1];
- asm ("addl %%edx,%%ecx \n\
- adcl $0, %%eax \n\
- movl $0, %%edx \n\
- adcl $0, %%edx"
- : "=d" ((unsigned long)(__carry)),
- "=c" ((unsigned long)(__sum)),
- "=a" ((unsigned long)(__sum2))
- : "0" ((unsigned long)(__carry)),
- "1" (__digit1),
- "2" (__digit2));
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- ((unsigned int *)(__src + __ptrDelta))[1] = __sum2;
-
- __src += 8;
-
- if (__carry == 0) {
- while (__src <= __srcLastX) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
- __src += 8;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
-
- __srcLastX = __srcLastX + 4;
- if (__src <= __srcLastX) {
- unsigned int __sum, __digit;
-
- __digit = ((unsigned *)__src)[0];
-
- asm ("addl %%eax,%%edx \n\
- movl $0,%%eax \n\
- adcl $0,%%eax"
- : "=a" ((unsigned long)(__carry)),
- "=d" ((unsigned long)(__sum))
- : "0" ((unsigned long)(__carry)),
- "1" (__digit) );
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
-
- if (__carry == 0) {
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLastX;
+
+ __srcLastX = __srcLast - 3 - 4;
+ while (__src <= __srcLastX) {
+ unsigned int __sum, __sum2;
+ unsigned __digit1, __digit2;
+
+ __digit1 = ((unsigned *)__src)[0];
+ __digit2 = ((unsigned *)__src)[1];
+ asm ("addl %%edx,%%ecx \n\
+ adcl $0, %%eax \n\
+ movl $0, %%edx \n\
+ adcl $0, %%edx"
+ : "=d" (ASM_ULONGCAST(__carry)),
+ "=c" (ASM_ULONGCAST(__sum)),
+ "=a" (ASM_ULONGCAST(__sum2))
+ : "0" (ASM_ULONGCAST(__carry)),
+ "1" (__digit1),
+ "2" (__digit2));
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ ((unsigned int *)(__src + __ptrDelta))[1] = __sum2;
+
+ __src += 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLastX) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ ((unsigned int *)(__src + __ptrDelta))[1] = ((unsigned int *)__src)[1];
+ __src += 8;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+
+ __srcLastX = __srcLastX + 4;
+ if (__src <= __srcLastX) {
+ unsigned int __sum, __digit;
+
+ __digit = ((unsigned *)__src)[0];
+
+ asm ("addl %%eax,%%edx \n\
+ movl $0,%%eax \n\
+ adcl $0,%%eax"
+ : "=a" (ASM_ULONGCAST(__carry)),
+ "=d" (ASM_ULONGCAST(__sum))
+ : "0" (ASM_ULONGCAST(__carry)),
+ "1" (__digit) );
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# endif
# else /* not i386-GNUC */
# if defined(__win32__) && defined(__BORLANDC__) && defined(__x86__) && (__POINTER_SIZE__ == 4)
- {
- unsigned char *__srcLast4;
-
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned int __sum;
-
- __sum = ((unsigned int *)__src)[0];
- asm {
- mov eax, __sum
- add eax, __carry
- mov edx, 0
- adc edx, 0
- mov __sum, eax
- mov __carry, edx
- }
-
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
- __src += 4;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned int __sum;
+
+ __sum = ((unsigned int *)__src)[0];
+ asm {
+ mov eax, __sum
+ add eax, __carry
+ mov edx, 0
+ adc edx, 0
+ mov __sum, eax
+ mov __carry, edx
+ }
+
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum;
+ __src += 4;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# else /* not i386-WIN32 */
# if defined(__LSBFIRST__) && (__POINTER_SIZE__ == 8)
- {
- unsigned char *__srcLast4;
-
- /*
- * add long-wise
- */
- __srcLast4 = __srcLast - 3;
- while (__src <= __srcLast4) {
- unsigned INT __sum;
-
- __sum = (INT)(((unsigned int *)__src)[0]);
- __sum += __carry;
- ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
- __src += 4;
- __carry = __sum >> 32;
- if (__carry == 0) {
- while (__src <= __srcLast4) {
- /* copy over words */
- ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
- __src += 4;
- }
- while (__src <= __srcLast) {
- /* copy over bytes */
- __src[__ptrDelta] = __src[0];
- __src ++;
- }
- goto doneSource;
- }
- }
- }
+ {
+ unsigned char *__srcLast4;
+
+ /*
+ * add long-wise
+ */
+ __srcLast4 = __srcLast - 3;
+ while (__src <= __srcLast4) {
+ unsigned INT __sum;
+
+ __sum = (INT)(((unsigned int *)__src)[0]);
+ __sum += __carry;
+ ((unsigned int *)(__src + __ptrDelta))[0] = __sum /* & 0xFFFF */;
+ __src += 4;
+ __carry = __sum >> 32;
+ if (__carry == 0) {
+ while (__src <= __srcLast4) {
+ /* copy over words */
+ ((unsigned int *)(__src + __ptrDelta))[0] = ((unsigned int *)__src)[0];
+ __src += 4;
+ }
+ while (__src <= __srcLast) {
+ /* copy over bytes */
+ __src[__ptrDelta] = __src[0];
+ __src ++;
+ }
+ goto doneSource;
+ }
+ }
+ }
# endif /* LSB+64bit */
# endif /* __i386__ & WIN32 */
# endif /* __i386__ & GNUC */
- /*
- * add short-wise
- */
- while (__src < __srcLast) {
- __carry += ((unsigned short *)__src)[0];
- ((unsigned short *)(__src + __ptrDelta))[0] = __carry /* & 0xFFFF */;
- __carry >>= 16;
- __src += 2;
- }
- /*
- * last (odd) byte
- */
- if (__src <= __srcLast) {
- __carry += __src[0];
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __carry >>= 8;
- __src++;
- }
+ /*
+ * add short-wise
+ */
+ while (__src < __srcLast) {
+ __carry += ((unsigned short *)__src)[0];
+ ((unsigned short *)(__src + __ptrDelta))[0] = __carry /* & 0xFFFF */;
+ __carry >>= 16;
+ __src += 2;
+ }
+ /*
+ * last (odd) byte
+ */
+ if (__src <= __srcLast) {
+ __carry += __src[0];
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __carry >>= 8;
+ __src++;
+ }
#else /* not __LSBFIRST__ */
- /*
- * add byte-wise
- */
- while (__src <= __srcLast) {
- __carry += __src[0];
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __src++;
- __carry >>= 8;
-
- if (__carry == 0) {
- while (__src <= __srcLast) {
- /* copy over rest */
- __src[__ptrDelta] = __src[0];
- __src++;
- }
- goto doneSource;
- }
- }
+ /*
+ * add byte-wise
+ */
+ while (__src <= __srcLast) {
+ __carry += __src[0];
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
+ __carry >>= 8;
+
+ if (__carry == 0) {
+ while (__src <= __srcLast) {
+ /* copy over rest */
+ __src[__ptrDelta] = __src[0];
+ __src++;
+ }
+ goto doneSource;
+ }
+ }
#endif /* __LSBFIRST__ */
doneSource: ;
- /*
- * now, at most one other byte is to be stored ...
- */
- if (__len < __rsltLen) {
- __src[__ptrDelta] = __carry /* & 0xFF */;
- __src++;
- }
-
- if (__src[__ptrDelta-1] != 0) { /* lastDigit */
- RETURN (result);
- }
- // must compress
- ok = true;
+ /*
+ * now, at most one other byte is to be stored ...
+ */
+ if (__len < __rsltLen) {
+ __src[__ptrDelta] = __carry /* & 0xFF */;
+ __src++;
+ }
+
+ if (__src[__ptrDelta-1] != 0) { /* lastDigit */
+ RETURN (result);
+ }
+ // must compress
+ ok = true;
}
%}.
ok ~~ true ifTrue:[
- index := 1.
- carry := aSmallInteger abs.
-
- [carry ~~ 0] whileTrue:[
- (index <= len) ifTrue:[
- carry := (digitByteArray basicAt:index) + carry.
- ].
- resultDigitByteArray basicAt:index put:(lastDigit := carry bitAnd:16rFF).
- carry := carry bitShift:-8.
- index := index + 1
- ].
-
- (index <= rsltLen) ifTrue:[
- [index <= len] whileTrue:[
- resultDigitByteArray basicAt:index put:(digitByteArray basicAt:index).
- index := index + 1
- ].
- lastDigit := 0.
- ].
-
- (lastDigit ~~ 0 and:[rsltLen > SmallInteger maxBytes]) ifTrue:[
- ^ result
- ].
+ index := 1.
+ carry := aSmallInteger abs.
+
+ [carry ~~ 0] whileTrue:[
+ (index <= len) ifTrue:[
+ carry := (digitByteArray basicAt:index) + carry.
+ ].
+ resultDigitByteArray basicAt:index put:(lastDigit := carry bitAnd:16rFF).
+ carry := carry bitShift:-8.
+ index := index + 1
+ ].
+
+ (index <= rsltLen) ifTrue:[
+ [index <= len] whileTrue:[
+ resultDigitByteArray basicAt:index put:(digitByteArray basicAt:index).
+ index := index + 1
+ ].
+ lastDigit := 0.
+ ].
+
+ (lastDigit ~~ 0 and:[rsltLen > SmallInteger maxBytes]) ifTrue:[
+ ^ result
+ ].
].
^ result compressed
@@ -5612,3 +5612,4 @@
version_CVS
^ '$Header$'
! !
+