2723 %{ |
2723 %{ |
2724 if (__isByteArray(__INST(digitByteArray)) |
2724 if (__isByteArray(__INST(digitByteArray)) |
2725 && __isByteArray(otherDigitByteArray) |
2725 && __isByteArray(otherDigitByteArray) |
2726 && __isByteArray(resultDigitByteArray) |
2726 && __isByteArray(resultDigitByteArray) |
2727 && __bothSmallInteger(len1, len2)) { |
2727 && __bothSmallInteger(len1, len2)) { |
2728 unsigned char *myBytes = __ByteArrayInstPtr(__INST(digitByteArray))->ba_element; |
2728 unsigned char *myBytes = __ByteArrayInstPtr(__INST(digitByteArray))->ba_element; |
2729 unsigned char *otherBytes = __ByteArrayInstPtr(otherDigitByteArray)->ba_element; |
2729 unsigned char *otherBytes = __ByteArrayInstPtr(otherDigitByteArray)->ba_element; |
2730 unsigned char *resultBytes = __ByteArrayInstPtr(resultDigitByteArray)->ba_element; |
2730 unsigned char *resultBytes = __ByteArrayInstPtr(resultDigitByteArray)->ba_element; |
2731 unsigned char *_p1, *_p2, *_pResult, *_pResult0, *_pResult1, *_p1Last, *_p2Last; |
2731 unsigned char *_p1, *_p2, *_pResult, *_pResult0, *_pResult1, *_p1Last, *_p2Last; |
2732 unsigned INT _v; |
2732 unsigned INT _v; |
2733 int _len1 = __intVal(len1); |
2733 int _len1 = __intVal(len1); |
2734 int _len2 = __intVal(len2); |
2734 int _len2 = __intVal(len2); |
2735 |
2735 |
2736 _p1Last = myBytes + _len1 - 1; /* the last byte */ |
2736 _p1Last = myBytes + _len1 - 1; /* the last byte */ |
2737 _p2Last = otherBytes + _len2 - 1; /* the last byte */ |
2737 _p2Last = otherBytes + _len2 - 1; /* the last byte */ |
2738 _pResult0 = resultBytes; |
2738 _pResult0 = resultBytes; |
2739 |
2739 |
2740 /* |
2740 /* |
2741 * aaa...aaa f1[0] * f2 |
2741 * aaa...aaa f1[0] * f2 |
2742 * bbb...bbb f1[1] * f2 |
2742 * bbb...bbb f1[1] * f2 |
2743 * ccc...ccc f1[2] * f2 |
2743 * ccc...ccc f1[2] * f2 |
2744 * ... |
2744 * ... |
2745 * xxx...xxx f1[high] * f2 |
2745 * xxx...xxx f1[high] * f2 |
2746 * |
2746 * |
2747 * start short-wise |
2747 * start short-wise |
2748 * bounds: (16rFFFF * 16rFFFF) + 16rFFFF -> FFFF0000 |
2748 * bounds: (16rFFFF * 16rFFFF) + 16rFFFF -> FFFF0000 |
2749 */ |
2749 */ |
2750 _p1 = myBytes; |
2750 _p1 = myBytes; |
|
2751 |
|
2752 #if defined(alpha64) |
|
2753 /* loop over ints of f1 */ |
|
2754 for (; _p1 < _p1Last-2; _p1 += 4, _pResult0 += 4) { |
|
2755 unsigned INT word1 = ((unsigned int *)_p1)[0]; |
|
2756 |
|
2757 _pResult = _pResult0; |
|
2758 _p2 = otherBytes; |
|
2759 |
|
2760 /* loop over ints of f2 */ |
|
2761 while (_p2 < (_p2Last-2)) { |
|
2762 _v = (word1 * ((unsigned int *)_p2)[0]) + ((unsigned int *)_pResult)[0]; |
|
2763 ((unsigned int *)_pResult)[0] = _v /* & 0xFFFFFFFF */; |
|
2764 _v >>= 32; /* now _v contains the carry*/ |
|
2765 _pResult += 4; |
|
2766 if (_v) { |
|
2767 /* distribute carry */ |
|
2768 for (_pResult1 = _pResult; _v; _pResult1++) { |
|
2769 _v += _pResult1[0]; |
|
2770 _pResult1[0] = _v /* & 0xFF */; |
|
2771 _v >>= 8; |
|
2772 } |
|
2773 } |
|
2774 _p2 += 4; |
|
2775 } |
|
2776 |
|
2777 /* possible odd highByte of f2 */ |
|
2778 while (_p2 <= _p2Last) { |
|
2779 _v = (word1 * _p2[0]) + ((unsigned int *)_pResult)[0]; |
|
2780 ((unsigned int *)_pResult)[0] = _v /* & 0xFFFFFFFF */; |
|
2781 _v >>= 32; /* now _v contains the carry*/ |
|
2782 _pResult += 4; |
|
2783 if (_v) { |
|
2784 /* distribute carry */ |
|
2785 for (_pResult1 = _pResult; _v; _pResult1++) { |
|
2786 _v += _pResult1[0]; |
|
2787 _pResult1[0] = _v /* & 0xFF */; |
|
2788 _v >>= 8; |
|
2789 } |
|
2790 } |
|
2791 _p2++; |
|
2792 } |
|
2793 } |
|
2794 #endif /* alpha64 */ |
2751 |
2795 |
2752 #if defined(__LSBFIRST) || defined(alpha) || defined(i386) |
2796 #if defined(__LSBFIRST) || defined(alpha) || defined(i386) |
2753 /* loop over shorts of f1 */ |
2797 /* loop over shorts of f1 */ |
2754 for (; _p1 < _p1Last; _p1 += 2, _pResult0 += 2) { |
2798 for (; _p1 < _p1Last; _p1 += 2, _pResult0 += 2) { |
2755 unsigned int short1 = ((unsigned short *)_p1)[0]; |
2799 unsigned int short1 = ((unsigned short *)_p1)[0]; |
2756 |
2800 |
2757 _pResult = _pResult0; |
2801 _pResult = _pResult0; |
2758 _p2 = otherBytes; |
2802 _p2 = otherBytes; |
2759 |
2803 |
2760 /* loop over shorts of f2 */ |
2804 /* loop over shorts of f2 */ |
2761 while (_p2 < _p2Last) { |
2805 while (_p2 < _p2Last) { |
2762 _v = (short1 * ((unsigned short *)_p2)[0]) + ((unsigned short *)_pResult)[0]; |
2806 _v = (short1 * ((unsigned short *)_p2)[0]) + ((unsigned short *)_pResult)[0]; |
2763 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2807 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2764 _v >>= 16; /* now _v contains the carry*/ |
2808 _v >>= 16; /* now _v contains the carry*/ |
2765 _pResult += 2; |
2809 _pResult += 2; |
2766 if (_v) { |
2810 if (_v) { |
2767 /* distribute carry */ |
2811 /* distribute carry */ |
2768 for (_pResult1 = _pResult; _v; _pResult1++) { |
2812 for (_pResult1 = _pResult; _v; _pResult1++) { |
2769 _v += _pResult1[0]; |
2813 _v += _pResult1[0]; |
2770 _pResult1[0] = _v /* & 0xFF */; |
2814 _pResult1[0] = _v /* & 0xFF */; |
2771 _v >>= 8; |
2815 _v >>= 8; |
2772 } |
2816 } |
2773 } |
2817 } |
2774 _p2 += 2; |
2818 _p2 += 2; |
2775 } |
2819 } |
2776 |
2820 |
2777 /* possible odd highByte of f2 */ |
2821 /* possible odd highByte of f2 */ |
2778 if (_p2 <= _p2Last) { |
2822 if (_p2 <= _p2Last) { |
2779 _v = (short1 * _p2[0]) + ((unsigned short *)_pResult)[0]; |
2823 _v = (short1 * _p2[0]) + ((unsigned short *)_pResult)[0]; |
2780 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2824 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2781 _v >>= 16; /* now _v contains the carry*/ |
2825 _v >>= 16; /* now _v contains the carry*/ |
2782 _pResult += 2; |
2826 _pResult += 2; |
2783 if (_v) { |
2827 if (_v) { |
2784 /* distribute carry */ |
2828 /* distribute carry */ |
2785 for (_pResult1 = _pResult; _v; _pResult1++) { |
2829 for (_pResult1 = _pResult; _v; _pResult1++) { |
2786 _v += _pResult1[0]; |
2830 _v += _pResult1[0]; |
2787 _pResult1[0] = _v /* & 0xFF */; |
2831 _pResult1[0] = _v /* & 0xFF */; |
2788 _v >>= 8; |
2832 _v >>= 8; |
2789 } |
2833 } |
2790 } |
2834 } |
2791 _p2++; |
2835 _p2++; |
2792 } |
2836 } |
2793 } |
2837 } |
2794 #endif /* LSBFIRST */ |
2838 #endif /* LSBFIRST */ |
2795 |
2839 |
2796 /* possible odd highByte of f1 (or byteLoop, if not LSBFIRST) */ |
2840 /* possible odd highByte of f1 (or byteLoop, if not LSBFIRST) */ |
2797 for (; _p1 <= _p1Last; _p1++, _pResult0++) { |
2841 for (; _p1 <= _p1Last; _p1++, _pResult0++) { |
2798 unsigned int byte1 = _p1[0]; |
2842 unsigned int byte1 = _p1[0]; |
2799 |
2843 |
2800 _pResult = _pResult0; |
2844 _pResult = _pResult0; |
2801 _p2 = otherBytes; |
2845 _p2 = otherBytes; |
2802 |
2846 |
2803 #if defined(__LSBFIRST) || defined(alpha) || defined(i386) |
2847 #if defined(__LSBFIRST) || defined(alpha) || defined(i386) |
2804 /* loop over shorts of f2 */ |
2848 /* loop over shorts of f2 */ |
2805 while (_p2 < _p2Last) { |
2849 while (_p2 < _p2Last) { |
2806 _v = (byte1 * ((unsigned short *)_p2)[0]) + ((unsigned short *)_pResult)[0]; |
2850 _v = (byte1 * ((unsigned short *)_p2)[0]) + ((unsigned short *)_pResult)[0]; |
2807 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2851 ((unsigned short *)_pResult)[0] = _v /* & 0xFFFF */; |
2808 _v >>= 16; /* now _v contains the carry*/ |
2852 _v >>= 16; /* now _v contains the carry*/ |
2809 _pResult += 2; |
2853 _pResult += 2; |
2810 if (_v) { |
2854 if (_v) { |
2811 /* distribute carry */ |
2855 /* distribute carry */ |
2812 for (_pResult1 = _pResult; _v; _pResult1++) { |
2856 for (_pResult1 = _pResult; _v; _pResult1++) { |
2813 _v += _pResult1[0]; |
2857 _v += _pResult1[0]; |
2814 _pResult1[0] = _v /* & 0xFF */; |
2858 _pResult1[0] = _v /* & 0xFF */; |
2815 _v >>= 8; |
2859 _v >>= 8; |
2816 } |
2860 } |
2817 } |
2861 } |
2818 _p2 += 2; |
2862 _p2 += 2; |
2819 } |
2863 } |
2820 #endif /* __LSBFIRST */ |
2864 #endif /* __LSBFIRST */ |
2821 |
2865 |
2822 /* possible odd highByte of f2 (or byteLoop, if not LSBFIRST) */ |
2866 /* possible odd highByte of f2 (or byteLoop, if not LSBFIRST) */ |
2823 while (_p2 <= _p2Last) { |
2867 while (_p2 <= _p2Last) { |
2824 _v = (byte1 * _p2[0]) + _pResult[0]; |
2868 _v = (byte1 * _p2[0]) + _pResult[0]; |
2825 _pResult[0] = _v /* & 0xFF */; |
2869 _pResult[0] = _v /* & 0xFF */; |
2826 _v >>= 8; /* now _v contains the carry*/ |
2870 _v >>= 8; /* now _v contains the carry*/ |
2827 _pResult++; |
2871 _pResult++; |
2828 if (_v) { |
2872 if (_v) { |
2829 /* distribute carry */ |
2873 /* distribute carry */ |
2830 for (_pResult1 = _pResult; _v; _pResult1++) { |
2874 for (_pResult1 = _pResult; _v; _pResult1++) { |
2831 _v += _pResult1[0]; |
2875 _v += _pResult1[0]; |
2832 _pResult1[0] = _v /* & 0xFF */; |
2876 _pResult1[0] = _v /* & 0xFF */; |
2833 _v >>= 8; |
2877 _v >>= 8; |
2834 } |
2878 } |
2835 } |
2879 } |
2836 _p2++; |
2880 _p2++; |
2837 } |
2881 } |
2838 } |
2882 } |
2839 ok = true; |
2883 ok = true; |
2840 } |
2884 } |
2841 %}. |
2885 %}. |
2842 ok ifFalse:[ |
2886 ok ifFalse:[ |
2843 1 to:len1 do:[:index1 | |
2887 1 to:len1 do:[:index1 | |
2844 1 to:len2 do:[:index2 | |
2888 1 to:len2 do:[:index2 | |
2845 dstIndex := index1 + index2 - 1. |
2889 dstIndex := index1 + index2 - 1. |
2846 prod := (digitByteArray basicAt:index1) * (otherDigitByteArray basicAt:index2). |
2890 prod := (digitByteArray basicAt:index1) * (otherDigitByteArray basicAt:index2). |
2847 prod := prod + (resultDigitByteArray basicAt:dstIndex). |
2891 prod := prod + (resultDigitByteArray basicAt:dstIndex). |
2848 resultDigitByteArray basicAt:dstIndex put:(prod bitAnd:16rFF). |
2892 resultDigitByteArray basicAt:dstIndex put:(prod bitAnd:16rFF). |
2849 carry := prod bitShift:-8. |
2893 carry := prod bitShift:-8. |
2850 carry ~~ 0 ifTrue:[ |
2894 carry ~~ 0 ifTrue:[ |
2851 idx := dstIndex + 1. |
2895 idx := dstIndex + 1. |
2852 [carry ~~ 0] whileTrue:[ |
2896 [carry ~~ 0] whileTrue:[ |
2853 v := (resultDigitByteArray basicAt:idx) + carry. |
2897 v := (resultDigitByteArray basicAt:idx) + carry. |
2854 resultDigitByteArray basicAt:idx put:(v bitAnd:255). |
2898 resultDigitByteArray basicAt:idx put:(v bitAnd:255). |
2855 carry := v bitShift:-8. |
2899 carry := v bitShift:-8. |
2856 idx := idx + 1 |
2900 idx := idx + 1 |
2857 ] |
2901 ] |
2858 ] |
2902 ] |
2859 ] |
2903 ] |
2860 ]. |
2904 ]. |
2861 ]. |
2905 ]. |
2862 ^ result compressed |
2906 ^ result compressed |
2863 ! |
2907 ! |
2864 |
2908 |
2865 absPlus:aLargeInteger sign:newSign |
2909 absPlus:aLargeInteger sign:newSign |