*** empty log message ***
authorClaus Gittinger <cg@exept.de>
Fri, 07 Jun 2019 01:11:34 +0200
changeset 5000 608e4912b5cf
parent 4999 fd4435d4e583
child 5001 11cbbbcbc87e
*** empty log message ***
QuadFloat.st
--- a/QuadFloat.st	Fri Jun 07 00:48:13 2019 +0200
+++ b/QuadFloat.st	Fri Jun 07 01:11:34 2019 +0200
@@ -170,10 +170,30 @@
 #define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
 #define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
 
-#if 0
-static inline
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
+/*----------------------------------------------------------------------------
+| This function or macro is the same as 'softfloat_shortShiftLeftM' with
+| 'size_words' = 5 (N = 160).
+*----------------------------------------------------------------------------*/
+#define softfloat_shortShiftLeft160M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, dist, zPtr )
+/*----------------------------------------------------------------------------
+| This function or macro is the same as 'softfloat_shiftRightJamM' with
+| 'size_words' = 5 (N = 160).
+*----------------------------------------------------------------------------*/
+#define softfloat_shiftRightJam160M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 5, aPtr, dist, zPtr )
+/*----------------------------------------------------------------------------
+| This function or macro is the same as 'softfloat_shortShiftLeftM' with
+| 'size_words' = 4 (N = 128).
+*----------------------------------------------------------------------------*/
+#define softfloat_shortShiftLeft128M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, dist, zPtr )
+
+static inline void
+softfloat_raiseFlags( uint_fast8_t flags ) {
+    softfloat_exceptionFlags |= flags;
+}
+
+#if 1
+static inline void
+softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
 {
     zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
     zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
@@ -190,22 +210,274 @@
 }
 #endif
 
-void
-softfloat_raiseFlags( uint_fast8_t flags ) {
-    softfloat_exceptionFlags |= flags;
+static inline void
+softfloat_invalidF128M( uint32_t *zWPtr ) {
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
+    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
+    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
+}
+
+static bool
+f128M_isSignalingNaN( const float128_t *aPtr )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    if ( (uiA96 & 0x7FFF8000) != 0x7FFF0000 ) return 0;
+    return
+	((uiA96 & 0x00007FFF) != 0)
+	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+		     | aWPtr[indexWord( 4, 0 )])
+		    != 0);
+}
+
+static void
+softfloat_shortShiftRightJamM(
+      uint_fast8_t size_words,
+      const uint64_t *aPtr,
+      uint_fast8_t dist,
+      uint64_t *zPtr
+  )
+{
+    uint_fast8_t uNegDist;
+    unsigned int index, lastIndex;
+    uint64_t partWordZ, wordA;
+
+    uNegDist = -dist;
+    index = indexWordLo( size_words );
+    lastIndex = indexWordHi( size_words );
+    wordA = aPtr[index];
+    partWordZ = wordA>>dist;
+    if ( partWordZ<<dist != wordA ) partWordZ |= 1;
+    while ( index != lastIndex ) {
+	wordA = aPtr[index + wordIncr];
+	zPtr[index] = wordA<<(uNegDist & 63) | partWordZ;
+	index += wordIncr;
+	partWordZ = wordA>>dist;
+    }
+    zPtr[index] = partWordZ;
 }
 
+void
+softfloat_shiftRightJamM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint32_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint32_t wordJam, wordDist, *ptr;
+    uint_fast8_t i, innerDist;
+
+    wordJam = 0;
+    wordDist = dist>>5;
+    if ( wordDist ) {
+	if ( size_words < wordDist ) wordDist = size_words;
+	ptr = (uint32_t *) (aPtr + indexMultiwordLo( size_words, wordDist ));
+	i = wordDist;
+	do {
+	    wordJam = *ptr++;
+	    if ( wordJam ) break;
+	    --i;
+	} while ( i );
+	ptr = zPtr;
+    }
+    if ( wordDist < size_words ) {
+	aPtr += indexMultiwordHiBut( size_words, wordDist );
+	innerDist = dist & 31;
+	if ( innerDist ) {
+	    softfloat_shortShiftRightJamM(
+		size_words - wordDist,
+		aPtr,
+		innerDist,
+		zPtr + indexMultiwordLoBut( size_words, wordDist )
+	    );
+	    if ( ! wordDist ) goto wordJam;
+	} else {
+	    aPtr += indexWordLo( size_words - wordDist );
+	    ptr = zPtr + indexWordLo( size_words );
+	    for ( i = size_words - wordDist; i; --i ) {
+		*ptr = *aPtr;
+		aPtr += wordIncr;
+		ptr += wordIncr;
+	    }
+	}
+	ptr = zPtr + indexMultiwordHi( size_words, wordDist );
+    }
+    do {
+	*ptr++ = 0;
+	--wordDist;
+    } while ( wordDist );
+ wordJam:
+    if ( wordJam ) zPtr[indexWordLo( size_words )] |= 1;
+
+}
+
+void
+ softfloat_shortShiftLeftM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint_fast8_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint_fast8_t uNegDist;
+    unsigned int index, lastIndex;
+    uint32_t partWordZ, wordA;
+
+    uNegDist = -dist;
+    index = indexWordHi( size_words );
+    lastIndex = indexWordLo( size_words );
+    partWordZ = aPtr[index]<<dist;
+    while ( index != lastIndex ) {
+	wordA = aPtr[index - wordIncr];
+	zPtr[index] = partWordZ | wordA>>(uNegDist & 31);
+	index -= wordIncr;
+	partWordZ = wordA<<dist;
+    }
+    zPtr[index] = partWordZ;
+}
+
+static void
+softfloat_shiftLeftM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint32_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint32_t wordDist;
+    uint_fast8_t innerDist;
+    uint32_t *destPtr;
+    uint_fast8_t i;
+
+    wordDist = dist>>5;
+    if ( wordDist < size_words ) {
+	aPtr += indexMultiwordLoBut( size_words, wordDist );
+	innerDist = dist & 31;
+	if ( innerDist ) {
+	    softfloat_shortShiftLeftM(
+		size_words - wordDist,
+		aPtr,
+		innerDist,
+		zPtr + indexMultiwordHiBut( size_words, wordDist )
+	    );
+	    if ( ! wordDist ) return;
+	} else {
+	    aPtr += indexWordHi( size_words - wordDist );
+	    destPtr = zPtr + indexWordHi( size_words );
+	    for ( i = size_words - wordDist; i; --i ) {
+		*destPtr = *aPtr;
+		aPtr -= wordIncr;
+		destPtr -= wordIncr;
+	    }
+	}
+	zPtr += indexMultiwordLo( size_words, wordDist );
+    } else {
+	wordDist = size_words;
+    }
+    do {
+	*zPtr++ = 0;
+	--wordDist;
+    } while ( wordDist );
+}
+
+/*----------------------------------------------------------------------------
+| This function or macro is the same as 'softfloat_shiftLeftM' with
+| 'size_words' = 4 (N = 128).
+*----------------------------------------------------------------------------*/
+#define softfloat_shiftLeft128M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 4, aPtr, dist, zPtr )
+
 bool
+softfloat_isNaNF128M( const uint32_t *aWPtr )
+{
+    uint32_t uiA96;
+
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    if ( (~uiA96 & 0x7FFF0000) != 0 ) return 0;
+    return
+	((uiA96 & 0x0000FFFF) != 0)
+	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+		     | aWPtr[indexWord( 4, 0 )])
+		    != 0);
+}
+
+static inline uint_fast8_t
+softfloat_countLeadingZeros32( uint32_t a ) {
+    return a ? __builtin_clz( a ) : 32;
+}
+
+static inline bool
 softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
     return (a64 == b64) && (a0 == b0);
 }
 
-bool
+static inline bool
 softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
     return (a64 < b64) || ((a64 == b64) && (a0 < b0));
 }
 
-uint_fast8_t
+void
+softfloat_propagateNaNF128M(
+     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
+{
+    bool isSigNaNA;
+    const uint32_t *ptr;
+    bool isSigNaNB;
+    uint32_t uiA96, uiB96, wordMagA, wordMagB;
+
+    isSigNaNA = f128M_isSignalingNaN( (const float128_t *) aWPtr );
+    ptr = aWPtr;
+    if ( ! bWPtr ) {
+	if ( isSigNaNA ) softfloat_raiseFlags( softfloat_flag_invalid );
+	goto copy;
+    }
+    isSigNaNB = f128M_isSignalingNaN( (const float128_t *) bWPtr );
+    if ( isSigNaNA | isSigNaNB ) {
+	softfloat_raiseFlags( softfloat_flag_invalid );
+	if ( isSigNaNA ) {
+	    if ( isSigNaNB ) goto returnLargerUIMag;
+	    if ( softfloat_isNaNF128M( bWPtr ) ) goto copyB;
+	    goto copy;
+	} else {
+	    if ( softfloat_isNaNF128M( aWPtr ) ) goto copy;
+	    goto copyB;
+	}
+    }
+ returnLargerUIMag:
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    wordMagA = uiA96 & 0x7FFFFFFF;
+    wordMagB = uiB96 & 0x7FFFFFFF;
+    if ( wordMagA < wordMagB ) goto copyB;
+    if ( wordMagB < wordMagA ) goto copy;
+    wordMagA = aWPtr[indexWord( 4, 2 )];
+    wordMagB = bWPtr[indexWord( 4, 2 )];
+    if ( wordMagA < wordMagB ) goto copyB;
+    if ( wordMagB < wordMagA ) goto copy;
+    wordMagA = aWPtr[indexWord( 4, 1 )];
+    wordMagB = bWPtr[indexWord( 4, 1 )];
+    if ( wordMagA < wordMagB ) goto copyB;
+    if ( wordMagB < wordMagA ) goto copy;
+    wordMagA = aWPtr[indexWord( 4, 0 )];
+    wordMagB = bWPtr[indexWord( 4, 0 )];
+    if ( wordMagA < wordMagB ) goto copyB;
+    if ( wordMagB < wordMagA ) goto copy;
+    if ( uiA96 < uiB96 ) goto copy;
+ copyB:
+    ptr = bWPtr;
+ copy:
+    zWPtr[indexWordHi( 4 )] = ptr[indexWordHi( 4 )] | 0x00008000;
+    zWPtr[indexWord( 4, 2 )] = ptr[indexWord( 4, 2 )];
+    zWPtr[indexWord( 4, 1 )] = ptr[indexWord( 4, 1 )];
+    zWPtr[indexWord( 4, 0 )] = ptr[indexWord( 4, 0 )];
+}
+
+static inline uint_fast8_t
 softfloat_countLeadingZeros64( uint64_t a )
 {
     uint_fast8_t count;
@@ -232,7 +504,7 @@
     return count;
 }
 
-struct exp16_sig64
+static inline struct exp16_sig64
 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
 {
     int_fast8_t shiftDist;
@@ -242,10 +514,9 @@
     z.exp = 1 - shiftDist;
     z.sig = sig<<shiftDist;
     return z;
-
 }
 
-struct uint128_extra
+static inline struct uint128_extra
 softfloat_shiftRightJam128Extra(
      uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist )
 {
@@ -277,7 +548,7 @@
     return z;
 }
 
-struct uint128_extra
+static inline struct uint128_extra
 softfloat_shortShiftRightJam128Extra(
      uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
 {
@@ -289,8 +560,8 @@
     return z;
 }
 
-struct uint128
- softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
+static inline struct uint128
+softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
 {
     uint_fast8_t u8NegDist;
     struct uint128 z;
@@ -313,7 +584,7 @@
     return z;
 }
 
-struct uint128
+static inline struct uint128
 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
 {
     struct uint128 z;
@@ -322,7 +593,209 @@
     return z;
 }
 
-struct uint128
+static int_fast8_t
+softfloat_compare128M( const uint32_t *aPtr, const uint32_t *bPtr )
+{
+    unsigned int index, lastIndex;
+    uint32_t wordA, wordB;
+
+    index = indexWordHi( 4 );
+    lastIndex = indexWordLo( 4 );
+    for (;;) {
+	wordA = aPtr[index];
+	wordB = bPtr[index];
+	if ( wordA != wordB ) return (wordA < wordB) ? -1 : 1;
+	if ( index == lastIndex ) break;
+	index -= wordIncr;
+    }
+    return 0;
+}
+
+static void
+softfloat_roundPackMToF128M(
+     bool sign, int32_t exp, uint32_t *extSigPtr, uint32_t *zWPtr )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint32_t sigExtra;
+    bool doIncrement, isTiny;
+    static const uint32_t maxSig[4] =
+	INIT_UINTM4( 0x0001FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
+    uint32_t ui, uj;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    sigExtra = extSigPtr[indexWordLo( 5 )];
+    doIncrement = (0x80000000 <= sigExtra);
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+	doIncrement =
+	    (roundingMode
+		 == (sign ? softfloat_round_min : softfloat_round_max))
+		&& sigExtra;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) exp ) {
+	if ( exp < 0 ) {
+	    /*----------------------------------------------------------------
+	    *----------------------------------------------------------------*/
+	    isTiny =
+		   (softfloat_detectTininess
+			== softfloat_tininess_beforeRounding)
+		|| (exp < -1)
+		|| ! doIncrement
+		|| (softfloat_compare128M(
+			extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
+			< 0);
+	    softfloat_shiftRightJam160M( extSigPtr, -exp, extSigPtr );
+	    exp = 0;
+	    sigExtra = extSigPtr[indexWordLo( 5 )];
+	    if ( isTiny && sigExtra ) {
+		softfloat_raiseFlags( softfloat_flag_underflow );
+	    }
+	    doIncrement = (0x80000000 <= sigExtra);
+	    if (
+		   ! roundNearEven
+		&& (roundingMode != softfloat_round_near_maxMag)
+	    ) {
+		doIncrement =
+		    (roundingMode
+			 == (sign ? softfloat_round_min : softfloat_round_max))
+			&& sigExtra;
+	    }
+	} else if (
+	       (0x7FFD < exp)
+	    || ((exp == 0x7FFD) && doIncrement
+		    && (softfloat_compare128M(
+			    extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
+			    == 0))
+	) {
+	    /*----------------------------------------------------------------
+	    *----------------------------------------------------------------*/
+	    softfloat_raiseFlags(
+		softfloat_flag_overflow | softfloat_flag_inexact );
+	    if (
+		   roundNearEven
+		|| (roundingMode == softfloat_round_near_maxMag)
+		|| (roundingMode
+			== (sign ? softfloat_round_min : softfloat_round_max))
+	    ) {
+		ui = packToF128UI96( sign, 0x7FFF, 0 );
+		uj = 0;
+	    } else {
+		ui = packToF128UI96( sign, 0x7FFE, 0x0000FFFF );
+		uj = 0xFFFFFFFF;
+	    }
+	    zWPtr[indexWordHi( 4 )] = ui;
+	    zWPtr[indexWord( 4, 2 )] = uj;
+	    zWPtr[indexWord( 4, 1 )] = uj;
+	    zWPtr[indexWord( 4, 0 )] = uj;
+	    return;
+	}
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uj = extSigPtr[indexWord( 5, 1 )];
+    if ( sigExtra ) {
+	softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+	if ( roundingMode == softfloat_round_odd ) {
+	    uj |= 1;
+	    goto noIncrementPackReturn;
+	}
+#endif
+    }
+    if ( doIncrement ) {
+	++uj;
+	if ( uj ) {
+	    if ( ! (sigExtra & 0x7FFFFFFF) && roundNearEven ) uj &= ~1;
+	    zWPtr[indexWord( 4, 2 )] = extSigPtr[indexWord( 5, 3 )];
+	    zWPtr[indexWord( 4, 1 )] = extSigPtr[indexWord( 5, 2 )];
+	    zWPtr[indexWord( 4, 0 )] = uj;
+	    ui = extSigPtr[indexWordHi( 5 )];
+	} else {
+	    zWPtr[indexWord( 4, 0 )] = uj;
+	    ui = extSigPtr[indexWord( 5, 2 )] + 1;
+	    zWPtr[indexWord( 4, 1 )] = ui;
+	    uj = extSigPtr[indexWord( 5, 3 )];
+	    if ( ui ) {
+		zWPtr[indexWord( 4, 2 )] = uj;
+		ui = extSigPtr[indexWordHi( 5 )];
+	    } else {
+		++uj;
+		zWPtr[indexWord( 4, 2 )] = uj;
+		ui = extSigPtr[indexWordHi( 5 )];
+		if ( ! uj ) ++ui;
+	    }
+	}
+    } else {
+ noIncrementPackReturn:
+	zWPtr[indexWord( 4, 0 )] = uj;
+	ui = extSigPtr[indexWord( 5, 2 )];
+	zWPtr[indexWord( 4, 1 )] = ui;
+	uj |= ui;
+	ui = extSigPtr[indexWord( 5, 3 )];
+	zWPtr[indexWord( 4, 2 )] = ui;
+	uj |= ui;
+	ui = extSigPtr[indexWordHi( 5 )];
+	uj |= ui;
+	if ( ! uj ) exp = 0;
+    }
+    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp, ui );
+}
+
+static int
+softfloat_shiftNormSigF128M(
+     const uint32_t *wPtr, uint_fast8_t shiftDist, uint32_t *sigPtr )
+{
+    uint32_t wordSig;
+    int32_t exp;
+    uint32_t leadingBit;
+
+    wordSig = wPtr[indexWordHi( 4 )];
+    exp = expF128UI96( wordSig );
+    if ( exp ) {
+	softfloat_shortShiftLeft128M( wPtr, shiftDist, sigPtr );
+	leadingBit = 0x00010000<<shiftDist;
+	sigPtr[indexWordHi( 4 )] =
+	    (sigPtr[indexWordHi( 4 )] & (leadingBit - 1)) | leadingBit;
+    } else {
+	exp = 16;
+	wordSig &= 0x7FFFFFFF;
+	if ( ! wordSig ) {
+	    exp = -16;
+	    wordSig = wPtr[indexWord( 4, 2 )];
+	    if ( ! wordSig ) {
+		exp = -48;
+		wordSig = wPtr[indexWord( 4, 1 )];
+		if ( ! wordSig ) {
+		    wordSig = wPtr[indexWord( 4, 0 )];
+		    if ( ! wordSig ) return -128;
+		    exp = -80;
+		}
+	    }
+	}
+	exp -= softfloat_countLeadingZeros32( wordSig );
+	softfloat_shiftLeft128M( wPtr, 1 - exp + shiftDist, sigPtr );
+    }
+    return exp;
+}
+
+bool
+softfloat_tryPropagateNaNF128M(
+     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
+{
+
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+	softfloat_propagateNaNF128M( aWPtr, bWPtr, zWPtr );
+	return 1;
+    }
+    return 0;
+}
+
+static inline struct uint128
 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
 {
     struct uint128 z;
@@ -331,8 +804,8 @@
     return z;
 }
 
-struct uint128
- softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+static inline struct uint128
+softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
 {
     struct uint128 z;
     z.v0 = a0 - b0;
@@ -341,6 +814,61 @@
     return z;
 }
 
+static void
+softfloat_mul128MTo256M(
+     const uint32_t *aPtr, const uint32_t *bPtr, uint32_t *zPtr )
+{
+    uint32_t *lastZPtr, wordB;
+    uint64_t dwordProd;
+    uint32_t wordZ;
+    uint_fast8_t carry;
+
+    bPtr += indexWordLo( 4 );
+    lastZPtr = zPtr + indexMultiwordHi( 8, 5 );
+    zPtr += indexMultiwordLo( 8, 5 );
+    wordB = *bPtr;
+    dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
+    zPtr[indexWord( 5, 0 )] = dwordProd;
+    dwordProd = (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
+    zPtr[indexWord( 5, 1 )] = dwordProd;
+    dwordProd = (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
+    zPtr[indexWord( 5, 2 )] = dwordProd;
+    dwordProd = (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
+    zPtr[indexWord( 5, 3 )] = dwordProd;
+    zPtr[indexWord( 5, 4 )] = dwordProd>>32;
+    do {
+	bPtr += wordIncr;
+	zPtr += wordIncr;
+	wordB = *bPtr;
+	dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
+	wordZ = zPtr[indexWord( 5, 0 )] + (uint32_t) dwordProd;
+	zPtr[indexWord( 5, 0 )] = wordZ;
+	carry = (wordZ < (uint32_t) dwordProd);
+	dwordProd =
+	    (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
+	wordZ = zPtr[indexWord( 5, 1 )] + (uint32_t) dwordProd + carry;
+	zPtr[indexWord( 5, 1 )] = wordZ;
+	if ( wordZ != (uint32_t) dwordProd ) {
+	    carry = (wordZ < (uint32_t) dwordProd);
+	}
+	dwordProd =
+	    (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
+	wordZ = zPtr[indexWord( 5, 2 )] + (uint32_t) dwordProd + carry;
+	zPtr[indexWord( 5, 2 )] = wordZ;
+	if ( wordZ != (uint32_t) dwordProd ) {
+	    carry = (wordZ < (uint32_t) dwordProd);
+	}
+	dwordProd =
+	    (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
+	wordZ = zPtr[indexWord( 5, 3 )] + (uint32_t) dwordProd + carry;
+	zPtr[indexWord( 5, 3 )] = wordZ;
+	if ( wordZ != (uint32_t) dwordProd ) {
+	    carry = (wordZ < (uint32_t) dwordProd);
+	}
+	zPtr[indexWord( 5, 4 )] = (dwordProd>>32) + carry;
+    } while ( zPtr != lastZPtr );
+}
+
 float128_t
 softfloat_roundPackToF128(
      bool sign,
@@ -880,6 +1408,106 @@
     }
 }
 
+void
+f128M_mul( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t *zWPtr;
+    uint32_t uiA96;
+    int32_t expA;
+    uint32_t uiB96;
+    int32_t expB;
+    bool signZ;
+    const uint32_t *ptr;
+    uint32_t uiZ96, sigA[4];
+    uint_fast8_t shiftDist;
+    uint32_t sigB[4];
+    int32_t expZ;
+    uint32_t sigProd[8], *extSigZPtr;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    expA = expF128UI96( uiA96 );
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    expB = expF128UI96( uiB96 );
+    signZ = signF128UI96( uiA96 ) ^ signF128UI96( uiB96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+	if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
+	ptr = aWPtr;
+	if ( ! expA ) goto possiblyInvalid;
+	if ( ! expB ) {
+	    ptr = bWPtr;
+     possiblyInvalid:
+	    if (
+		! fracF128UI96( ptr[indexWordHi( 4 )] )
+		    && ! (ptr[indexWord( 4, 2 )] | ptr[indexWord( 4, 1 )]
+			      | ptr[indexWord( 4, 0 )])
+	    ) {
+		softfloat_invalidF128M( zWPtr );
+		return;
+	    }
+	}
+	uiZ96 = packToF128UI96( signZ, 0x7FFF, 0 );
+	goto uiZ96;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA ) {
+	sigA[indexWordHi( 4 )] = fracF128UI96( uiA96 ) | 0x00010000;
+	sigA[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+	sigA[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+	sigA[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+    } else {
+	expA = softfloat_shiftNormSigF128M( aWPtr, 0, sigA );
+	if ( expA == -128 ) goto zero;
+    }
+    if ( expB ) {
+	sigB[indexWordHi( 4 )] = fracF128UI96( uiB96 ) | 0x00010000;
+	sigB[indexWord( 4, 2 )] = bWPtr[indexWord( 4, 2 )];
+	sigB[indexWord( 4, 1 )] = bWPtr[indexWord( 4, 1 )];
+	sigB[indexWord( 4, 0 )] = bWPtr[indexWord( 4, 0 )];
+    } else {
+	expB = softfloat_shiftNormSigF128M( bWPtr, 0, sigB );
+	if ( expB == -128 ) goto zero;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA + expB - 0x4000;
+    softfloat_mul128MTo256M( sigA, sigB, sigProd );
+    if (
+	sigProd[indexWord( 8, 2 )]
+	    || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
+    ) {
+	sigProd[indexWord( 8, 3 )] |= 1;
+    }
+    extSigZPtr = &sigProd[indexMultiwordHi( 8, 5 )];
+    shiftDist = 16;
+    if ( extSigZPtr[indexWordHi( 5 )] & 2 ) {
+	++expZ;
+	shiftDist = 15;
+    }
+    softfloat_shortShiftLeft160M( extSigZPtr, shiftDist, extSigZPtr );
+    softfloat_roundPackMToF128M( signZ, expZ, extSigZPtr, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ96 = packToF128UI96( signZ, 0, 0 );
+ uiZ96:
+    zWPtr[indexWordHi( 4 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+}
+
 #endif // SUPPORT_QUADFLOAT
 
 %}