QuadFloat.st
author Claus Gittinger <cg@exept.de>
Sat, 08 Jun 2019 13:35:43 +0200
changeset 5011 889548e8808e
parent 5008 9eb7df4b6ce0
child 5012 bd4e0475f9ea
permissions -rw-r--r--
#BUGFIX by cg class: QuadFloat changed: #equalFromQuadFloat: #lessFromQuadFloat: class: QuadFloat class added: #fromShortFloat:

"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

LimitedPrecisionReal variableByteSubclass:#QuadFloat
	instanceVariableNames:''
	classVariableNames:'QuadFloatZero QuadFloatOne Pi E Epsilon NaN PositiveInfinity
		NegativeInfinity Halfpi HalfpiNegative'
	poolDictionaries:''
	category:'Magnitude-Numbers'
!

!QuadFloat primitiveDefinitions!
%{
#if __POINTER_SIZE__ == 8
# define SUPPORT_QUADFLOAT
#endif

#ifdef SUPPORT_QUADFLOAT
# include <stdint.h>

/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
enum {
    softfloat_tininess_beforeRounding = 0,
    softfloat_tininess_afterRounding  = 1
};

/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
enum {
    softfloat_flag_inexact   =  1,
    softfloat_flag_underflow =  2,
    softfloat_flag_overflow  =  4,
    softfloat_flag_infinite  =  8,
    softfloat_flag_invalid   = 16
};
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.  (Mode "odd" is supported only if
| SoftFloat is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.)
*----------------------------------------------------------------------------*/
enum {
    softfloat_round_near_even   = 0,
    softfloat_round_minMag      = 1,
    softfloat_round_min         = 2,
    softfloat_round_max         = 3,
    softfloat_round_near_maxMag = 4,
    softfloat_round_odd         = 6
};
#define init_detectTininess softfloat_tininess_afterRounding

#if defined(LITTLEENDIAN) || defined(__LSBFIRST__)
struct uint128          { uint64_t v0, v64; };
struct uint64_extra     { uint64_t extra, v; };
struct uint128_extra    { uint64_t extra; struct uint128 v; };
#else
struct uint128          { uint64_t v64, v0; };
struct uint64_extra     { uint64_t v, extra; };
struct uint128_extra    { struct uint128 v; uint64_t extra; };
#endif

typedef unsigned char bool;
typedef double float64_t;
union ui64_f64   { uint64_t ui; float64_t f; };
union ui128_f128 { struct uint128 ui; float128_t f; };
struct exp32_sig128 { int_fast32_t exp; struct uint128 sig; };

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 128-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF128UI96 0xFFFF8000
#define defaultNaNF128UI64 0
#define defaultNaNF128UI32 0
#define defaultNaNF128UI0  0

struct commonNaN {
    bool sign;
#ifdef LITTLEENDIAN
    uint64_t v0, v64;
#else
    uint64_t v64, v0;
#endif
};
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };

#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveVariables!
%{
#ifdef SUPPORT_QUADFLOAT

uint_fast8_t softfloat_exceptionFlags;
uint_fast8_t softfloat_roundingMode = softfloat_round_near_even;
uint_fast8_t softfloat_detectTininess = init_detectTininess;
uint_fast8_t softfloat_exceptionFlags = 0;

const uint_least8_t softfloat_countLeadingZeros8[256] = {
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveFunctions!
%{
#ifdef SUPPORT_QUADFLOAT

#if defined(LITTLEENDIAN) || defined(__LSBFIRST__)
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif

#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )

#define signF128UI64( a64 ) ((bool) ((uint64_t) (a64)>>63))
#define expF128UI64( a64 ) ((int_fast32_t) ((a64)>>48) & 0x7FFF)
#define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
#define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))

#define signF128UI96( a96 ) ((bool) ((uint32_t) (a96)>>31))
#define expF128UI96( a96 ) ((int32_t) ((a96)>>16) & 0x7FFF)
#define fracF128UI96( a96 ) ((a96) & 0x0000FFFF)
#define packToF128UI96( sign, exp, sig96 ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<16) + (sig96))

#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))

/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shortShiftLeftM' with
| 'size_words' = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft160M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, dist, zPtr )
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shiftRightJamM' with
| 'size_words' = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_shiftRightJam160M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 5, aPtr, dist, zPtr )
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shortShiftLeftM' with
| 'size_words' = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft128M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, dist, zPtr )
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shortShiftLeftM' with
| 'size_words' = 3 (N = 96).
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft96M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 3, aPtr, dist, zPtr )

static inline void
softfloat_raiseFlags( uint_fast8_t flags ) {
    softfloat_exceptionFlags |= flags;
}

#if 1
static inline void
softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
{
    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
}
#else
# define softfloat_commonNaNToF128M( aPtr, zWPtr ) \
{ \
    (zWPtr)[indexWord( 4, 3 )] = defaultNaNF128UI96; \
    (zWPtr)[indexWord( 4, 2 )] = defaultNaNF128UI64; \
    (zWPtr)[indexWord( 4, 1 )] = defaultNaNF128UI32; \
    (zWPtr)[indexWord( 4, 0 )] = defaultNaNF128UI0; \
}
#endif

static inline void
softfloat_invalidF128M( uint32_t *zWPtr ) {
    softfloat_raiseFlags( softfloat_flag_invalid );
    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
}

static bool
f128M_isSignalingNaN( const float128_t *aPtr )
{
    const uint32_t *aWPtr;
    uint32_t uiA96;

    aWPtr = (const uint32_t *) aPtr;
    uiA96 = aWPtr[indexWordHi( 4 )];
    if ( (uiA96 & 0x7FFF8000) != 0x7FFF0000 ) return 0;
    return
	((uiA96 & 0x00007FFF) != 0)
	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
		     | aWPtr[indexWord( 4, 0 )])
		    != 0);
}

static void
softfloat_shortShiftRightJamM(
      uint_fast8_t size_words,
      const uint64_t *aPtr,
      uint_fast8_t dist,
      uint64_t *zPtr
  )
{
    uint_fast8_t uNegDist;
    unsigned int index, lastIndex;
    uint64_t partWordZ, wordA;

    uNegDist = -dist;
    index = indexWordLo( size_words );
    lastIndex = indexWordHi( size_words );
    wordA = aPtr[index];
    partWordZ = wordA>>dist;
    if ( partWordZ<<dist != wordA ) partWordZ |= 1;
    while ( index != lastIndex ) {
	wordA = aPtr[index + wordIncr];
	zPtr[index] = wordA<<(uNegDist & 63) | partWordZ;
	index += wordIncr;
	partWordZ = wordA>>dist;
    }
    zPtr[index] = partWordZ;
}

void
softfloat_shiftRightJamM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 )
{
    uint32_t wordJam, wordDist, *ptr;
    uint_fast8_t i, innerDist;

    wordJam = 0;
    wordDist = dist>>5;
    if ( wordDist ) {
	if ( size_words < wordDist ) wordDist = size_words;
	ptr = (uint32_t *) (aPtr + indexMultiwordLo( size_words, wordDist ));
	i = wordDist;
	do {
	    wordJam = *ptr++;
	    if ( wordJam ) break;
	    --i;
	} while ( i );
	ptr = zPtr;
    }
    if ( wordDist < size_words ) {
	aPtr += indexMultiwordHiBut( size_words, wordDist );
	innerDist = dist & 31;
	if ( innerDist ) {
	    softfloat_shortShiftRightJamM(
		size_words - wordDist,
		aPtr,
		innerDist,
		zPtr + indexMultiwordLoBut( size_words, wordDist )
	    );
	    if ( ! wordDist ) goto wordJam;
	} else {
	    aPtr += indexWordLo( size_words - wordDist );
	    ptr = zPtr + indexWordLo( size_words );
	    for ( i = size_words - wordDist; i; --i ) {
		*ptr = *aPtr;
		aPtr += wordIncr;
		ptr += wordIncr;
	    }
	}
	ptr = zPtr + indexMultiwordHi( size_words, wordDist );
    }
    do {
	*ptr++ = 0;
	--wordDist;
    } while ( wordDist );
 wordJam:
    if ( wordJam ) zPtr[indexWordLo( size_words )] |= 1;

}

void
 softfloat_shortShiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint_fast8_t dist,
     uint32_t *zPtr
 )
{
    uint_fast8_t uNegDist;
    unsigned int index, lastIndex;
    uint32_t partWordZ, wordA;

    uNegDist = -dist;
    index = indexWordHi( size_words );
    lastIndex = indexWordLo( size_words );
    partWordZ = aPtr[index]<<dist;
    while ( index != lastIndex ) {
	wordA = aPtr[index - wordIncr];
	zPtr[index] = partWordZ | wordA>>(uNegDist & 31);
	index -= wordIncr;
	partWordZ = wordA<<dist;
    }
    zPtr[index] = partWordZ;
}

static void
softfloat_shiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 )
{
    uint32_t wordDist;
    uint_fast8_t innerDist;
    uint32_t *destPtr;
    uint_fast8_t i;

    wordDist = dist>>5;
    if ( wordDist < size_words ) {
	aPtr += indexMultiwordLoBut( size_words, wordDist );
	innerDist = dist & 31;
	if ( innerDist ) {
	    softfloat_shortShiftLeftM(
		size_words - wordDist,
		aPtr,
		innerDist,
		zPtr + indexMultiwordHiBut( size_words, wordDist )
	    );
	    if ( ! wordDist ) return;
	} else {
	    aPtr += indexWordHi( size_words - wordDist );
	    destPtr = zPtr + indexWordHi( size_words );
	    for ( i = size_words - wordDist; i; --i ) {
		*destPtr = *aPtr;
		aPtr -= wordIncr;
		destPtr -= wordIncr;
	    }
	}
	zPtr += indexMultiwordLo( size_words, wordDist );
    } else {
	wordDist = size_words;
    }
    do {
	*zPtr++ = 0;
	--wordDist;
    } while ( wordDist );
}

/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shiftLeftM' with
| 'size_words' = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_shiftLeft128M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 4, aPtr, dist, zPtr )

bool
softfloat_isNaNF128M( const uint32_t *aWPtr )
{
    uint32_t uiA96;

    uiA96 = aWPtr[indexWordHi( 4 )];
    if ( (~uiA96 & 0x7FFF0000) != 0 ) return 0;
    return
	((uiA96 & 0x0000FFFF) != 0)
	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
		     | aWPtr[indexWord( 4, 0 )])
		    != 0);
}

static inline uint_fast8_t
softfloat_countLeadingZeros32( uint32_t a ) {
    return a ? __builtin_clz( a ) : 32;
}

static inline bool
softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 == b64) && (a0 == b0);
}

static inline bool
softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 < b64) || ((a64 == b64) && (a0 < b0));
}

static inline bool
softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 < b64) || ((a64 == b64) && (a0 <= b0));
}

void
softfloat_propagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
{
    bool isSigNaNA;
    const uint32_t *ptr;
    bool isSigNaNB;
    uint32_t uiA96, uiB96, wordMagA, wordMagB;

    isSigNaNA = f128M_isSignalingNaN( (const float128_t *) aWPtr );
    ptr = aWPtr;
    if ( ! bWPtr ) {
	if ( isSigNaNA ) softfloat_raiseFlags( softfloat_flag_invalid );
	goto copy;
    }
    isSigNaNB = f128M_isSignalingNaN( (const float128_t *) bWPtr );
    if ( isSigNaNA | isSigNaNB ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	if ( isSigNaNA ) {
	    if ( isSigNaNB ) goto returnLargerUIMag;
	    if ( softfloat_isNaNF128M( bWPtr ) ) goto copyB;
	    goto copy;
	} else {
	    if ( softfloat_isNaNF128M( aWPtr ) ) goto copy;
	    goto copyB;
	}
    }
 returnLargerUIMag:
    uiA96 = aWPtr[indexWordHi( 4 )];
    uiB96 = bWPtr[indexWordHi( 4 )];
    wordMagA = uiA96 & 0x7FFFFFFF;
    wordMagB = uiB96 & 0x7FFFFFFF;
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 2 )];
    wordMagB = bWPtr[indexWord( 4, 2 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 1 )];
    wordMagB = bWPtr[indexWord( 4, 1 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 0 )];
    wordMagB = bWPtr[indexWord( 4, 0 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    if ( uiA96 < uiB96 ) goto copy;
 copyB:
    ptr = bWPtr;
 copy:
    zWPtr[indexWordHi( 4 )] = ptr[indexWordHi( 4 )] | 0x00008000;
    zWPtr[indexWord( 4, 2 )] = ptr[indexWord( 4, 2 )];
    zWPtr[indexWord( 4, 1 )] = ptr[indexWord( 4, 1 )];
    zWPtr[indexWord( 4, 0 )] = ptr[indexWord( 4, 0 )];
}

static inline uint_fast8_t
softfloat_countLeadingZeros64( uint64_t a )
{
    uint_fast8_t count;
    uint32_t a32;

    count = 0;
    a32 = a>>32;
    if ( ! a32 ) {
	count = 32;
	a32 = a;
    }
    /*------------------------------------------------------------------------
    | From here, result is current count + count leading zeros of `a32'.
    *------------------------------------------------------------------------*/
    if ( a32 < 0x10000 ) {
	count += 16;
	a32 <<= 16;
    }
    if ( a32 < 0x1000000 ) {
	count += 8;
	a32 <<= 8;
    }
    count += softfloat_countLeadingZeros8[a32>>24];
    return count;
}

static inline struct exp16_sig64
softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
    int_fast8_t shiftDist;
    struct exp16_sig64 z;

    shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
    z.exp = 1 - shiftDist;
    z.sig = sig<<shiftDist;
    return z;
}

static inline struct uint128_extra
softfloat_shiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128_extra z;

    u8NegDist = -dist;
    if ( dist < 64 ) {
	z.v.v64 = a64>>dist;
	z.v.v0 = a64<<(u8NegDist & 63) | a0>>dist;
	z.extra = a0<<(u8NegDist & 63);
    } else {
	z.v.v64 = 0;
	if ( dist == 64 ) {
	    z.v.v0 = a64;
	    z.extra = a0;
	} else {
	    extra |= a0;
	    if ( dist < 128 ) {
		z.v.v0 = a64>>(dist & 63);
		z.extra = a64<<(u8NegDist & 63);
	    } else {
		z.v.v0 = 0;
		z.extra = (dist == 128) ? a64 : (a64 != 0);
	    }
	}
    }
    z.extra |= (extra != 0);
    return z;
}

static inline struct uint128_extra
softfloat_shortShiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
{
    uint_fast8_t negDist = -dist;
    struct uint128_extra z;
    z.v.v64 = a64>>dist;
    z.v.v0 = a64<<(negDist & 63) | a0>>dist;
    z.extra = a0<<(negDist & 63) | (extra != 0);
    return z;
}

static inline struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128 z;

    if ( dist < 64 ) {
	u8NegDist = -dist;
	z.v64 = a64>>dist;
	z.v0 =
	    a64<<(u8NegDist & 63) | a0>>dist
		| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
    } else {
	z.v64 = 0;
	z.v0 =
	    (dist < 127)
		? a64>>(dist & 63)
		      | (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
			     != 0)
		: ((a64 | a0) != 0);
    }
    return z;
}

static inline struct uint128
softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    struct uint128 z;
    z.v64 = a64<<dist | a0>>(-dist & 63);
    z.v0 = a0<<dist;
    return z;
}

static int_fast8_t
softfloat_compare128M( const uint32_t *aPtr, const uint32_t *bPtr )
{
    unsigned int index, lastIndex;
    uint32_t wordA, wordB;

    index = indexWordHi( 4 );
    lastIndex = indexWordLo( 4 );
    for (;;) {
	wordA = aPtr[index];
	wordB = bPtr[index];
	if ( wordA != wordB ) return (wordA < wordB) ? -1 : 1;
	if ( index == lastIndex ) break;
	index -= wordIncr;
    }
    return 0;
}

static void
softfloat_roundPackMToF128M(
     bool sign, int32_t exp, uint32_t *extSigPtr, uint32_t *zWPtr )
{
    uint_fast8_t roundingMode;
    bool roundNearEven;
    uint32_t sigExtra;
    bool doIncrement, isTiny;
    static const uint32_t maxSig[4] =
	INIT_UINTM4( 0x0001FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
    uint32_t ui, uj;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    sigExtra = extSigPtr[indexWordLo( 5 )];
    doIncrement = (0x80000000 <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
	doIncrement =
	    (roundingMode
		 == (sign ? softfloat_round_min : softfloat_round_max))
		&& sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
	if ( exp < 0 ) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    isTiny =
		   (softfloat_detectTininess
			== softfloat_tininess_beforeRounding)
		|| (exp < -1)
		|| ! doIncrement
		|| (softfloat_compare128M(
			extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
			< 0);
	    softfloat_shiftRightJam160M( extSigPtr, -exp, extSigPtr );
	    exp = 0;
	    sigExtra = extSigPtr[indexWordLo( 5 )];
	    if ( isTiny && sigExtra ) {
		softfloat_raiseFlags( softfloat_flag_underflow );
	    }
	    doIncrement = (0x80000000 <= sigExtra);
	    if (
		   ! roundNearEven
		&& (roundingMode != softfloat_round_near_maxMag)
	    ) {
		doIncrement =
		    (roundingMode
			 == (sign ? softfloat_round_min : softfloat_round_max))
			&& sigExtra;
	    }
	} else if (
	       (0x7FFD < exp)
	    || ((exp == 0x7FFD) && doIncrement
		    && (softfloat_compare128M(
			    extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
			    == 0))
	) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    softfloat_raiseFlags(
		softfloat_flag_overflow | softfloat_flag_inexact );
	    if (
		   roundNearEven
		|| (roundingMode == softfloat_round_near_maxMag)
		|| (roundingMode
			== (sign ? softfloat_round_min : softfloat_round_max))
	    ) {
		ui = packToF128UI96( sign, 0x7FFF, 0 );
		uj = 0;
	    } else {
		ui = packToF128UI96( sign, 0x7FFE, 0x0000FFFF );
		uj = 0xFFFFFFFF;
	    }
	    zWPtr[indexWordHi( 4 )] = ui;
	    zWPtr[indexWord( 4, 2 )] = uj;
	    zWPtr[indexWord( 4, 1 )] = uj;
	    zWPtr[indexWord( 4, 0 )] = uj;
	    return;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uj = extSigPtr[indexWord( 5, 1 )];
    if ( sigExtra ) {
	softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
	if ( roundingMode == softfloat_round_odd ) {
	    uj |= 1;
	    goto noIncrementPackReturn;
	}
#endif
    }
    if ( doIncrement ) {
	++uj;
	if ( uj ) {
	    if ( ! (sigExtra & 0x7FFFFFFF) && roundNearEven ) uj &= ~1;
	    zWPtr[indexWord( 4, 2 )] = extSigPtr[indexWord( 5, 3 )];
	    zWPtr[indexWord( 4, 1 )] = extSigPtr[indexWord( 5, 2 )];
	    zWPtr[indexWord( 4, 0 )] = uj;
	    ui = extSigPtr[indexWordHi( 5 )];
	} else {
	    zWPtr[indexWord( 4, 0 )] = uj;
	    ui = extSigPtr[indexWord( 5, 2 )] + 1;
	    zWPtr[indexWord( 4, 1 )] = ui;
	    uj = extSigPtr[indexWord( 5, 3 )];
	    if ( ui ) {
		zWPtr[indexWord( 4, 2 )] = uj;
		ui = extSigPtr[indexWordHi( 5 )];
	    } else {
		++uj;
		zWPtr[indexWord( 4, 2 )] = uj;
		ui = extSigPtr[indexWordHi( 5 )];
		if ( ! uj ) ++ui;
	    }
	}
    } else {
 noIncrementPackReturn:
	zWPtr[indexWord( 4, 0 )] = uj;
	ui = extSigPtr[indexWord( 5, 2 )];
	zWPtr[indexWord( 4, 1 )] = ui;
	uj |= ui;
	ui = extSigPtr[indexWord( 5, 3 )];
	zWPtr[indexWord( 4, 2 )] = ui;
	uj |= ui;
	ui = extSigPtr[indexWordHi( 5 )];
	uj |= ui;
	if ( ! uj ) exp = 0;
    }
    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp, ui );
}

static int
softfloat_shiftNormSigF128M(
     const uint32_t *wPtr, uint_fast8_t shiftDist, uint32_t *sigPtr )
{
    uint32_t wordSig;
    int32_t exp;
    uint32_t leadingBit;

    wordSig = wPtr[indexWordHi( 4 )];
    exp = expF128UI96( wordSig );
    if ( exp ) {
	softfloat_shortShiftLeft128M( wPtr, shiftDist, sigPtr );
	leadingBit = 0x00010000<<shiftDist;
	sigPtr[indexWordHi( 4 )] =
	    (sigPtr[indexWordHi( 4 )] & (leadingBit - 1)) | leadingBit;
    } else {
	exp = 16;
	wordSig &= 0x7FFFFFFF;
	if ( ! wordSig ) {
	    exp = -16;
	    wordSig = wPtr[indexWord( 4, 2 )];
	    if ( ! wordSig ) {
		exp = -48;
		wordSig = wPtr[indexWord( 4, 1 )];
		if ( ! wordSig ) {
		    wordSig = wPtr[indexWord( 4, 0 )];
		    if ( ! wordSig ) return -128;
		    exp = -80;
		}
	    }
	}
	exp -= softfloat_countLeadingZeros32( wordSig );
	softfloat_shiftLeft128M( wPtr, 1 - exp + shiftDist, sigPtr );
    }
    return exp;
}

bool
softfloat_tryPropagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
{

    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
	softfloat_propagateNaNF128M( aWPtr, bWPtr, zWPtr );
	return 1;
    }
    return 0;
}

static inline struct uint128
softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 + b0;
    z.v64 = a64 + b64 + (z.v0 < a0);
    return z;
}

static inline struct uint128
softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 - b0;
    z.v64 = a64 - b64;
    z.v64 -= (a0 < b0);
    return z;
}

static inline struct uint128
softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
{
    union { unsigned __int128 ui; struct uint128 s; } uZ;
    uZ.ui = ((unsigned __int128) a64<<64 | a0) * b;
    return uZ.s;
}

static void
softfloat_mul128MTo256M(
     const uint32_t *aPtr, const uint32_t *bPtr, uint32_t *zPtr )
{
    uint32_t *lastZPtr, wordB;
    uint64_t dwordProd;
    uint32_t wordZ;
    uint_fast8_t carry;

    bPtr += indexWordLo( 4 );
    lastZPtr = zPtr + indexMultiwordHi( 8, 5 );
    zPtr += indexMultiwordLo( 8, 5 );
    wordB = *bPtr;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
    zPtr[indexWord( 5, 0 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 1 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 2 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 3 )] = dwordProd;
    zPtr[indexWord( 5, 4 )] = dwordProd>>32;
    do {
	bPtr += wordIncr;
	zPtr += wordIncr;
	wordB = *bPtr;
	dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
	wordZ = zPtr[indexWord( 5, 0 )] + (uint32_t) dwordProd;
	zPtr[indexWord( 5, 0 )] = wordZ;
	carry = (wordZ < (uint32_t) dwordProd);
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 1 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 1 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 2 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 2 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 3 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 3 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	zPtr[indexWord( 5, 4 )] = (dwordProd>>32) + carry;
    } while ( zPtr != lastZPtr );
}

float128_t
softfloat_roundPackToF128(
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig64,
     uint_fast64_t sig0,
     uint_fast64_t sigExtra
 )
{
    uint_fast8_t roundingMode;
    bool roundNearEven, doIncrement, isTiny;
    struct uint128_extra sig128Extra;
    uint_fast64_t uiZ64, uiZ0;
    struct uint128 sig128;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
	doIncrement =
	    (roundingMode
		 == (sign ? softfloat_round_min : softfloat_round_max))
		&& sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
	if ( exp < 0 ) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    isTiny =
		   (softfloat_detectTininess
			== softfloat_tininess_beforeRounding)
		|| (exp < -1)
		|| ! doIncrement
		|| softfloat_lt128(
		       sig64,
		       sig0,
		       UINT64_C( 0x0001FFFFFFFFFFFF ),
		       UINT64_C( 0xFFFFFFFFFFFFFFFF )
		   );
	    sig128Extra =
		softfloat_shiftRightJam128Extra( sig64, sig0, sigExtra, -exp );
	    sig64 = sig128Extra.v.v64;
	    sig0  = sig128Extra.v.v0;
	    sigExtra = sig128Extra.extra;
	    exp = 0;
	    if ( isTiny && sigExtra ) {
		softfloat_raiseFlags( softfloat_flag_underflow );
	    }
	    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
	    if (
		   ! roundNearEven
		&& (roundingMode != softfloat_round_near_maxMag)
	    ) {
		doIncrement =
		    (roundingMode
			 == (sign ? softfloat_round_min : softfloat_round_max))
			&& sigExtra;
	    }
	} else if (
	       (0x7FFD < exp)
	    || ((exp == 0x7FFD)
		    && softfloat_eq128(
			   sig64,
			   sig0,
			   UINT64_C( 0x0001FFFFFFFFFFFF ),
			   UINT64_C( 0xFFFFFFFFFFFFFFFF )
		       )
		    && doIncrement)
	) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    softfloat_raiseFlags(
		softfloat_flag_overflow | softfloat_flag_inexact );
	    if (
		   roundNearEven
		|| (roundingMode == softfloat_round_near_maxMag)
		|| (roundingMode
			== (sign ? softfloat_round_min : softfloat_round_max))
	    ) {
		uiZ64 = packToF128UI64( sign, 0x7FFF, 0 );
		uiZ0  = 0;
	    } else {
		uiZ64 = packToF128UI64( sign, 0x7FFE, UINT64_C( 0x0000FFFFFFFFFFFF ) );
		uiZ0 = UINT64_C( 0xFFFFFFFFFFFFFFFF );
	    }
	    goto uiZ;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( sigExtra ) {
	softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
	if ( roundingMode == softfloat_round_odd ) {
	    sig0 |= 1;
	    goto packReturn;
	}
#endif
    }
    if ( doIncrement ) {
	sig128 = softfloat_add128( sig64, sig0, 0, 1 );
	sig64 = sig128.v64;
	sig0 =
	    sig128.v0
		& ~(uint64_t)
		       (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
			    & roundNearEven);
    } else {
	if ( ! (sig64 | sig0) ) exp = 0;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
    uiZ64 = packToF128UI64( sign, exp, sig64 );
    uiZ0  = sig0;
 uiZ:
    uZ.ui.v64 = uiZ64;
    uZ.ui.v0  = uiZ0;
    return uZ.f;
}

float128_t
softfloat_normRoundPackToF128(
     bool sign, int_fast32_t exp, uint_fast64_t sig64, uint_fast64_t sig0 )
{
    int_fast8_t shiftDist;
    struct uint128 sig128;
    union ui128_f128 uZ;
    uint_fast64_t sigExtra;
    struct uint128_extra sig128Extra;

    if ( ! sig64 ) {
	exp -= 64;
	sig64 = sig0;
	sig0 = 0;
    }
    shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
    exp -= shiftDist;
    if ( 0 <= shiftDist ) {
	if ( shiftDist ) {
	    sig128 = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
	    sig64 = sig128.v64;
	    sig0  = sig128.v0;
	}
	if ( (uint32_t) exp < 0x7FFD ) {
	    uZ.ui.v64 = packToF128UI64( sign, sig64 | sig0 ? exp : 0, sig64 );
	    uZ.ui.v0  = sig0;
	    return uZ.f;
	}
	sigExtra = 0;
    } else {
	sig128Extra =
	    softfloat_shortShiftRightJam128Extra( sig64, sig0, 0, -shiftDist );
	sig64 = sig128Extra.v.v64;
	sig0  = sig128Extra.v.v0;
	sigExtra = sig128Extra.extra;
    }
    return softfloat_roundPackToF128( sign, exp, sig64, sig0, sigExtra );
}

struct exp32_sig128
softfloat_normSubnormalF128Sig( uint_fast64_t sig64, uint_fast64_t sig0 )
{
    int_fast8_t shiftDist;
    struct exp32_sig128 z;

    if ( ! sig64 ) {
	shiftDist = softfloat_countLeadingZeros64( sig0 ) - 15;
	z.exp = -63 - shiftDist;
	if ( shiftDist < 0 ) {
	    z.sig.v64 = sig0>>-shiftDist;
	    z.sig.v0  = sig0<<(shiftDist & 63);
	} else {
	    z.sig.v64 = sig0<<shiftDist;
	    z.sig.v0  = 0;
	}
    } else {
	shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
	z.exp = 1 - shiftDist;
	z.sig = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
    }
    return z;
}

struct uint128
softfloat_propagateNaNF128UI(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0
 )
{
    bool isSigNaNA, isSigNaNB;
    uint_fast64_t uiNonsigA64, uiNonsigB64, uiMagA64, uiMagB64;
    struct uint128 uiZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
    isSigNaNB = softfloat_isSigNaNF128UI( uiB64, uiB0 );
    /*------------------------------------------------------------------------
    | Make NaNs non-signaling.
    *------------------------------------------------------------------------*/
    uiNonsigA64 = uiA64 | UINT64_C( 0x0000800000000000 );
    uiNonsigB64 = uiB64 | UINT64_C( 0x0000800000000000 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( isSigNaNA | isSigNaNB ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	if ( isSigNaNA ) {
	    if ( isSigNaNB ) goto returnLargerMag;
	    if ( isNaNF128UI( uiB64, uiB0 ) ) goto returnB;
	    goto returnA;
	} else {
	    if ( isNaNF128UI( uiA64, uiA0 ) ) goto returnA;
	    goto returnB;
	}
    }
 returnLargerMag:
    uiMagA64 = uiA64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    uiMagB64 = uiB64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    if ( uiMagA64 < uiMagB64 ) goto returnB;
    if ( uiMagB64 < uiMagA64 ) goto returnA;
    if ( uiA0 < uiB0 ) goto returnB;
    if ( uiB0 < uiA0 ) goto returnA;
    if ( uiNonsigA64 < uiNonsigB64 ) goto returnA;
 returnB:
    uiZ.v64 = uiNonsigB64;
    uiZ.v0  = uiB0;
    return uiZ;
 returnA:
    uiZ.v64 = uiNonsigA64;
    uiZ.v0  = uiA0;
    return uiZ;

}

void
ui64_to_f128M( uint64_t a, float128_t *zPtr )
{
    uint32_t *zWPtr, uiZ96, uiZ64;
    uint_fast8_t shiftDist;
    uint32_t *ptr;

    zWPtr = (uint32_t *) zPtr;
    uiZ96 = 0;
    uiZ64 = 0;
    zWPtr[indexWord( 4, 1 )] = 0;
    zWPtr[indexWord( 4, 0 )] = 0;
    if ( a ) {
	shiftDist = softfloat_countLeadingZeros64( a ) + 17;
	if ( shiftDist < 32 ) {
	    ptr = zWPtr + indexMultiwordHi( 4, 3 );
	    ptr[indexWord( 3, 2 )] = 0;
	    ptr[indexWord( 3, 1 )] = a>>32;
	    ptr[indexWord( 3, 0 )] = a;
	    softfloat_shortShiftLeft96M( ptr, shiftDist, ptr );
	    ptr[indexWordHi( 3 )] =
		packToF128UI96( 0, 0x404E - shiftDist, ptr[indexWordHi( 3 )] );
	    return;
	}
	a <<= shiftDist - 32;
	uiZ96 = packToF128UI96( 0, 0x404E - shiftDist, a>>32 );
	uiZ64 = a;
    }
    zWPtr[indexWord( 4, 3 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = uiZ64;
}

void
f64_to_f128M( float64_t a, float128_t *zPtr )
{
    uint32_t *zWPtr;
    union ui64_f64 uA;
    uint64_t uiA;
    bool sign;
    int_fast16_t exp;
    uint64_t frac;
    struct commonNaN commonNaN;
    uint32_t uiZ96;
    struct exp16_sig64 normExpSig;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signF64UI( uiA );
    exp  = expF64UI( uiA );
    frac = fracF64UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 0 )] = 0;
    if ( exp == 0x7FF ) {
	if ( frac ) {
	    softfloat_f64UIToCommonNaN( uiA, &commonNaN );
	    softfloat_commonNaNToF128M( &commonNaN, zWPtr );
	    return;
	}
	uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
	goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ! exp ) {
	if ( ! frac ) {
	    uiZ96 = packToF128UI96( sign, 0, 0 );
	    goto uiZ;
	}
	normExpSig = softfloat_normSubnormalF64Sig( frac );
	exp = normExpSig.exp - 1;
	frac = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 1 )] = (uint32_t) frac<<28;
    frac >>= 4;
    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp + 0x3C00, frac>>32 );
    zWPtr[indexWord( 4, 2 )] = frac;
    return;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 uiZ:
    zWPtr[indexWord( 4, 3 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = 0;
    zWPtr[indexWord( 4, 1 )] = 0;

}

static float128_t
softfloat_addMagsF128(
     uint64_t uiA64,
     uint64_t uiA0,
     uint64_t uiB64,
     uint64_t uiB0,
     bool signZ
 )
{
    int32_t expA;
    struct uint128 sigA;
    int32_t expB;
    struct uint128 sigB;
    int32_t expDiff;
    struct uint128 uiZ, sigZ;
    int32_t expZ;
    uint64_t sigZExtra;
    struct uint128_extra sig128Extra;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    expDiff = expA - expB;
    if ( ! expDiff ) {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	sigZ = softfloat_add128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
	if ( ! expA ) {
	    uiZ.v64 = packToF128UI64( signZ, 0, sigZ.v64 );
	    uiZ.v0  = sigZ.v0;
	    goto uiZ;
	}
	expZ = expA;
	sigZ.v64 |= UINT64_C( 0x0002000000000000 );
	sigZExtra = 0;
	goto shiftRight1;
    }
    if ( expDiff < 0 ) {
	if ( expB == 0x7FFF ) {
	    if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
	    uiZ.v0  = 0;
	    goto uiZ;
	}
	expZ = expB;
	if ( expA ) {
	    sigA.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    ++expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigA.v64, sigA.v0, 0, -expDiff );
	sigA = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    } else {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	expZ = expA;
	if ( expB ) {
	    sigB.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    --expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigB.v64, sigB.v0, 0, expDiff );
	sigB = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    }
 newlyAligned:
    sigZ =
	softfloat_add128(
	    sigA.v64 | UINT64_C( 0x0001000000000000 ),
	    sigA.v0,
	    sigB.v64,
	    sigB.v0
	);
    --expZ;
    if ( sigZ.v64 < UINT64_C( 0x0002000000000000 ) ) goto roundAndPack;
    ++expZ;
 shiftRight1:
    sig128Extra =
	softfloat_shortShiftRightJam128Extra(
	    sigZ.v64, sigZ.v0, sigZExtra, 1 );
    sigZ = sig128Extra.v;
    sigZExtra = sig128Extra.extra;
 roundAndPack:
    return
	softfloat_roundPackToF128( signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}

float128_t
softfloat_subMagsF128(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    struct uint128 sigA;
    int_fast32_t expB;
    struct uint128 sigB, sigZ;
    int_fast32_t expDiff, expZ;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 4 );
    sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 4 );
    expDiff = expA - expB;
    if ( 0 < expDiff ) goto expABigger;
    if ( expDiff < 0 ) goto expBBigger;
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	softfloat_raiseFlags( softfloat_flag_invalid );
	uiZ.v64 = defaultNaNF128UI64;
	uiZ.v0  = defaultNaNF128UI0;
	goto uiZ;
    }
    expZ = expA;
    if ( ! expZ ) expZ = 1;
    if ( sigB.v64 < sigA.v64 ) goto aBigger;
    if ( sigA.v64 < sigB.v64 ) goto bBigger;
    if ( sigB.v0 < sigA.v0 ) goto aBigger;
    if ( sigA.v0 < sigB.v0 ) goto bBigger;
    uiZ.v64 =
	packToF128UI64(
	    (softfloat_roundingMode == softfloat_round_min), 0, 0 );
    uiZ.v0 = 0;
    goto uiZ;
 expBBigger:
    if ( expB == 0x7FFF ) {
	if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	uiZ.v64 = packToF128UI64( signZ ^ 1, 0x7FFF, 0 );
	uiZ.v0  = 0;
	goto uiZ;
    }
    if ( expA ) {
	sigA.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	++expDiff;
	if ( ! expDiff ) goto newlyAlignedBBigger;
    }
    sigA = softfloat_shiftRightJam128( sigA.v64, sigA.v0, -expDiff );
 newlyAlignedBBigger:
    expZ = expB;
    sigB.v64 |= UINT64_C( 0x0010000000000000 );
 bBigger:
    signZ = ! signZ;
    sigZ = softfloat_sub128( sigB.v64, sigB.v0, sigA.v64, sigA.v0 );
    goto normRoundPack;
 expABigger:
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	uiZ.v64 = uiA64;
	uiZ.v0  = uiA0;
	goto uiZ;
    }
    if ( expB ) {
	sigB.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	--expDiff;
	if ( ! expDiff ) goto newlyAlignedABigger;
    }
    sigB = softfloat_shiftRightJam128( sigB.v64, sigB.v0, expDiff );
 newlyAlignedABigger:
    expZ = expA;
    sigA.v64 |= UINT64_C( 0x0010000000000000 );
 aBigger:
    sigZ = softfloat_sub128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
 normRoundPack:
    return softfloat_normRoundPackToF128( signZ, expZ - 5, sigZ.v64, sigZ.v0 );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
}

void
f128M_add( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint64_t *aWPtr, *bWPtr;
    uint64_t uiA64, uiA0;
    int signA;
    uint64_t uiB64, uiB0;
    int signB;

    aWPtr = (const uint64_t *) aPtr;
    bWPtr = (const uint64_t *) bPtr;
    uiA64 = aWPtr[indexWord( 2, 1 )];
    uiA0  = aWPtr[indexWord( 2, 0 )];
    signA = signF128UI64( uiA64 );
    uiB64 = bWPtr[indexWord( 2, 1 )];
    uiB0  = bWPtr[indexWord( 2, 0 )];
    signB = signF128UI64( uiB64 );

    if ( signA == signB ) {
	*zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    } else {
	*zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    }
}

void
f128M_sub( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint64_t *aWPtr, *bWPtr;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    uint_fast64_t uiB64, uiB0;
    bool signB;

    aWPtr = (const uint64_t *) aPtr;
    bWPtr = (const uint64_t *) bPtr;
    uiA64 = aWPtr[indexWord( 2, 1 )];
    uiA0  = aWPtr[indexWord( 2, 0 )];
    signA = signF128UI64( uiA64 );
    uiB64 = bWPtr[indexWord( 2, 1 )];
    uiB0  = bWPtr[indexWord( 2, 0 )];
    signB = signF128UI64( uiB64 );
    if ( signA == signB ) {
	*zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    } else {
	*zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    }
}

void
f128M_mul( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint32_t *aWPtr, *bWPtr;
    uint32_t *zWPtr;
    uint32_t uiA96;
    int32_t expA;
    uint32_t uiB96;
    int32_t expB;
    bool signZ;
    const uint32_t *ptr;
    uint32_t uiZ96, sigA[4];
    uint_fast8_t shiftDist;
    uint32_t sigB[4];
    int32_t expZ;
    uint32_t sigProd[8], *extSigZPtr;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    aWPtr = (const uint32_t *) aPtr;
    bWPtr = (const uint32_t *) bPtr;
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uiA96 = aWPtr[indexWordHi( 4 )];
    expA = expF128UI96( uiA96 );
    uiB96 = bWPtr[indexWordHi( 4 )];
    expB = expF128UI96( uiB96 );
    signZ = signF128UI96( uiA96 ) ^ signF128UI96( uiB96 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
	if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
	ptr = aWPtr;
	if ( ! expA ) goto possiblyInvalid;
	if ( ! expB ) {
	    ptr = bWPtr;
     possiblyInvalid:
	    if (
		! fracF128UI96( ptr[indexWordHi( 4 )] )
		    && ! (ptr[indexWord( 4, 2 )] | ptr[indexWord( 4, 1 )]
			      | ptr[indexWord( 4, 0 )])
	    ) {
		softfloat_invalidF128M( zWPtr );
		return;
	    }
	}
	uiZ96 = packToF128UI96( signZ, 0x7FFF, 0 );
	goto uiZ96;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( expA ) {
	sigA[indexWordHi( 4 )] = fracF128UI96( uiA96 ) | 0x00010000;
	sigA[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
	sigA[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
	sigA[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
    } else {
	expA = softfloat_shiftNormSigF128M( aWPtr, 0, sigA );
	if ( expA == -128 ) goto zero;
    }
    if ( expB ) {
	sigB[indexWordHi( 4 )] = fracF128UI96( uiB96 ) | 0x00010000;
	sigB[indexWord( 4, 2 )] = bWPtr[indexWord( 4, 2 )];
	sigB[indexWord( 4, 1 )] = bWPtr[indexWord( 4, 1 )];
	sigB[indexWord( 4, 0 )] = bWPtr[indexWord( 4, 0 )];
    } else {
	expB = softfloat_shiftNormSigF128M( bWPtr, 0, sigB );
	if ( expB == -128 ) goto zero;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    expZ = expA + expB - 0x4000;
    softfloat_mul128MTo256M( sigA, sigB, sigProd );
    if (
	sigProd[indexWord( 8, 2 )]
	    || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
    ) {
	sigProd[indexWord( 8, 3 )] |= 1;
    }
    extSigZPtr = &sigProd[indexMultiwordHi( 8, 5 )];
    shiftDist = 16;
    if ( extSigZPtr[indexWordHi( 5 )] & 2 ) {
	++expZ;
	shiftDist = 15;
    }
    softfloat_shortShiftLeft160M( extSigZPtr, shiftDist, extSigZPtr );
    softfloat_roundPackMToF128M( signZ, expZ, extSigZPtr, zWPtr );
    return;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 zero:
    uiZ96 = packToF128UI96( signZ, 0, 0 );
 uiZ96:
    zWPtr[indexWordHi( 4 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = 0;
    zWPtr[indexWord( 4, 1 )] = 0;
    zWPtr[indexWord( 4, 0 )] = 0;
}

float128_t
f128_div( float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    int_fast32_t expA;
    struct uint128 sigA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    bool signB;
    int_fast32_t expB;
    struct uint128 sigB;
    bool signZ;
    struct exp32_sig128 normExpSig;
    int_fast32_t expZ;
    struct uint128 rem;
    uint_fast32_t recip32;
    int ix;
    uint_fast64_t q64;
    uint_fast32_t q;
    struct uint128 term;
    uint_fast32_t qs[3];
    uint_fast64_t sigZExtra;
    struct uint128 sigZ, uiZ;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    signB = signF128UI64( uiB64 );
    expB  = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    signZ = signA ^ signB;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	if ( expB == 0x7FFF ) {
	    if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	    goto invalid;
	}
	goto infinity;
    }
    if ( expB == 0x7FFF ) {
	if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	goto zero;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ! expB ) {
	if ( ! (sigB.v64 | sigB.v0) ) {
	    if ( ! (expA | sigA.v64 | sigA.v0) ) goto invalid;
	    softfloat_raiseFlags( softfloat_flag_infinite );
	    goto infinity;
	}
	normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
	expB = normExpSig.exp;
	sigB = normExpSig.sig;
    }
    if ( ! expA ) {
	if ( ! (sigA.v64 | sigA.v0) ) goto zero;
	normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
	expA = normExpSig.exp;
	sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    expZ = expA - expB + 0x3FFE;
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sigB.v64 |= UINT64_C( 0x0001000000000000 );
    rem = sigA;
    if ( softfloat_lt128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 ) ) {
	--expZ;
	rem = softfloat_add128( sigA.v64, sigA.v0, sigA.v64, sigA.v0 );
    }
    recip32 = softfloat_approxRecip32_1( sigB.v64>>17 );
    ix = 3;
    for (;;) {
	q64 = (uint_fast64_t) (uint32_t) (rem.v64>>19) * recip32;
	q = (q64 + 0x80000000)>>32;
	--ix;
	if ( ix < 0 ) break;
	rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
	term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
	rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
	if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
	    --q;
	    rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	}
	qs[ix] = q;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ((q + 1) & 7) < 2 ) {
	rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
	term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
	rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
	if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
	    --q;
	    rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	} else if ( softfloat_le128( sigB.v64, sigB.v0, rem.v64, rem.v0 ) ) {
	    ++q;
	    rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	}
	if ( rem.v64 | rem.v0 ) q |= 1;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    sigZExtra = (uint64_t) ((uint_fast64_t) q<<60);
    term = softfloat_shortShiftLeft128( 0, qs[1], 54 );
    sigZ =
	softfloat_add128(
	    (uint_fast64_t) qs[2]<<19, ((uint_fast64_t) qs[0]<<25) + (q>>4),
	    term.v64, term.v0
	);
    return
	softfloat_roundPackToF128( signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
    goto uiZ;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( softfloat_flag_invalid );
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0  = defaultNaNF128UI0;
    goto uiZ;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 infinity:
    uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
    goto uiZ0;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 zero:
    uiZ.v64 = packToF128UI64( signZ, 0, 0 );
 uiZ0:
    uiZ.v0 = 0;
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
}

static float128_t
f128_rem( float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    int_fast32_t expA;
    struct uint128 sigA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    int_fast32_t expB;
    struct uint128 sigB;
    struct exp32_sig128 normExpSig;
    struct uint128 rem;
    int_fast32_t expDiff;
    uint_fast32_t q, recip32;
    uint_fast64_t q64;
    struct uint128 term, altRem, meanRem;
    bool signRem;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    expB  = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
	if (
	    (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0))
	) {
	    goto propagateNaN;
	}
	goto invalid;
    }
    if ( expB == 0x7FFF ) {
	if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	return a;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ! expB ) {
	if ( ! (sigB.v64 | sigB.v0) ) goto invalid;
	normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
	expB = normExpSig.exp;
	sigB = normExpSig.sig;
    }
    if ( ! expA ) {
	if ( ! (sigA.v64 | sigA.v0) ) return a;
	normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
	expA = normExpSig.exp;
	sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sigB.v64 |= UINT64_C( 0x0001000000000000 );
    rem = sigA;
    expDiff = expA - expB;
    if ( expDiff < 1 ) {
	if ( expDiff < -1 ) return a;
	if ( expDiff ) {
	    --expB;
	    sigB = softfloat_add128( sigB.v64, sigB.v0, sigB.v64, sigB.v0 );
	    q = 0;
	} else {
	    q = softfloat_le128( sigB.v64, sigB.v0, rem.v64, rem.v0 );
	    if ( q ) {
		rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	    }
	}
    } else {
	recip32 = softfloat_approxRecip32_1( sigB.v64>>17 );
	expDiff -= 30;
	for (;;) {
	    q64 = (uint_fast64_t) (uint32_t) (rem.v64>>19) * recip32;
	    if ( expDiff < 0 ) break;
	    q = (q64 + 0x80000000)>>32;
	    rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
	    term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
	    rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
	    if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
		rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	    }
	    expDiff -= 29;
	}
	/*--------------------------------------------------------------------
	| (`expDiff' cannot be less than -29 here.)
	*--------------------------------------------------------------------*/
	q = (uint32_t) (q64>>32)>>(~expDiff & 31);
	rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, expDiff + 30 );
	term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
	rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
	if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
	    altRem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
	    goto selectRem;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    do {
	altRem = rem;
	++q;
	rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
    } while ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) );
 selectRem:
    meanRem = softfloat_add128( rem.v64, rem.v0, altRem.v64, altRem.v0 );
    if (
	(meanRem.v64 & UINT64_C( 0x8000000000000000 ))
	    || (! (meanRem.v64 | meanRem.v0) && (q & 1))
    ) {
	rem = altRem;
    }
    signRem = signA;
    if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
	signRem = ! signRem;
	rem = softfloat_sub128( 0, 0, rem.v64, rem.v0 );
    }
    return softfloat_normRoundPackToF128( signRem, expB - 1, rem.v64, rem.v0 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
    goto uiZ;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( softfloat_flag_invalid );
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0  = defaultNaNF128UI0;
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
}

bool
f128_isNan( float128_t a) {
    int_fast32_t expA;
    struct uint128 sigA;
    uint_fast64_t uiA64, uiA0;
    union ui128_f128 uA;

    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    return isNaNF128UI( uiA64, uiA0 );
//    expA  = expF128UI64( uiA64 );
//    sigA.v64 = fracF128UI64( uiA64 );
//    sigA.v0  = uiA0;
//
//    if ( expA == 0x7FFF ) {
//        if ( sigA.v64 | sigA.v0 ) return 1;
//    }
//    return 0;
}

bool
f128_isInf( float128_t a) {
    int_fast32_t expA;
    struct uint128 sigA;
    uint_fast64_t uiA64, uiA0;
    union ui128_f128 uA;

    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;

    if ( expA == 0x7FFF ) {
	if ( sigA.v64 == 0 ) return 1;
    }
    return 0;
}

static bool
f128_eq( float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;

    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    if ( isNaNF128UI( uiA64, uiA0 ) || isNaNF128UI( uiB64, uiB0 ) ) {
	if (
	       softfloat_isSigNaNF128UI( uiA64, uiA0 )
	    || softfloat_isSigNaNF128UI( uiB64, uiB0 )
	) {
	    softfloat_raiseFlags( softfloat_flag_invalid );
	}
	return 0;
    }
    return
	   (uiA0 == uiB0)
	&& (   (uiA64 == uiB64)
	    || (! uiA0 && ! ((uiA64 | uiB64) & UINT64_C( 0x7FFFFFFFFFFFFFFF )))
	   );
}

static bool
f128_lt( float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    bool signA, signB;

    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    if ( isNaNF128UI( uiA64, uiA0 ) || isNaNF128UI( uiB64, uiB0 ) ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	return 0;
    }
    signA = signF128UI64( uiA64 );
    signB = signF128UI64( uiB64 );
    return
	(signA != signB)
	    ? signA
		  && (((uiA64 | uiB64) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
			  | uiA0 | uiB0)
	    : ((uiA64 != uiB64) || (uiA0 != uiB0))
		  && (signA ^ softfloat_lt128( uiA64, uiA0, uiB64, uiB0 ));
}

static inline void
f128M_div( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    *zPtr = f128_div( *aPtr, *bPtr );
}

static inline void
f128M_rem( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    *zPtr = f128_rem( *aPtr, *bPtr );
}

static inline bool
f128M_eq( const float128_t *aPtr, const float128_t *bPtr )
{
    return f128_eq( *aPtr, *bPtr );
}

static inline bool
f128M_lt( const float128_t *aPtr, const float128_t *bPtr )
{
    return f128_lt( *aPtr, *bPtr );
}

#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat class methodsFor:'documentation'!

documentation
"
    QuadFloats represent rational numbers with limited precision
    and are mapped to IEEE quadruple precision format (128bit),
    also called binary128.

    If the underlying cpu supports them natively, the machine format (long double) is
    used. Otherwise, a software emulation is done, which is much slower.
    Thus only use them, if you really need the additional precision;
    if not, use Float (which are doubles) or LongFloats which usually have IEEE extended precision (80bit).

    QuadFloats give you definite 128 bit quadruple floats,
    thus, code using quadFloats is guaranteed to be portable from one architecture to another.

    Representation:
	    128bit quadruple IEEE floats (16bytes);
	    112 bit mantissa,
	    16 bit exponent,
	    34 decimal digits (approx.)

    On Sparc CPUs, this is a native supported type (long double) and fast;
    on x86 CPUs, this is emulated and slow.

    Mixed mode arithmetic:
	quadFloat op anyFloat    -> quadFloat
	anyFloat op quadFloat    -> quadFloat

    Range and precision of storage formats: see LimitedPrecisionReal >> documentation

    [author:]
	Claus Gittinger

    [see also:]
	Number
	Float ShortFloat LongFloat Fraction FixedPoint Integer Complex
	FloatArray DoubleArray
	https://en.wikipedia.org/wiki/Extended_precision
"
! !

!QuadFloat class methodsFor:'instance creation'!

basicNew
    "return a new quadFloat - here we return 0.0
     - QuadFloats are usually NOT created this way ...
     Its implemented here to allow things like binary store & load
     of quadFloats.
     (but it is not a good idea to store the bits of a float - the reader might have a
      totally different representation - so floats should be
      binary stored in a device independent format)."

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;

    if (sizeof(long double) == sizeof(float128_t)) {
	__qMKLFLOAT(newFloat, 0.0);   /* OBJECT ALLOCATION */
    } else {
	float128_t qf;
	f64_to_f128M(0.0, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
    }
    RETURN (newFloat);
#endif /* SUPPORT_QUADFLOAT */
%}.
    self error:'QuadFloats not supported on this patform'

    "Created: / 06-06-2019 / 17:18:58 / Claus Gittinger"
!

fromFloat:aFloat
    "return a new quadFloat, given a float value"

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t f;

    if (__isFloatLike(aFloat)) {
	float64_t f = __floatVal(aFloat);
	float128_t qf;

	f64_to_f128M(f, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
	RETURN (newFloat);
    }
#endif /* SUPPORT_QUADFLOAT */
%}.
    aFloat isFloat ifTrue:[
	self errorUnsupported
    ].
    ArgumentError raise

    "
     QuadFloat fromFloat:123.0
     123.0 asQuadFloat
     123 asQuadFloat
    "

    "Created: / 06-06-2019 / 18:01:03 / Claus Gittinger"
!

fromInteger:anInteger
    "return a new quadFloat, given an integer value"

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t f;

    if (__isSmallInteger(anInteger)) {
	INT iVal = __intVal(anInteger);
	float128_t qf;

	ui64_to_f128M( iVal, &qf );
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
	RETURN (newFloat);
    }
#endif /* SUPPORT_QUADFLOAT */
%}.
    ^ super fromInteger:anInteger

    "
     QuadFloat fromInteger:123
     123 asQuadFloat
    "
!

fromShortFloat:aShortFloat
    "return a new quadFloat, given a float value"

    ^ self fromFloat:(aShortFloat asFloat) 

    "
     QuadFloat fromShortFloat:123.0 asShortFloat
    "

    "Created: / 08-06-2019 / 03:28:37 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'coercing & converting'!

coerce:aNumber
    "convert the argument aNumber into an instance of the receiver's class and return it."

    ^ aNumber asQuadFloat.

    "Created: / 06-06-2019 / 16:51:01 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'constants'!

NaN
    "return a quadFloat which represents not-a-Number (i.e. an invalid number)"

    |nan|

    NaN isNil ifTrue:[
%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
	{
	    OBJ newFloat;
	    float128_t qf;

	    softfloat_commonNaNToF128M( NULL, (uint32_t*)(&qf) );
	    __qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
	    nan = newFloat;
	}
#endif /* SUPPORT_QUADFLOAT */
%}.
	NaN := nan
    ].
    ^ NaN
!

e
    "return the constant e as quadFloat"

    E isNil ifTrue:[
	"/ eDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	E  := self readFrom:(Number eDigits)
    ].
    ^ E

    "Created: / 06-06-2019 / 17:01:54 / Claus Gittinger"
!

pi
    "return the constant pi as quadFloat"

    Pi isNil ifTrue:[
	"/ piDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	Pi  := self readFrom:(Number piDigits)
    ].
    ^ Pi

    "Created: / 06-06-2019 / 17:09:51 / Claus Gittinger"
!

unity
    "return the neutral element for multiplication (1.0) as QuadFloat"

    QuadFloatOne isNil ifTrue:[
	QuadFloatOne := 1.0 asQuadFloat.
    ].
    ^ QuadFloatOne

    "Created: / 07-06-2019 / 03:26:38 / Claus Gittinger"
!

zero
    "return the neutral element for addition (0.0) as QuadFloat"

    QuadFloatZero isNil ifTrue:[
	QuadFloatZero := 0.0 asQuadFloat
    ].
    ^ QuadFloatZero

    "Created: / 07-06-2019 / 09:22:56 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'error reportng'!

errorUnsupported
    self error:'QuadFloats not supported on this patform'

    "Created: / 07-06-2019 / 02:44:39 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'queries'!

numBitsInMantissa
    "answer the number of bits in the mantissa
     the hidden bit is not counted here:

     This is an 128bit quadfloat,
	   where 112 bits are available in the mantissa:
	seeeeeee eeeeeeee mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm mmmmmmmm...
    "

    ^ 112

    "
     1.0 class numBitsInMantissa
     1.0 asShortFloat class numBitsInMantissa
     1.0 asLongFloat class numBitsInMantissa
     1.0 asQuadFloat class numBitsInMantissa
    "

    "Created: / 07-06-2019 / 03:24:20 / Claus Gittinger"
! !

!QuadFloat methodsFor:'arithmetic'!

* aNumber
    "return the product of the receiver and the argument."

thisContext isRecursive ifTrue:[self halt].
    ^ aNumber productFromQuadFloat:self
!

+ aNumber
    "return the sum of the receiver and the argument, aNumber"

    ^ aNumber sumFromQuadFloat:self
!

- aNumber
    "return the difference of the receiver and the argument, aNumber"

    ^ aNumber differenceFromQuadFloat:self
!

/ aNumber
    "return the quotient of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber quotientFromQuadFloat:self
!

rem: aNumber
    "return the floating point remainder of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber remainderFromLongFloat:self
! !

!QuadFloat methodsFor:'coercing & converting'!

generality
    "return the generality value - see ArithmeticValue>>retry:coercing:"

    ^ 93

    "Created: / 07-06-2019 / 09:30:58 / Claus Gittinger"
! !

!QuadFloat methodsFor:'comparing'!

< aNumber
    "return true, if the argument is greater"

    ^ aNumber lessFromQuadFloat:self

    "Created: / 07-06-2019 / 09:25:47 / Claus Gittinger"
!

= aNumber
    "return true, if the argument represents the same numeric value
     as the receiver, false otherwise"

    ^ aNumber equalFromQuadFloat:self

    "Created: / 07-06-2019 / 09:25:27 / Claus Gittinger"
!

hash
    "return a number for hashing; redefined, since floats compare
     by numeric value (i.e. 3.0 = 3), therefore 3.0 hash must be the same
     as 3 hash."

    |i|

    (self >= SmallInteger minVal and:[self <= SmallInteger maxVal]) ifTrue:[
	i := self asInteger.
	self = i ifTrue:[
	    ^ i hash
	].
    ].

    ^ self asFloat hash

    "
     1.2345 hash
     1.2345 asShortFloat hash
     1.2345 asLongFloat hash
     1.2345 asQuadFloat hash

     1.0 hash
     1.0 asShortFloat hash
     1.0 asLongFloat hash
     1.0 asQuadFloat hash

     0.5 asShortFloat hash
     0.5 asShortFloat hash
     0.5 asLongFloat hash
     0.5 asQuadFloat hash

     0.25 asShortFloat hash
     0.25 asShortFloat hash
     0.25 asLongFloat hash
     0.25 asQuadFloat hash
    "

    "Created: / 07-06-2019 / 09:28:07 / Claus Gittinger"
! !

!QuadFloat methodsFor:'double dispatching'!

differenceFromQuadFloat:aQuadFloat
%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_sub( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
!

equalFromQuadFloat:aQuadFloat
    "sent when aQuadFloat does not know how to compare agaist the receiver, self"

%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    RETURN (f128M_eq( &argVal, &myVal ) ? true : false);
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported

    "Modified: / 08-06-2019 / 13:31:48 / Claus Gittinger"
!

lessFromQuadFloat:aQuadFloat
    "sent when aQuadFloat does not know how to compare agaist the receiver, self"

%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    RETURN (f128M_lt( &argVal, &myVal ) ? true : false);
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
!

productFromQuadFloat:aQuadFloat
    "sent when aQuadFloat does not know how to multiply the receiver, self"

%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_mul( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
!

quotientFromQuadFloat:aQuadFloat
    "sent when aQuadFloat does not know how to multiply the receiver, self"

%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_div( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
!

sumFromQuadFloat:aQuadFloat
    "sent when aQuadFloat does not know how to add the receiver, self"

%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_add( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
! !

!QuadFloat methodsFor:'error reportng'!

errorUnsupported
    self class errorUnsupported

    "Modified: / 07-06-2019 / 02:44:51 / Claus Gittinger"
! !

!QuadFloat methodsFor:'queries'!

isFinite
    "return true, if the receiver is a finite float
     i.e. not NaN and not infinite."

%{  /* NOCONTEXT */
    float128_t myVal;

    myVal = __quadFloatVal(self);
    RETURN (f128_isInf(myVal) ? true : false)
%}.

    "
	1.0 isFinite
	self NaN isFinite
	self infinity isFinite
	(0.0 uncheckedDivide: 0.0) isFinite
	(1.0 uncheckedDivide: 0.0) isFinite
    "
!

isNaN
    "return true, if the receiver is a finite float
     i.e. not NaN and not infinite."

%{  /* NOCONTEXT */
    float128_t myVal;

    myVal = __quadFloatVal(self);
    RETURN (f128_isNan(myVal) ? true : false)
%}.

    "
	1.0 isFinite
	self NaN isFinite
	self infinity isFinite
	(0.0 uncheckedDivide: 0.0) isFinite
	(1.0 uncheckedDivide: 0.0) isFinite
    "
! !

!QuadFloat class methodsFor:'documentation'!

version_CVS
    ^ '$Header$'
! !