QuadFloat.st
author Claus Gittinger <cg@exept.de>
Fri, 07 Jun 2019 01:11:34 +0200
changeset 5000 608e4912b5cf
parent 4999 fd4435d4e583
child 5002 c053e5b6ad82
permissions -rw-r--r--
*** empty log message ***

"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

LimitedPrecisionReal variableByteSubclass:#QuadFloat
	instanceVariableNames:''
	classVariableNames:'QuadFloatZero QuadFloatOne Pi E Epsilon NaN PositiveInfinity
		NegativeInfinity Halfpi HalfpiNegative'
	poolDictionaries:''
	category:'Magnitude-Numbers'
!

!QuadFloat primitiveDefinitions!

%{
#if __POINTER_SIZE__ == 8
# define SUPPORT_QUADFLOAT
#endif

#ifdef SUPPORT_QUADFLOAT
# include <stdint.h>

/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
enum {
    softfloat_tininess_beforeRounding = 0,
    softfloat_tininess_afterRounding  = 1
};

/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
enum {
    softfloat_flag_inexact   =  1,
    softfloat_flag_underflow =  2,
    softfloat_flag_overflow  =  4,
    softfloat_flag_infinite  =  8,
    softfloat_flag_invalid   = 16
};
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.  (Mode "odd" is supported only if
| SoftFloat is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.)
*----------------------------------------------------------------------------*/
enum {
    softfloat_round_near_even   = 0,
    softfloat_round_minMag      = 1,
    softfloat_round_min         = 2,
    softfloat_round_max         = 3,
    softfloat_round_near_maxMag = 4,
    softfloat_round_odd         = 6
};
#define init_detectTininess softfloat_tininess_afterRounding

#if defined(LITTLEENDIAN) || defined(__LSB_FIRST__)
struct uint128          { uint64_t v0, v64; };
struct uint64_extra     { uint64_t extra, v; };
struct uint128_extra    { uint64_t extra; struct uint128 v; };
#else
struct uint128          { uint64_t v64, v0; };
struct uint64_extra     { uint64_t v, extra; };
struct uint128_extra    { struct uint128 v; uint64_t extra; };
#endif

typedef unsigned char bool;
typedef double float64_t;
union ui64_f64   { uint64_t ui; float64_t f; };
union ui128_f128 { struct uint128 ui; float128_t f; };

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 128-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF128UI96 0xFFFF8000
#define defaultNaNF128UI64 0
#define defaultNaNF128UI32 0
#define defaultNaNF128UI0  0

struct commonNaN {
    bool sign;
#ifdef LITTLEENDIAN
    uint64_t v0, v64;
#else
    uint64_t v64, v0;
#endif
};
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };

#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveVariables!

%{
#ifdef SUPPORT_QUADFLOAT

uint_fast8_t softfloat_exceptionFlags;
uint_fast8_t softfloat_roundingMode = softfloat_round_near_even;
uint_fast8_t softfloat_detectTininess = init_detectTininess;
uint_fast8_t softfloat_exceptionFlags = 0;

const uint_least8_t softfloat_countLeadingZeros8[256] = {
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveFunctions!

%{
#ifdef SUPPORT_QUADFLOAT

#if defined(LITTLEENDIAN) || defined(__LSB_FIRST__)
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif

#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )

#define signF128UI64( a64 ) ((bool) ((uint64_t) (a64)>>63))
#define expF128UI64( a64 ) ((int_fast32_t) ((a64)>>48) & 0x7FFF)
#define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
#define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))

#define signF128UI96( a96 ) ((bool) ((uint32_t) (a96)>>31))
#define expF128UI96( a96 ) ((int32_t) ((a96)>>16) & 0x7FFF)
#define fracF128UI96( a96 ) ((a96) & 0x0000FFFF)
#define packToF128UI96( sign, exp, sig96 ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<16) + (sig96))

#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))

/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shortShiftLeftM' with
| 'size_words' = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft160M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, dist, zPtr )
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shiftRightJamM' with
| 'size_words' = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_shiftRightJam160M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 5, aPtr, dist, zPtr )
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shortShiftLeftM' with
| 'size_words' = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft128M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, dist, zPtr )

static inline void
softfloat_raiseFlags( uint_fast8_t flags ) {
    softfloat_exceptionFlags |= flags;
}

#if 1
static inline void
softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
{
    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
}
#else
# define softfloat_commonNaNToF128M( aPtr, zWPtr ) \
{ \
    (zWPtr)[indexWord( 4, 3 )] = defaultNaNF128UI96; \
    (zWPtr)[indexWord( 4, 2 )] = defaultNaNF128UI64; \
    (zWPtr)[indexWord( 4, 1 )] = defaultNaNF128UI32; \
    (zWPtr)[indexWord( 4, 0 )] = defaultNaNF128UI0; \
}
#endif

static inline void
softfloat_invalidF128M( uint32_t *zWPtr ) {
    softfloat_raiseFlags( softfloat_flag_invalid );
    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
}

static bool
f128M_isSignalingNaN( const float128_t *aPtr )
{
    const uint32_t *aWPtr;
    uint32_t uiA96;

    aWPtr = (const uint32_t *) aPtr;
    uiA96 = aWPtr[indexWordHi( 4 )];
    if ( (uiA96 & 0x7FFF8000) != 0x7FFF0000 ) return 0;
    return
	((uiA96 & 0x00007FFF) != 0)
	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
		     | aWPtr[indexWord( 4, 0 )])
		    != 0);
}

static void
softfloat_shortShiftRightJamM(
      uint_fast8_t size_words,
      const uint64_t *aPtr,
      uint_fast8_t dist,
      uint64_t *zPtr
  )
{
    uint_fast8_t uNegDist;
    unsigned int index, lastIndex;
    uint64_t partWordZ, wordA;

    uNegDist = -dist;
    index = indexWordLo( size_words );
    lastIndex = indexWordHi( size_words );
    wordA = aPtr[index];
    partWordZ = wordA>>dist;
    if ( partWordZ<<dist != wordA ) partWordZ |= 1;
    while ( index != lastIndex ) {
	wordA = aPtr[index + wordIncr];
	zPtr[index] = wordA<<(uNegDist & 63) | partWordZ;
	index += wordIncr;
	partWordZ = wordA>>dist;
    }
    zPtr[index] = partWordZ;
}

void
softfloat_shiftRightJamM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 )
{
    uint32_t wordJam, wordDist, *ptr;
    uint_fast8_t i, innerDist;

    wordJam = 0;
    wordDist = dist>>5;
    if ( wordDist ) {
	if ( size_words < wordDist ) wordDist = size_words;
	ptr = (uint32_t *) (aPtr + indexMultiwordLo( size_words, wordDist ));
	i = wordDist;
	do {
	    wordJam = *ptr++;
	    if ( wordJam ) break;
	    --i;
	} while ( i );
	ptr = zPtr;
    }
    if ( wordDist < size_words ) {
	aPtr += indexMultiwordHiBut( size_words, wordDist );
	innerDist = dist & 31;
	if ( innerDist ) {
	    softfloat_shortShiftRightJamM(
		size_words - wordDist,
		aPtr,
		innerDist,
		zPtr + indexMultiwordLoBut( size_words, wordDist )
	    );
	    if ( ! wordDist ) goto wordJam;
	} else {
	    aPtr += indexWordLo( size_words - wordDist );
	    ptr = zPtr + indexWordLo( size_words );
	    for ( i = size_words - wordDist; i; --i ) {
		*ptr = *aPtr;
		aPtr += wordIncr;
		ptr += wordIncr;
	    }
	}
	ptr = zPtr + indexMultiwordHi( size_words, wordDist );
    }
    do {
	*ptr++ = 0;
	--wordDist;
    } while ( wordDist );
 wordJam:
    if ( wordJam ) zPtr[indexWordLo( size_words )] |= 1;

}

void
 softfloat_shortShiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint_fast8_t dist,
     uint32_t *zPtr
 )
{
    uint_fast8_t uNegDist;
    unsigned int index, lastIndex;
    uint32_t partWordZ, wordA;

    uNegDist = -dist;
    index = indexWordHi( size_words );
    lastIndex = indexWordLo( size_words );
    partWordZ = aPtr[index]<<dist;
    while ( index != lastIndex ) {
	wordA = aPtr[index - wordIncr];
	zPtr[index] = partWordZ | wordA>>(uNegDist & 31);
	index -= wordIncr;
	partWordZ = wordA<<dist;
    }
    zPtr[index] = partWordZ;
}

static void
softfloat_shiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 )
{
    uint32_t wordDist;
    uint_fast8_t innerDist;
    uint32_t *destPtr;
    uint_fast8_t i;

    wordDist = dist>>5;
    if ( wordDist < size_words ) {
	aPtr += indexMultiwordLoBut( size_words, wordDist );
	innerDist = dist & 31;
	if ( innerDist ) {
	    softfloat_shortShiftLeftM(
		size_words - wordDist,
		aPtr,
		innerDist,
		zPtr + indexMultiwordHiBut( size_words, wordDist )
	    );
	    if ( ! wordDist ) return;
	} else {
	    aPtr += indexWordHi( size_words - wordDist );
	    destPtr = zPtr + indexWordHi( size_words );
	    for ( i = size_words - wordDist; i; --i ) {
		*destPtr = *aPtr;
		aPtr -= wordIncr;
		destPtr -= wordIncr;
	    }
	}
	zPtr += indexMultiwordLo( size_words, wordDist );
    } else {
	wordDist = size_words;
    }
    do {
	*zPtr++ = 0;
	--wordDist;
    } while ( wordDist );
}

/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_shiftLeftM' with
| 'size_words' = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_shiftLeft128M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 4, aPtr, dist, zPtr )

bool
softfloat_isNaNF128M( const uint32_t *aWPtr )
{
    uint32_t uiA96;

    uiA96 = aWPtr[indexWordHi( 4 )];
    if ( (~uiA96 & 0x7FFF0000) != 0 ) return 0;
    return
	((uiA96 & 0x0000FFFF) != 0)
	    || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
		     | aWPtr[indexWord( 4, 0 )])
		    != 0);
}

static inline uint_fast8_t
softfloat_countLeadingZeros32( uint32_t a ) {
    return a ? __builtin_clz( a ) : 32;
}

static inline bool
softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 == b64) && (a0 == b0);
}

static inline bool
softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 < b64) || ((a64 == b64) && (a0 < b0));
}

void
softfloat_propagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
{
    bool isSigNaNA;
    const uint32_t *ptr;
    bool isSigNaNB;
    uint32_t uiA96, uiB96, wordMagA, wordMagB;

    isSigNaNA = f128M_isSignalingNaN( (const float128_t *) aWPtr );
    ptr = aWPtr;
    if ( ! bWPtr ) {
	if ( isSigNaNA ) softfloat_raiseFlags( softfloat_flag_invalid );
	goto copy;
    }
    isSigNaNB = f128M_isSignalingNaN( (const float128_t *) bWPtr );
    if ( isSigNaNA | isSigNaNB ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	if ( isSigNaNA ) {
	    if ( isSigNaNB ) goto returnLargerUIMag;
	    if ( softfloat_isNaNF128M( bWPtr ) ) goto copyB;
	    goto copy;
	} else {
	    if ( softfloat_isNaNF128M( aWPtr ) ) goto copy;
	    goto copyB;
	}
    }
 returnLargerUIMag:
    uiA96 = aWPtr[indexWordHi( 4 )];
    uiB96 = bWPtr[indexWordHi( 4 )];
    wordMagA = uiA96 & 0x7FFFFFFF;
    wordMagB = uiB96 & 0x7FFFFFFF;
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 2 )];
    wordMagB = bWPtr[indexWord( 4, 2 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 1 )];
    wordMagB = bWPtr[indexWord( 4, 1 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    wordMagA = aWPtr[indexWord( 4, 0 )];
    wordMagB = bWPtr[indexWord( 4, 0 )];
    if ( wordMagA < wordMagB ) goto copyB;
    if ( wordMagB < wordMagA ) goto copy;
    if ( uiA96 < uiB96 ) goto copy;
 copyB:
    ptr = bWPtr;
 copy:
    zWPtr[indexWordHi( 4 )] = ptr[indexWordHi( 4 )] | 0x00008000;
    zWPtr[indexWord( 4, 2 )] = ptr[indexWord( 4, 2 )];
    zWPtr[indexWord( 4, 1 )] = ptr[indexWord( 4, 1 )];
    zWPtr[indexWord( 4, 0 )] = ptr[indexWord( 4, 0 )];
}

static inline uint_fast8_t
softfloat_countLeadingZeros64( uint64_t a )
{
    uint_fast8_t count;
    uint32_t a32;

    count = 0;
    a32 = a>>32;
    if ( ! a32 ) {
	count = 32;
	a32 = a;
    }
    /*------------------------------------------------------------------------
    | From here, result is current count + count leading zeros of `a32'.
    *------------------------------------------------------------------------*/
    if ( a32 < 0x10000 ) {
	count += 16;
	a32 <<= 16;
    }
    if ( a32 < 0x1000000 ) {
	count += 8;
	a32 <<= 8;
    }
    count += softfloat_countLeadingZeros8[a32>>24];
    return count;
}

static inline struct exp16_sig64
softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
    int_fast8_t shiftDist;
    struct exp16_sig64 z;

    shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
    z.exp = 1 - shiftDist;
    z.sig = sig<<shiftDist;
    return z;
}

static inline struct uint128_extra
softfloat_shiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128_extra z;

    u8NegDist = -dist;
    if ( dist < 64 ) {
	z.v.v64 = a64>>dist;
	z.v.v0 = a64<<(u8NegDist & 63) | a0>>dist;
	z.extra = a0<<(u8NegDist & 63);
    } else {
	z.v.v64 = 0;
	if ( dist == 64 ) {
	    z.v.v0 = a64;
	    z.extra = a0;
	} else {
	    extra |= a0;
	    if ( dist < 128 ) {
		z.v.v0 = a64>>(dist & 63);
		z.extra = a64<<(u8NegDist & 63);
	    } else {
		z.v.v0 = 0;
		z.extra = (dist == 128) ? a64 : (a64 != 0);
	    }
	}
    }
    z.extra |= (extra != 0);
    return z;
}

static inline struct uint128_extra
softfloat_shortShiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
{
    uint_fast8_t negDist = -dist;
    struct uint128_extra z;
    z.v.v64 = a64>>dist;
    z.v.v0 = a64<<(negDist & 63) | a0>>dist;
    z.extra = a0<<(negDist & 63) | (extra != 0);
    return z;
}

static inline struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128 z;

    if ( dist < 64 ) {
	u8NegDist = -dist;
	z.v64 = a64>>dist;
	z.v0 =
	    a64<<(u8NegDist & 63) | a0>>dist
		| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
    } else {
	z.v64 = 0;
	z.v0 =
	    (dist < 127)
		? a64>>(dist & 63)
		      | (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
			     != 0)
		: ((a64 | a0) != 0);
    }
    return z;
}

static inline struct uint128
softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    struct uint128 z;
    z.v64 = a64<<dist | a0>>(-dist & 63);
    z.v0 = a0<<dist;
    return z;
}

static int_fast8_t
softfloat_compare128M( const uint32_t *aPtr, const uint32_t *bPtr )
{
    unsigned int index, lastIndex;
    uint32_t wordA, wordB;

    index = indexWordHi( 4 );
    lastIndex = indexWordLo( 4 );
    for (;;) {
	wordA = aPtr[index];
	wordB = bPtr[index];
	if ( wordA != wordB ) return (wordA < wordB) ? -1 : 1;
	if ( index == lastIndex ) break;
	index -= wordIncr;
    }
    return 0;
}

static void
softfloat_roundPackMToF128M(
     bool sign, int32_t exp, uint32_t *extSigPtr, uint32_t *zWPtr )
{
    uint_fast8_t roundingMode;
    bool roundNearEven;
    uint32_t sigExtra;
    bool doIncrement, isTiny;
    static const uint32_t maxSig[4] =
	INIT_UINTM4( 0x0001FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
    uint32_t ui, uj;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    sigExtra = extSigPtr[indexWordLo( 5 )];
    doIncrement = (0x80000000 <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
	doIncrement =
	    (roundingMode
		 == (sign ? softfloat_round_min : softfloat_round_max))
		&& sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
	if ( exp < 0 ) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    isTiny =
		   (softfloat_detectTininess
			== softfloat_tininess_beforeRounding)
		|| (exp < -1)
		|| ! doIncrement
		|| (softfloat_compare128M(
			extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
			< 0);
	    softfloat_shiftRightJam160M( extSigPtr, -exp, extSigPtr );
	    exp = 0;
	    sigExtra = extSigPtr[indexWordLo( 5 )];
	    if ( isTiny && sigExtra ) {
		softfloat_raiseFlags( softfloat_flag_underflow );
	    }
	    doIncrement = (0x80000000 <= sigExtra);
	    if (
		   ! roundNearEven
		&& (roundingMode != softfloat_round_near_maxMag)
	    ) {
		doIncrement =
		    (roundingMode
			 == (sign ? softfloat_round_min : softfloat_round_max))
			&& sigExtra;
	    }
	} else if (
	       (0x7FFD < exp)
	    || ((exp == 0x7FFD) && doIncrement
		    && (softfloat_compare128M(
			    extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
			    == 0))
	) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    softfloat_raiseFlags(
		softfloat_flag_overflow | softfloat_flag_inexact );
	    if (
		   roundNearEven
		|| (roundingMode == softfloat_round_near_maxMag)
		|| (roundingMode
			== (sign ? softfloat_round_min : softfloat_round_max))
	    ) {
		ui = packToF128UI96( sign, 0x7FFF, 0 );
		uj = 0;
	    } else {
		ui = packToF128UI96( sign, 0x7FFE, 0x0000FFFF );
		uj = 0xFFFFFFFF;
	    }
	    zWPtr[indexWordHi( 4 )] = ui;
	    zWPtr[indexWord( 4, 2 )] = uj;
	    zWPtr[indexWord( 4, 1 )] = uj;
	    zWPtr[indexWord( 4, 0 )] = uj;
	    return;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uj = extSigPtr[indexWord( 5, 1 )];
    if ( sigExtra ) {
	softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
	if ( roundingMode == softfloat_round_odd ) {
	    uj |= 1;
	    goto noIncrementPackReturn;
	}
#endif
    }
    if ( doIncrement ) {
	++uj;
	if ( uj ) {
	    if ( ! (sigExtra & 0x7FFFFFFF) && roundNearEven ) uj &= ~1;
	    zWPtr[indexWord( 4, 2 )] = extSigPtr[indexWord( 5, 3 )];
	    zWPtr[indexWord( 4, 1 )] = extSigPtr[indexWord( 5, 2 )];
	    zWPtr[indexWord( 4, 0 )] = uj;
	    ui = extSigPtr[indexWordHi( 5 )];
	} else {
	    zWPtr[indexWord( 4, 0 )] = uj;
	    ui = extSigPtr[indexWord( 5, 2 )] + 1;
	    zWPtr[indexWord( 4, 1 )] = ui;
	    uj = extSigPtr[indexWord( 5, 3 )];
	    if ( ui ) {
		zWPtr[indexWord( 4, 2 )] = uj;
		ui = extSigPtr[indexWordHi( 5 )];
	    } else {
		++uj;
		zWPtr[indexWord( 4, 2 )] = uj;
		ui = extSigPtr[indexWordHi( 5 )];
		if ( ! uj ) ++ui;
	    }
	}
    } else {
 noIncrementPackReturn:
	zWPtr[indexWord( 4, 0 )] = uj;
	ui = extSigPtr[indexWord( 5, 2 )];
	zWPtr[indexWord( 4, 1 )] = ui;
	uj |= ui;
	ui = extSigPtr[indexWord( 5, 3 )];
	zWPtr[indexWord( 4, 2 )] = ui;
	uj |= ui;
	ui = extSigPtr[indexWordHi( 5 )];
	uj |= ui;
	if ( ! uj ) exp = 0;
    }
    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp, ui );
}

static int
softfloat_shiftNormSigF128M(
     const uint32_t *wPtr, uint_fast8_t shiftDist, uint32_t *sigPtr )
{
    uint32_t wordSig;
    int32_t exp;
    uint32_t leadingBit;

    wordSig = wPtr[indexWordHi( 4 )];
    exp = expF128UI96( wordSig );
    if ( exp ) {
	softfloat_shortShiftLeft128M( wPtr, shiftDist, sigPtr );
	leadingBit = 0x00010000<<shiftDist;
	sigPtr[indexWordHi( 4 )] =
	    (sigPtr[indexWordHi( 4 )] & (leadingBit - 1)) | leadingBit;
    } else {
	exp = 16;
	wordSig &= 0x7FFFFFFF;
	if ( ! wordSig ) {
	    exp = -16;
	    wordSig = wPtr[indexWord( 4, 2 )];
	    if ( ! wordSig ) {
		exp = -48;
		wordSig = wPtr[indexWord( 4, 1 )];
		if ( ! wordSig ) {
		    wordSig = wPtr[indexWord( 4, 0 )];
		    if ( ! wordSig ) return -128;
		    exp = -80;
		}
	    }
	}
	exp -= softfloat_countLeadingZeros32( wordSig );
	softfloat_shiftLeft128M( wPtr, 1 - exp + shiftDist, sigPtr );
    }
    return exp;
}

bool
softfloat_tryPropagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
{

    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
	softfloat_propagateNaNF128M( aWPtr, bWPtr, zWPtr );
	return 1;
    }
    return 0;
}

static inline struct uint128
softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 + b0;
    z.v64 = a64 + b64 + (z.v0 < a0);
    return z;
}

static inline struct uint128
softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 - b0;
    z.v64 = a64 - b64;
    z.v64 -= (a0 < b0);
    return z;
}

static void
softfloat_mul128MTo256M(
     const uint32_t *aPtr, const uint32_t *bPtr, uint32_t *zPtr )
{
    uint32_t *lastZPtr, wordB;
    uint64_t dwordProd;
    uint32_t wordZ;
    uint_fast8_t carry;

    bPtr += indexWordLo( 4 );
    lastZPtr = zPtr + indexMultiwordHi( 8, 5 );
    zPtr += indexMultiwordLo( 8, 5 );
    wordB = *bPtr;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
    zPtr[indexWord( 5, 0 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 1 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 2 )] = dwordProd;
    dwordProd = (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
    zPtr[indexWord( 5, 3 )] = dwordProd;
    zPtr[indexWord( 5, 4 )] = dwordProd>>32;
    do {
	bPtr += wordIncr;
	zPtr += wordIncr;
	wordB = *bPtr;
	dwordProd = (uint64_t) aPtr[indexWord( 4, 0 )] * wordB;
	wordZ = zPtr[indexWord( 5, 0 )] + (uint32_t) dwordProd;
	zPtr[indexWord( 5, 0 )] = wordZ;
	carry = (wordZ < (uint32_t) dwordProd);
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 1 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 1 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 1 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 2 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 2 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 2 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	dwordProd =
	    (uint64_t) aPtr[indexWord( 4, 3 )] * wordB + (dwordProd>>32);
	wordZ = zPtr[indexWord( 5, 3 )] + (uint32_t) dwordProd + carry;
	zPtr[indexWord( 5, 3 )] = wordZ;
	if ( wordZ != (uint32_t) dwordProd ) {
	    carry = (wordZ < (uint32_t) dwordProd);
	}
	zPtr[indexWord( 5, 4 )] = (dwordProd>>32) + carry;
    } while ( zPtr != lastZPtr );
}

float128_t
softfloat_roundPackToF128(
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig64,
     uint_fast64_t sig0,
     uint_fast64_t sigExtra
 )
{
    uint_fast8_t roundingMode;
    bool roundNearEven, doIncrement, isTiny;
    struct uint128_extra sig128Extra;
    uint_fast64_t uiZ64, uiZ0;
    struct uint128 sig128;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
	doIncrement =
	    (roundingMode
		 == (sign ? softfloat_round_min : softfloat_round_max))
		&& sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
	if ( exp < 0 ) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    isTiny =
		   (softfloat_detectTininess
			== softfloat_tininess_beforeRounding)
		|| (exp < -1)
		|| ! doIncrement
		|| softfloat_lt128(
		       sig64,
		       sig0,
		       UINT64_C( 0x0001FFFFFFFFFFFF ),
		       UINT64_C( 0xFFFFFFFFFFFFFFFF )
		   );
	    sig128Extra =
		softfloat_shiftRightJam128Extra( sig64, sig0, sigExtra, -exp );
	    sig64 = sig128Extra.v.v64;
	    sig0  = sig128Extra.v.v0;
	    sigExtra = sig128Extra.extra;
	    exp = 0;
	    if ( isTiny && sigExtra ) {
		softfloat_raiseFlags( softfloat_flag_underflow );
	    }
	    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
	    if (
		   ! roundNearEven
		&& (roundingMode != softfloat_round_near_maxMag)
	    ) {
		doIncrement =
		    (roundingMode
			 == (sign ? softfloat_round_min : softfloat_round_max))
			&& sigExtra;
	    }
	} else if (
	       (0x7FFD < exp)
	    || ((exp == 0x7FFD)
		    && softfloat_eq128(
			   sig64,
			   sig0,
			   UINT64_C( 0x0001FFFFFFFFFFFF ),
			   UINT64_C( 0xFFFFFFFFFFFFFFFF )
		       )
		    && doIncrement)
	) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    softfloat_raiseFlags(
		softfloat_flag_overflow | softfloat_flag_inexact );
	    if (
		   roundNearEven
		|| (roundingMode == softfloat_round_near_maxMag)
		|| (roundingMode
			== (sign ? softfloat_round_min : softfloat_round_max))
	    ) {
		uiZ64 = packToF128UI64( sign, 0x7FFF, 0 );
		uiZ0  = 0;
	    } else {
		uiZ64 =
		    packToF128UI64(
			sign, 0x7FFE, UINT64_C( 0x0000FFFFFFFFFFFF ) );
		uiZ0 = UINT64_C( 0xFFFFFFFFFFFFFFFF );
	    }
	    goto uiZ;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( sigExtra ) {
	softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
	if ( roundingMode == softfloat_round_odd ) {
	    sig0 |= 1;
	    goto packReturn;
	}
#endif
    }
    if ( doIncrement ) {
	sig128 = softfloat_add128( sig64, sig0, 0, 1 );
	sig64 = sig128.v64;
	sig0 =
	    sig128.v0
		& ~(uint64_t)
		       (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
			    & roundNearEven);
    } else {
	if ( ! (sig64 | sig0) ) exp = 0;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
    uiZ64 = packToF128UI64( sign, exp, sig64 );
    uiZ0  = sig0;
 uiZ:
    uZ.ui.v64 = uiZ64;
    uZ.ui.v0  = uiZ0;
    return uZ.f;
}

float128_t
 softfloat_normRoundPackToF128(
     bool sign, int_fast32_t exp, uint_fast64_t sig64, uint_fast64_t sig0 )
{
    int_fast8_t shiftDist;
    struct uint128 sig128;
    union ui128_f128 uZ;
    uint_fast64_t sigExtra;
    struct uint128_extra sig128Extra;

    if ( ! sig64 ) {
	exp -= 64;
	sig64 = sig0;
	sig0 = 0;
    }
    shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
    exp -= shiftDist;
    if ( 0 <= shiftDist ) {
	if ( shiftDist ) {
	    sig128 = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
	    sig64 = sig128.v64;
	    sig0  = sig128.v0;
	}
	if ( (uint32_t) exp < 0x7FFD ) {
	    uZ.ui.v64 = packToF128UI64( sign, sig64 | sig0 ? exp : 0, sig64 );
	    uZ.ui.v0  = sig0;
	    return uZ.f;
	}
	sigExtra = 0;
    } else {
	sig128Extra =
	    softfloat_shortShiftRightJam128Extra( sig64, sig0, 0, -shiftDist );
	sig64 = sig128Extra.v.v64;
	sig0  = sig128Extra.v.v0;
	sigExtra = sig128Extra.extra;
    }
    return softfloat_roundPackToF128( sign, exp, sig64, sig0, sigExtra );
}

struct uint128
softfloat_propagateNaNF128UI(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0
 )
{
    bool isSigNaNA, isSigNaNB;
    uint_fast64_t uiNonsigA64, uiNonsigB64, uiMagA64, uiMagB64;
    struct uint128 uiZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
    isSigNaNB = softfloat_isSigNaNF128UI( uiB64, uiB0 );
    /*------------------------------------------------------------------------
    | Make NaNs non-signaling.
    *------------------------------------------------------------------------*/
    uiNonsigA64 = uiA64 | UINT64_C( 0x0000800000000000 );
    uiNonsigB64 = uiB64 | UINT64_C( 0x0000800000000000 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( isSigNaNA | isSigNaNB ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	if ( isSigNaNA ) {
	    if ( isSigNaNB ) goto returnLargerMag;
	    if ( isNaNF128UI( uiB64, uiB0 ) ) goto returnB;
	    goto returnA;
	} else {
	    if ( isNaNF128UI( uiA64, uiA0 ) ) goto returnA;
	    goto returnB;
	}
    }
 returnLargerMag:
    uiMagA64 = uiA64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    uiMagB64 = uiB64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    if ( uiMagA64 < uiMagB64 ) goto returnB;
    if ( uiMagB64 < uiMagA64 ) goto returnA;
    if ( uiA0 < uiB0 ) goto returnB;
    if ( uiB0 < uiA0 ) goto returnA;
    if ( uiNonsigA64 < uiNonsigB64 ) goto returnA;
 returnB:
    uiZ.v64 = uiNonsigB64;
    uiZ.v0  = uiB0;
    return uiZ;
 returnA:
    uiZ.v64 = uiNonsigA64;
    uiZ.v0  = uiA0;
    return uiZ;

}

void
f64_to_f128M( float64_t a, float128_t *zPtr )
{
    uint32_t *zWPtr;
    union ui64_f64 uA;
    uint64_t uiA;
    bool sign;
    int_fast16_t exp;
    uint64_t frac;
    struct commonNaN commonNaN;
    uint32_t uiZ96;
    struct exp16_sig64 normExpSig;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signF64UI( uiA );
    exp  = expF64UI( uiA );
    frac = fracF64UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 0 )] = 0;
    if ( exp == 0x7FF ) {
	if ( frac ) {
	    softfloat_f64UIToCommonNaN( uiA, &commonNaN );
	    softfloat_commonNaNToF128M( &commonNaN, zWPtr );
	    return;
	}
	uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
	goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ! exp ) {
	if ( ! frac ) {
	    uiZ96 = packToF128UI96( sign, 0, 0 );
	    goto uiZ;
	}
	normExpSig = softfloat_normSubnormalF64Sig( frac );
	exp = normExpSig.exp - 1;
	frac = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 1 )] = (uint32_t) frac<<28;
    frac >>= 4;
    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp + 0x3C00, frac>>32 );
    zWPtr[indexWord( 4, 2 )] = frac;
    return;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 uiZ:
    zWPtr[indexWord( 4, 3 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = 0;
    zWPtr[indexWord( 4, 1 )] = 0;

}

static float128_t
softfloat_addMagsF128(
     uint64_t uiA64,
     uint64_t uiA0,
     uint64_t uiB64,
     uint64_t uiB0,
     bool signZ
 )
{
    int32_t expA;
    struct uint128 sigA;
    int32_t expB;
    struct uint128 sigB;
    int32_t expDiff;
    struct uint128 uiZ, sigZ;
    int32_t expZ;
    uint64_t sigZExtra;
    struct uint128_extra sig128Extra;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    expDiff = expA - expB;
    if ( ! expDiff ) {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	sigZ = softfloat_add128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
	if ( ! expA ) {
	    uiZ.v64 = packToF128UI64( signZ, 0, sigZ.v64 );
	    uiZ.v0  = sigZ.v0;
	    goto uiZ;
	}
	expZ = expA;
	sigZ.v64 |= UINT64_C( 0x0002000000000000 );
	sigZExtra = 0;
	goto shiftRight1;
    }
    if ( expDiff < 0 ) {
	if ( expB == 0x7FFF ) {
	    if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
	    uiZ.v0  = 0;
	    goto uiZ;
	}
	expZ = expB;
	if ( expA ) {
	    sigA.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    ++expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigA.v64, sigA.v0, 0, -expDiff );
	sigA = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    } else {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	expZ = expA;
	if ( expB ) {
	    sigB.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    --expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigB.v64, sigB.v0, 0, expDiff );
	sigB = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    }
 newlyAligned:
    sigZ =
	softfloat_add128(
	    sigA.v64 | UINT64_C( 0x0001000000000000 ),
	    sigA.v0,
	    sigB.v64,
	    sigB.v0
	);
    --expZ;
    if ( sigZ.v64 < UINT64_C( 0x0002000000000000 ) ) goto roundAndPack;
    ++expZ;
 shiftRight1:
    sig128Extra =
	softfloat_shortShiftRightJam128Extra(
	    sigZ.v64, sigZ.v0, sigZExtra, 1 );
    sigZ = sig128Extra.v;
    sigZExtra = sig128Extra.extra;
 roundAndPack:
    return
	softfloat_roundPackToF128( signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}

float128_t
softfloat_subMagsF128(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    struct uint128 sigA;
    int_fast32_t expB;
    struct uint128 sigB, sigZ;
    int_fast32_t expDiff, expZ;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 4 );
    sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 4 );
    expDiff = expA - expB;
    if ( 0 < expDiff ) goto expABigger;
    if ( expDiff < 0 ) goto expBBigger;
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	softfloat_raiseFlags( softfloat_flag_invalid );
	uiZ.v64 = defaultNaNF128UI64;
	uiZ.v0  = defaultNaNF128UI0;
	goto uiZ;
    }
    expZ = expA;
    if ( ! expZ ) expZ = 1;
    if ( sigB.v64 < sigA.v64 ) goto aBigger;
    if ( sigA.v64 < sigB.v64 ) goto bBigger;
    if ( sigB.v0 < sigA.v0 ) goto aBigger;
    if ( sigA.v0 < sigB.v0 ) goto bBigger;
    uiZ.v64 =
	packToF128UI64(
	    (softfloat_roundingMode == softfloat_round_min), 0, 0 );
    uiZ.v0 = 0;
    goto uiZ;
 expBBigger:
    if ( expB == 0x7FFF ) {
	if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	uiZ.v64 = packToF128UI64( signZ ^ 1, 0x7FFF, 0 );
	uiZ.v0  = 0;
	goto uiZ;
    }
    if ( expA ) {
	sigA.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	++expDiff;
	if ( ! expDiff ) goto newlyAlignedBBigger;
    }
    sigA = softfloat_shiftRightJam128( sigA.v64, sigA.v0, -expDiff );
 newlyAlignedBBigger:
    expZ = expB;
    sigB.v64 |= UINT64_C( 0x0010000000000000 );
 bBigger:
    signZ = ! signZ;
    sigZ = softfloat_sub128( sigB.v64, sigB.v0, sigA.v64, sigA.v0 );
    goto normRoundPack;
 expABigger:
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	uiZ.v64 = uiA64;
	uiZ.v0  = uiA0;
	goto uiZ;
    }
    if ( expB ) {
	sigB.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	--expDiff;
	if ( ! expDiff ) goto newlyAlignedABigger;
    }
    sigB = softfloat_shiftRightJam128( sigB.v64, sigB.v0, expDiff );
 newlyAlignedABigger:
    expZ = expA;
    sigA.v64 |= UINT64_C( 0x0010000000000000 );
 aBigger:
    sigZ = softfloat_sub128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
 normRoundPack:
    return softfloat_normRoundPackToF128( signZ, expZ - 5, sigZ.v64, sigZ.v0 );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}

void
f128M_add( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint64_t *aWPtr, *bWPtr;
    uint64_t uiA64, uiA0;
    int signA;
    uint64_t uiB64, uiB0;
    int signB;

    aWPtr = (const uint64_t *) aPtr;
    bWPtr = (const uint64_t *) bPtr;
    uiA64 = aWPtr[indexWord( 2, 1 )];
    uiA0  = aWPtr[indexWord( 2, 0 )];
    signA = signF128UI64( uiA64 );
    uiB64 = bWPtr[indexWord( 2, 1 )];
    uiB0  = bWPtr[indexWord( 2, 0 )];
    signB = signF128UI64( uiB64 );

    if ( signA == signB ) {
	*zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    } else {
	*zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    }
}

void
f128M_sub( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint64_t *aWPtr, *bWPtr;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    uint_fast64_t uiB64, uiB0;
    bool signB;

    aWPtr = (const uint64_t *) aPtr;
    bWPtr = (const uint64_t *) bPtr;
    uiA64 = aWPtr[indexWord( 2, 1 )];
    uiA0  = aWPtr[indexWord( 2, 0 )];
    signA = signF128UI64( uiA64 );
    uiB64 = bWPtr[indexWord( 2, 1 )];
    uiB0  = bWPtr[indexWord( 2, 0 )];
    signB = signF128UI64( uiB64 );
    if ( signA == signB ) {
	*zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    } else {
	*zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    }
}

void
f128M_mul( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint32_t *aWPtr, *bWPtr;
    uint32_t *zWPtr;
    uint32_t uiA96;
    int32_t expA;
    uint32_t uiB96;
    int32_t expB;
    bool signZ;
    const uint32_t *ptr;
    uint32_t uiZ96, sigA[4];
    uint_fast8_t shiftDist;
    uint32_t sigB[4];
    int32_t expZ;
    uint32_t sigProd[8], *extSigZPtr;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    aWPtr = (const uint32_t *) aPtr;
    bWPtr = (const uint32_t *) bPtr;
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uiA96 = aWPtr[indexWordHi( 4 )];
    expA = expF128UI96( uiA96 );
    uiB96 = bWPtr[indexWordHi( 4 )];
    expB = expF128UI96( uiB96 );
    signZ = signF128UI96( uiA96 ) ^ signF128UI96( uiB96 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
	if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
	ptr = aWPtr;
	if ( ! expA ) goto possiblyInvalid;
	if ( ! expB ) {
	    ptr = bWPtr;
     possiblyInvalid:
	    if (
		! fracF128UI96( ptr[indexWordHi( 4 )] )
		    && ! (ptr[indexWord( 4, 2 )] | ptr[indexWord( 4, 1 )]
			      | ptr[indexWord( 4, 0 )])
	    ) {
		softfloat_invalidF128M( zWPtr );
		return;
	    }
	}
	uiZ96 = packToF128UI96( signZ, 0x7FFF, 0 );
	goto uiZ96;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( expA ) {
	sigA[indexWordHi( 4 )] = fracF128UI96( uiA96 ) | 0x00010000;
	sigA[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
	sigA[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
	sigA[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
    } else {
	expA = softfloat_shiftNormSigF128M( aWPtr, 0, sigA );
	if ( expA == -128 ) goto zero;
    }
    if ( expB ) {
	sigB[indexWordHi( 4 )] = fracF128UI96( uiB96 ) | 0x00010000;
	sigB[indexWord( 4, 2 )] = bWPtr[indexWord( 4, 2 )];
	sigB[indexWord( 4, 1 )] = bWPtr[indexWord( 4, 1 )];
	sigB[indexWord( 4, 0 )] = bWPtr[indexWord( 4, 0 )];
    } else {
	expB = softfloat_shiftNormSigF128M( bWPtr, 0, sigB );
	if ( expB == -128 ) goto zero;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    expZ = expA + expB - 0x4000;
    softfloat_mul128MTo256M( sigA, sigB, sigProd );
    if (
	sigProd[indexWord( 8, 2 )]
	    || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
    ) {
	sigProd[indexWord( 8, 3 )] |= 1;
    }
    extSigZPtr = &sigProd[indexMultiwordHi( 8, 5 )];
    shiftDist = 16;
    if ( extSigZPtr[indexWordHi( 5 )] & 2 ) {
	++expZ;
	shiftDist = 15;
    }
    softfloat_shortShiftLeft160M( extSigZPtr, shiftDist, extSigZPtr );
    softfloat_roundPackMToF128M( signZ, expZ, extSigZPtr, zWPtr );
    return;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 zero:
    uiZ96 = packToF128UI96( signZ, 0, 0 );
 uiZ96:
    zWPtr[indexWordHi( 4 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = 0;
    zWPtr[indexWord( 4, 1 )] = 0;
    zWPtr[indexWord( 4, 0 )] = 0;
}

#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat class methodsFor:'documentation'!

documentation
"
    QuadFloats represent rational numbers with limited precision
    and are mapped to IEEE quadruple precision format (128bit).
    If the underlying cpu supports them natively, the machine format (long double) is
    used. Otherwise, a software emulation is done, which is much slower.
    Thus only use them, if you really need the additional precision;
    if not, use Float (which are doubles) or LongFloats which usually have IEEE extended precision (80bit).

    QuadFloats give you definite 128 bit quadruple floats,
    thus, code using quadFloats is guaranteed to be portable from one architecture to another.

    Representation:
	    128bit quadruple IEEE floats (16bytes);
	    112 bit mantissa,
	    16 bit exponent,
	    34 decimal digits (approx.)

    On Sparc CPUs, this is a native supported type (long double) and fast;
    on x86 CPUs, this is emulated and slow.

    Mixed mode arithmetic:
	quadFloat op anyFloat    -> longFloat
	longFloat op complex     -> complex

    Range and precision of storage formats: see LimitedPrecisionReal >> documentation

    [author:]
	Claus Gittinger

    [see also:]
	Number
	Float ShortFloat LongFloat Fraction FixedPoint Integer Complex
	FloatArray DoubleArray
	https://en.wikipedia.org/wiki/Extended_precision
"
! !

!QuadFloat class methodsFor:'instance creation'!

basicNew
    "return a new quadFloat - here we return 0.0
     - QuadFloats are usually NOT created this way ...
     Its implemented here to allow things like binary store & load
     of quadFloats.
     (but it is not a good idea to store the bits of a float - the reader might have a
      totally different representation - so floats should be
      binary stored in a device independent format)."

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;

    if (sizeof(long double) == sizeof(float128_t)) {
	__qMKLFLOAT(newFloat, 0.0);   /* OBJECT ALLOCATION */
    } else {
	float128_t qf;
	f64_to_f128M(0.0, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
    }
    RETURN (newFloat);
#endif /* SUPPORT_QUADFLOAT */
%}.
    self error:'QuadFloats not supported on this patform'

    "Created: / 06-06-2019 / 17:18:58 / Claus Gittinger"
!

fromFloat:aFloat
    "return a new quadFloat, given a float value"

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t f;

    if (__isFloatLike(aFloat)) {
	float64_t f = __floatVal(aFloat);
	float128_t qf;

	f64_to_f128M(f, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
	RETURN (newFloat);
    }
#endif /* SUPPORT_QUADFLOAT */
%}.
    aFloat isFloat ifTrue:[
	self errorUnsupported
    ].
    ArgumentError raise

    "
     QuadFloat fromFloat:123.0
     123.0 asQuadFloat
     123 asQuadFloat
    "

    "Created: / 06-06-2019 / 18:01:03 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'coercing & converting'!

coerce:aNumber
    "convert the argument aNumber into an instance of the receiver's class and return it."

    ^ aNumber asQuadFloat.

    "Created: / 06-06-2019 / 16:51:01 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'constants'!

NaN
    "return a shortFloat which represents not-a-Number (i.e. an invalid number)"

    NaN isNil ifTrue:[
	NaN := super NaN
    ].
    ^ NaN

    "
     self NaN
    "

    "Created: / 06-06-2019 / 16:56:09 / Claus Gittinger"
!

e
    "return the constant e as quadFloat"

    E isNil ifTrue:[
	"/ eDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	E  := self readFrom:(Number eDigits)
    ].
    ^ E

    "Created: / 06-06-2019 / 17:01:54 / Claus Gittinger"
!

pi
    "return the constant pi as quadFloat"

    Pi isNil ifTrue:[
	"/ piDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	Pi  := self readFrom:(Number piDigits)
    ].
    ^ Pi

    "Created: / 06-06-2019 / 17:09:51 / Claus Gittinger"
! !

!QuadFloat methodsFor:'double dispatching'!

errorUnsupported
    self error:'QuadFloats not supported on this patform'
!

sumFromQuadFloat:aQuadFloat
%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_add( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
!

differenceFromQuadFloat:aQuadFloat
%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_sub( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
! !

!QuadFloat methodsFor:'arithmetic'!

* aNumber
    "return the product of the receiver and the argument."

    ^ aNumber productFromQuadFloat:self
!

+ aNumber
    "return the sum of the receiver and the argument, aNumber"

    ^ aNumber sumFromQuadFloat:self
!

- aNumber
    "return the difference of the receiver and the argument, aNumber"

    ^ aNumber differenceFromQuadFloat:self
!

/ aNumber
    "return the quotient of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber quotientFromQuadFloat:self
!

rem: aNumber
    "return the floating point remainder of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber remainderFromLongFloat:self
! !

!QuadFloat class methodsFor:'documentation'!

version_CVS
    ^ '$Header$'
! !