
"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

LimitedPrecisionReal variableByteSubclass:#QuadFloat
	instanceVariableNames:''
	classVariableNames:'QuadFloatZero QuadFloatOne Pi E Epsilon NaN PositiveInfinity
		NegativeInfinity Halfpi HalfpiNegative'
	poolDictionaries:''
	category:'Magnitude-Numbers'
!

!QuadFloat primitiveDefinitions!

%{
#if __POINTER_SIZE__ == 8
# define SUPPORT_QUADFLOAT
#endif

#ifdef SUPPORT_QUADFLOAT
/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
enum {
    softfloat_tininess_beforeRounding = 0,
    softfloat_tininess_afterRounding  = 1
};

/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
enum {
    softfloat_flag_inexact   =  1,
    softfloat_flag_underflow =  2,
    softfloat_flag_overflow  =  4,
    softfloat_flag_infinite  =  8,
    softfloat_flag_invalid   = 16
};
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.  (Mode "odd" is supported only if
| SoftFloat is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.)
*----------------------------------------------------------------------------*/
enum {
    softfloat_round_near_even   = 0,
    softfloat_round_minMag      = 1,
    softfloat_round_min         = 2,
    softfloat_round_max         = 3,
    softfloat_round_near_maxMag = 4,
    softfloat_round_odd         = 6
};
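
/* A sketch (not part of SoftFloat itself) of how these globals are meant to be used
   by the primitive functions below: select a rounding mode, clear the accumulated
   exception flags, perform an operation, then inspect the flags:

       softfloat_roundingMode = softfloat_round_near_even;
       softfloat_exceptionFlags = 0;
       f128M_add(&a, &b, &z);
       if (softfloat_exceptionFlags & softfloat_flag_inexact) {
	   // the result had to be rounded
       }
*/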
#define init_detectTininess softfloat_tininess_afterRounding

#if defined(LITTLEENDIAN) || defined(__LSB_FIRST__)
struct uint128          { uint64_t v0, v64; };
struct uint64_extra     { uint64_t extra, v; };
struct uint128_extra    { uint64_t extra; struct uint128 v; };
#else
struct uint128          { uint64_t v64, v0; };
struct uint64_extra     { uint64_t v, extra; };
struct uint128_extra    { struct uint128 v; uint64_t extra; };
#endif

typedef unsigned char bool;
typedef double float64_t;
union ui64_f64   { uint64_t ui; float64_t f; };
union ui128_f128 { struct uint128 ui; float128_t f; };

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 128-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF128UI96 0xFFFF8000
#define defaultNaNF128UI64 0
#define defaultNaNF128UI32 0
#define defaultNaNF128UI0  0
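/* Taken together, these four 32-bit words form the quiet NaN
   0xFFFF8000_00000000_00000000_00000000: sign bit set, all 15 exponent bits set,
   and the most significant fraction bit set. */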

struct commonNaN {
    bool sign;
#if defined(LITTLEENDIAN) || defined(__LSB_FIRST__)
    uint64_t v0, v64;
#else
    uint64_t v64, v0;
#endif
};
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };

#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveVariables!

%{
#ifdef SUPPORT_QUADFLOAT

uint_fast8_t softfloat_roundingMode = softfloat_round_near_even;
uint_fast8_t softfloat_detectTininess = init_detectTininess;
uint_fast8_t softfloat_exceptionFlags = 0;

const uint_least8_t softfloat_countLeadingZeros8[256] = {
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat primitiveFunctions!

%{
#ifdef SUPPORT_QUADFLOAT

#if defined(LITTLEENDIAN) || defined(__LSB_FIRST__)
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif
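
/* Worked example of the word-indexing macros above, for a float128_t viewed as
   four 32-bit words (total == 4):
     - little-endian: indexWord(4, n) == n,     indexWordHi(4) == 3, indexWordLo(4) == 0
     - big-endian:    indexWord(4, n) == 3 - n, indexWordHi(4) == 0, indexWordLo(4) == 3
   so zWPtr[indexWordHi(4)] always addresses the word holding the sign and exponent,
   independent of the machine's byte order. */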

#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )

#define signF128UI64( a64 ) ((bool) ((uint64_t) (a64)>>63))
#define expF128UI64( a64 ) ((int_fast32_t) ((a64)>>48) & 0x7FFF)
#define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
#define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))

#define signF128UI96( a96 ) ((bool) ((uint32_t) (a96)>>31))
#define expF128UI96( a96 ) ((int32_t) ((a96)>>16) & 0x7FFF)
#define fracF128UI96( a96 ) ((a96) & 0x0000FFFF)
#define packToF128UI96( sign, exp, sig96 ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<16) + (sig96))

#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
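
/* Worked example for the F128 field-extraction macros: the quadruple-precision
   value 1.0 is encoded as uiA64 == 0x3FFF000000000000, uiA0 == 0.  Then
   signF128UI64(uiA64) == 0, expF128UI64(uiA64) == 0x3FFF (the exponent bias),
   and fracF128UI64(uiA64) == 0; isNaNF128UI() is true only when the exponent
   field is all ones (0x7FFF) and the 112-bit fraction is non-zero. */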

#if 0
static inline
void
 softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
{
    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
}
#else
# define softfloat_commonNaNToF128M( aPtr, zWPtr ) \
{ \
    (zWPtr)[indexWord( 4, 3 )] = defaultNaNF128UI96; \
    (zWPtr)[indexWord( 4, 2 )] = defaultNaNF128UI64; \
    (zWPtr)[indexWord( 4, 1 )] = defaultNaNF128UI32; \
    (zWPtr)[indexWord( 4, 0 )] = defaultNaNF128UI0; \
}
#endif

void
softfloat_raiseFlags( uint_fast8_t flags ) {
    softfloat_exceptionFlags |= flags;
}

bool
softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 == b64) && (a0 == b0);
}

bool
softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 ){
    return (a64 < b64) || ((a64 == b64) && (a0 < b0));
}

uint_fast8_t
softfloat_countLeadingZeros64( uint64_t a )
{
    uint_fast8_t count;
    uint32_t a32;

    count = 0;
    a32 = a>>32;
    if ( ! a32 ) {
	count = 32;
	a32 = a;
    }
    /*------------------------------------------------------------------------
    | From here, result is current count + count leading zeros of `a32'.
    *------------------------------------------------------------------------*/
    if ( a32 < 0x10000 ) {
	count += 16;
	a32 <<= 16;
    }
    if ( a32 < 0x1000000 ) {
	count += 8;
	a32 <<= 8;
    }
    count += softfloat_countLeadingZeros8[a32>>24];
    return count;
}

struct exp16_sig64
softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
    int_fast8_t shiftDist;
    struct exp16_sig64 z;

    shiftDist = softfloat_countLeadingZeros64( sig ) - 11;
    z.exp = 1 - shiftDist;
    z.sig = sig<<shiftDist;
    return z;

}

struct uint128_extra
softfloat_shiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128_extra z;

    u8NegDist = -dist;
    if ( dist < 64 ) {
	z.v.v64 = a64>>dist;
	z.v.v0 = a64<<(u8NegDist & 63) | a0>>dist;
	z.extra = a0<<(u8NegDist & 63);
    } else {
	z.v.v64 = 0;
	if ( dist == 64 ) {
	    z.v.v0 = a64;
	    z.extra = a0;
	} else {
	    extra |= a0;
	    if ( dist < 128 ) {
		z.v.v0 = a64>>(dist & 63);
		z.extra = a64<<(u8NegDist & 63);
	    } else {
		z.v.v0 = 0;
		z.extra = (dist == 128) ? a64 : (a64 != 0);
	    }
	}
    }
    z.extra |= (extra != 0);
    return z;
}

struct uint128_extra
softfloat_shortShiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
{
    uint_fast8_t negDist = -dist;
    struct uint128_extra z;
    z.v.v64 = a64>>dist;
    z.v.v0 = a64<<(negDist & 63) | a0>>dist;
    z.extra = a0<<(negDist & 63) | (extra != 0);
    return z;
}

struct uint128
 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128 z;

    if ( dist < 64 ) {
	u8NegDist = -dist;
	z.v64 = a64>>dist;
	z.v0 =
	    a64<<(u8NegDist & 63) | a0>>dist
		| ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
    } else {
	z.v64 = 0;
	z.v0 =
	    (dist < 127)
		? a64>>(dist & 63)
		      | (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
			     != 0)
		: ((a64 | a0) != 0);
    }
    return z;
}

struct uint128
softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    struct uint128 z;
    z.v64 = a64<<dist | a0>>(-dist & 63);
    z.v0 = a0<<dist;
    return z;
}

struct uint128
softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 + b0;
    z.v64 = a64 + b64 + (z.v0 < a0);
    return z;
}

struct uint128
 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;
    z.v0 = a0 - b0;
    z.v64 = a64 - b64;
    z.v64 -= (a0 < b0);
    return z;
}

float128_t
softfloat_roundPackToF128(
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig64,
     uint_fast64_t sig0,
     uint_fast64_t sigExtra
 )
{
    uint_fast8_t roundingMode;
    bool roundNearEven, doIncrement, isTiny;
    struct uint128_extra sig128Extra;
    uint_fast64_t uiZ64, uiZ0;
    struct uint128 sig128;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
	doIncrement =
	    (roundingMode
		 == (sign ? softfloat_round_min : softfloat_round_max))
		&& sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
	if ( exp < 0 ) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    isTiny =
		   (softfloat_detectTininess
			== softfloat_tininess_beforeRounding)
		|| (exp < -1)
		|| ! doIncrement
		|| softfloat_lt128(
		       sig64,
		       sig0,
		       UINT64_C( 0x0001FFFFFFFFFFFF ),
		       UINT64_C( 0xFFFFFFFFFFFFFFFF )
		   );
	    sig128Extra =
		softfloat_shiftRightJam128Extra( sig64, sig0, sigExtra, -exp );
	    sig64 = sig128Extra.v.v64;
	    sig0  = sig128Extra.v.v0;
	    sigExtra = sig128Extra.extra;
	    exp = 0;
	    if ( isTiny && sigExtra ) {
		softfloat_raiseFlags( softfloat_flag_underflow );
	    }
	    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
	    if (
		   ! roundNearEven
		&& (roundingMode != softfloat_round_near_maxMag)
	    ) {
		doIncrement =
		    (roundingMode
			 == (sign ? softfloat_round_min : softfloat_round_max))
			&& sigExtra;
	    }
	} else if (
	       (0x7FFD < exp)
	    || ((exp == 0x7FFD)
		    && softfloat_eq128(
			   sig64,
			   sig0,
			   UINT64_C( 0x0001FFFFFFFFFFFF ),
			   UINT64_C( 0xFFFFFFFFFFFFFFFF )
		       )
		    && doIncrement)
	) {
	    /*----------------------------------------------------------------
	    *----------------------------------------------------------------*/
	    softfloat_raiseFlags(
		softfloat_flag_overflow | softfloat_flag_inexact );
	    if (
		   roundNearEven
		|| (roundingMode == softfloat_round_near_maxMag)
		|| (roundingMode
			== (sign ? softfloat_round_min : softfloat_round_max))
	    ) {
		uiZ64 = packToF128UI64( sign, 0x7FFF, 0 );
		uiZ0  = 0;
	    } else {
		uiZ64 =
		    packToF128UI64(
			sign, 0x7FFE, UINT64_C( 0x0000FFFFFFFFFFFF ) );
		uiZ0 = UINT64_C( 0xFFFFFFFFFFFFFFFF );
	    }
	    goto uiZ;
	}
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( sigExtra ) {
	softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
	if ( roundingMode == softfloat_round_odd ) {
	    sig0 |= 1;
	    goto packReturn;
	}
#endif
    }
    if ( doIncrement ) {
	sig128 = softfloat_add128( sig64, sig0, 0, 1 );
	sig64 = sig128.v64;
	sig0 =
	    sig128.v0
		& ~(uint64_t)
		       (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
			    & roundNearEven);
    } else {
	if ( ! (sig64 | sig0) ) exp = 0;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
    uiZ64 = packToF128UI64( sign, exp, sig64 );
    uiZ0  = sig0;
 uiZ:
    uZ.ui.v64 = uiZ64;
    uZ.ui.v0  = uiZ0;
    return uZ.f;
}

float128_t
 softfloat_normRoundPackToF128(
     bool sign, int_fast32_t exp, uint_fast64_t sig64, uint_fast64_t sig0 )
{
    int_fast8_t shiftDist;
    struct uint128 sig128;
    union ui128_f128 uZ;
    uint_fast64_t sigExtra;
    struct uint128_extra sig128Extra;

    if ( ! sig64 ) {
	exp -= 64;
	sig64 = sig0;
	sig0 = 0;
    }
    shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
    exp -= shiftDist;
    if ( 0 <= shiftDist ) {
	if ( shiftDist ) {
	    sig128 = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
	    sig64 = sig128.v64;
	    sig0  = sig128.v0;
	}
	if ( (uint32_t) exp < 0x7FFD ) {
	    uZ.ui.v64 = packToF128UI64( sign, sig64 | sig0 ? exp : 0, sig64 );
	    uZ.ui.v0  = sig0;
	    return uZ.f;
	}
	sigExtra = 0;
    } else {
	sig128Extra =
	    softfloat_shortShiftRightJam128Extra( sig64, sig0, 0, -shiftDist );
	sig64 = sig128Extra.v.v64;
	sig0  = sig128Extra.v.v0;
	sigExtra = sig128Extra.extra;
    }
    return softfloat_roundPackToF128( sign, exp, sig64, sig0, sigExtra );
}

struct uint128
softfloat_propagateNaNF128UI(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0
 )
{
    bool isSigNaNA, isSigNaNB;
    uint_fast64_t uiNonsigA64, uiNonsigB64, uiMagA64, uiMagB64;
    struct uint128 uiZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
    isSigNaNB = softfloat_isSigNaNF128UI( uiB64, uiB0 );
    /*------------------------------------------------------------------------
    | Make NaNs non-signaling.
    *------------------------------------------------------------------------*/
    uiNonsigA64 = uiA64 | UINT64_C( 0x0000800000000000 );
    uiNonsigB64 = uiB64 | UINT64_C( 0x0000800000000000 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( isSigNaNA | isSigNaNB ) {
	softfloat_raiseFlags( softfloat_flag_invalid );
	if ( isSigNaNA ) {
	    if ( isSigNaNB ) goto returnLargerMag;
	    if ( isNaNF128UI( uiB64, uiB0 ) ) goto returnB;
	    goto returnA;
	} else {
	    if ( isNaNF128UI( uiA64, uiA0 ) ) goto returnA;
	    goto returnB;
	}
    }
 returnLargerMag:
    uiMagA64 = uiA64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    uiMagB64 = uiB64 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
    if ( uiMagA64 < uiMagB64 ) goto returnB;
    if ( uiMagB64 < uiMagA64 ) goto returnA;
    if ( uiA0 < uiB0 ) goto returnB;
    if ( uiB0 < uiA0 ) goto returnA;
    if ( uiNonsigA64 < uiNonsigB64 ) goto returnA;
 returnB:
    uiZ.v64 = uiNonsigB64;
    uiZ.v0  = uiB0;
    return uiZ;
 returnA:
    uiZ.v64 = uiNonsigA64;
    uiZ.v0  = uiA0;
    return uiZ;

}

void
f64_to_f128M( float64_t a, float128_t *zPtr )
{
    uint32_t *zWPtr;
    union ui64_f64 uA;
    uint64_t uiA;
    bool sign;
    int_fast16_t exp;
    uint64_t frac;
    struct commonNaN commonNaN;
    uint32_t uiZ96;
    struct exp16_sig64 normExpSig;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signF64UI( uiA );
    exp  = expF64UI( uiA );
    frac = fracF64UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 0 )] = 0;
    if ( exp == 0x7FF ) {
	if ( frac ) {
	    softfloat_f64UIToCommonNaN( uiA, &commonNaN );
	    softfloat_commonNaNToF128M( &commonNaN, zWPtr );
	    return;
	}
	uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
	goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( ! exp ) {
	if ( ! frac ) {
	    uiZ96 = packToF128UI96( sign, 0, 0 );
	    goto uiZ;
	}
	normExpSig = softfloat_normSubnormalF64Sig( frac );
	exp = normExpSig.exp - 1;
	frac = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    zWPtr[indexWord( 4, 1 )] = (uint32_t) frac<<28;
    frac >>= 4;
    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp + 0x3C00, frac>>32 );
    zWPtr[indexWord( 4, 2 )] = frac;
    return;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 uiZ:
    zWPtr[indexWord( 4, 3 )] = uiZ96;
    zWPtr[indexWord( 4, 2 )] = 0;
    zWPtr[indexWord( 4, 1 )] = 0;

}

static float128_t
softfloat_addMagsF128(
     uint64_t uiA64,
     uint64_t uiA0,
     uint64_t uiB64,
     uint64_t uiB0,
     bool signZ
 )
{
    int32_t expA;
    struct uint128 sigA;
    int32_t expB;
    struct uint128 sigB;
    int32_t expDiff;
    struct uint128 uiZ, sigZ;
    int32_t expZ;
    uint64_t sigZExtra;
    struct uint128_extra sig128Extra;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    expDiff = expA - expB;
    if ( ! expDiff ) {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	sigZ = softfloat_add128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
	if ( ! expA ) {
	    uiZ.v64 = packToF128UI64( signZ, 0, sigZ.v64 );
	    uiZ.v0  = sigZ.v0;
	    goto uiZ;
	}
	expZ = expA;
	sigZ.v64 |= UINT64_C( 0x0002000000000000 );
	sigZExtra = 0;
	goto shiftRight1;
    }
    if ( expDiff < 0 ) {
	if ( expB == 0x7FFF ) {
	    if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	    uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
	    uiZ.v0  = 0;
	    goto uiZ;
	}
	expZ = expB;
	if ( expA ) {
	    sigA.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    ++expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigA.v64, sigA.v0, 0, -expDiff );
	sigA = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    } else {
	if ( expA == 0x7FFF ) {
	    if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	    uiZ.v64 = uiA64;
	    uiZ.v0  = uiA0;
	    goto uiZ;
	}
	expZ = expA;
	if ( expB ) {
	    sigB.v64 |= UINT64_C( 0x0001000000000000 );
	} else {
	    --expDiff;
	    sigZExtra = 0;
	    if ( ! expDiff ) goto newlyAligned;
	}
	sig128Extra =
	    softfloat_shiftRightJam128Extra( sigB.v64, sigB.v0, 0, expDiff );
	sigB = sig128Extra.v;
	sigZExtra = sig128Extra.extra;
    }
 newlyAligned:
    sigZ =
	softfloat_add128(
	    sigA.v64 | UINT64_C( 0x0001000000000000 ),
	    sigA.v0,
	    sigB.v64,
	    sigB.v0
	);
    --expZ;
    if ( sigZ.v64 < UINT64_C( 0x0002000000000000 ) ) goto roundAndPack;
    ++expZ;
 shiftRight1:
    sig128Extra =
	softfloat_shortShiftRightJam128Extra(
	    sigZ.v64, sigZ.v0, sigZExtra, 1 );
    sigZ = sig128Extra.v;
    sigZExtra = sig128Extra.extra;
 roundAndPack:
    return
	softfloat_roundPackToF128( signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}

float128_t
softfloat_subMagsF128(
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    struct uint128 sigA;
    int_fast32_t expB;
    struct uint128 sigB, sigZ;
    int_fast32_t expDiff, expZ;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 4 );
    sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 4 );
    expDiff = expA - expB;
    if ( 0 < expDiff ) goto expABigger;
    if ( expDiff < 0 ) goto expBBigger;
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
	softfloat_raiseFlags( softfloat_flag_invalid );
	uiZ.v64 = defaultNaNF128UI64;
	uiZ.v0  = defaultNaNF128UI0;
	goto uiZ;
    }
    expZ = expA;
    if ( ! expZ ) expZ = 1;
    if ( sigB.v64 < sigA.v64 ) goto aBigger;
    if ( sigA.v64 < sigB.v64 ) goto bBigger;
    if ( sigB.v0 < sigA.v0 ) goto aBigger;
    if ( sigA.v0 < sigB.v0 ) goto bBigger;
    uiZ.v64 =
	packToF128UI64(
	    (softfloat_roundingMode == softfloat_round_min), 0, 0 );
    uiZ.v0 = 0;
    goto uiZ;
 expBBigger:
    if ( expB == 0x7FFF ) {
	if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
	uiZ.v64 = packToF128UI64( signZ ^ 1, 0x7FFF, 0 );
	uiZ.v0  = 0;
	goto uiZ;
    }
    if ( expA ) {
	sigA.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	++expDiff;
	if ( ! expDiff ) goto newlyAlignedBBigger;
    }
    sigA = softfloat_shiftRightJam128( sigA.v64, sigA.v0, -expDiff );
 newlyAlignedBBigger:
    expZ = expB;
    sigB.v64 |= UINT64_C( 0x0010000000000000 );
 bBigger:
    signZ = ! signZ;
    sigZ = softfloat_sub128( sigB.v64, sigB.v0, sigA.v64, sigA.v0 );
    goto normRoundPack;
 expABigger:
    if ( expA == 0x7FFF ) {
	if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
	uiZ.v64 = uiA64;
	uiZ.v0  = uiA0;
	goto uiZ;
    }
    if ( expB ) {
	sigB.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
	--expDiff;
	if ( ! expDiff ) goto newlyAlignedABigger;
    }
    sigB = softfloat_shiftRightJam128( sigB.v64, sigB.v0, expDiff );
 newlyAlignedABigger:
    expZ = expA;
    sigA.v64 |= UINT64_C( 0x0010000000000000 );
 aBigger:
    sigZ = softfloat_sub128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
 normRoundPack:
    return softfloat_normRoundPackToF128( signZ, expZ - 5, sigZ.v64, sigZ.v0 );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}

void
f128M_add( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
{
    const uint64_t *aWPtr, *bWPtr;
    uint64_t uiA64, uiA0;
    int signA;
    uint64_t uiB64, uiB0;
    int signB;

    aWPtr = (const uint64_t *) aPtr;
    bWPtr = (const uint64_t *) bPtr;
    uiA64 = aWPtr[indexWord( 2, 1 )];
    uiA0  = aWPtr[indexWord( 2, 0 )];
    signA = signF128UI64( uiA64 );
    uiB64 = bWPtr[indexWord( 2, 1 )];
    uiB0  = bWPtr[indexWord( 2, 0 )];
    signB = signF128UI64( uiB64 );

    if ( signA == signB ) {
	*zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    } else {
	*zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
    }

}
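
/* Usage sketch for f64_to_f128M and f128M_add above
   (values chosen to be exactly representable, so no rounding occurs):

       float128_t a, b, z;
       f64_to_f128M(1.5, &a);     // widen C doubles to binary128
       f64_to_f128M(2.25, &b);
       f128M_add(&a, &b, &z);     // z now holds 3.75 as a binary128 value
*/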
#endif // SUPPORT_QUADFLOAT

%}
! !

!QuadFloat class methodsFor:'documentation'!

documentation
"
    QuadFloats represent rational numbers with limited precision
    and are mapped to IEEE quadruple precision format (128bit).
    If the underlying cpu supports them natively, the machine format (long double) is
    used. Otherwise, a software emulation is done, which is much slower.
    Thus, only use them if you really need the additional precision;
    if not, use Floats (which are doubles) or LongFloats (which usually have IEEE extended precision, i.e. 80bit).

    QuadFloats always give you 128 bit quadruple precision floats;
    thus, code using quadFloats is guaranteed to be portable from one architecture to another.

    Representation:
	    128bit quadruple precision IEEE floats (16bytes);
	    1 sign bit,
	    15 bit exponent,
	    112 bit mantissa (113 bits including the hidden bit),
	    34 decimal digits (approx.)

    On Sparc CPUs, this is a natively supported type (long double) and fast;
    on x86 CPUs, it is emulated in software and slow.

    Mixed mode arithmetic:
	quadFloat op anyFloat    -> quadFloat
	quadFloat op complex     -> complex

    Range and precision of storage formats: see LimitedPrecisionReal >> documentation
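
    example (a sketch; assumes quadFloats are supported on this platform):
	|a b|
	a := 123.0 asQuadFloat.
	b := 1.0 asQuadFloat.
	a + b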

    [author:]
	Claus Gittinger

    [see also:]
	Number
	Float ShortFloat LongFloat Fraction FixedPoint Integer Complex
	FloatArray DoubleArray
	https://en.wikipedia.org/wiki/Extended_precision
"
! !

!QuadFloat class methodsFor:'instance creation'!

basicNew
    "return a new quadFloat - here we return 0.0
     - QuadFloats are usually NOT created this way ...
     It's implemented here to allow things like binary store & load
     of quadFloats.
     (but it is not a good idea to store the bits of a float - the reader might have a
      totally different representation - so floats should be
      binary stored in a device independent format)."

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;

    if (sizeof(long double) == sizeof(float128_t)) {
	__qMKLFLOAT(newFloat, 0.0);   /* OBJECT ALLOCATION */
    } else {
	float128_t qf;
	f64_to_f128M(0.0, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
    }
    RETURN (newFloat);
#endif /* SUPPORT_QUADFLOAT */
%}.
    self error:'QuadFloats not supported on this platform'

    "Created: / 06-06-2019 / 17:18:58 / Claus Gittinger"
!

fromFloat:aFloat
    "return a new quadFloat, given a float value"

%{  /* NOCONTEXT */
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;

    if (__isFloatLike(aFloat)) {
	float64_t f = __floatVal(aFloat);
	float128_t qf;

	f64_to_f128M(f, &qf);
	__qMKQFLOAT(newFloat, qf);   /* OBJECT ALLOCATION */
	RETURN (newFloat);
    }
#endif /* SUPPORT_QUADFLOAT */
%}.
    aFloat isFloat ifTrue:[
	self errorUnsupported
    ].
    ArgumentError raise

    "
     QuadFloat fromFloat:123.0
     123.0 asQuadFloat
     123 asQuadFloat
    "

    "Created: / 06-06-2019 / 18:01:03 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'coercing & converting'!

coerce:aNumber
    "convert the argument aNumber into an instance of the receiver's class and return it."

    ^ aNumber asQuadFloat.
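
    "
     QuadFloat coerce:123
    "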

    "Created: / 06-06-2019 / 16:51:01 / Claus Gittinger"
! !

!QuadFloat class methodsFor:'constants'!

NaN
    "return a shortFloat which represents not-a-Number (i.e. an invalid number)"

    NaN isNil ifTrue:[
	NaN := super NaN
    ].
    ^ NaN

    "
     self NaN
    "

    "Created: / 06-06-2019 / 16:56:09 / Claus Gittinger"
!

e
    "return the constant e as quadFloat"

    E isNil ifTrue:[
	"/ eDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	E  := self readFrom:(Number eDigits)
    ].
    ^ E
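
    "
     self e
    "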

    "Created: / 06-06-2019 / 17:01:54 / Claus Gittinger"
!

pi
    "return the constant pi as quadFloat"

    Pi isNil ifTrue:[
	"/ piDigits has enough digits for 128bit IEEE quads
	"/ do not use as a literal constant here - we cannot depend on the underlying C-compiler here...
	Pi  := self readFrom:(Number piDigits)
    ].
    ^ Pi
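
    "
     self pi
    "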

    "Created: / 06-06-2019 / 17:09:51 / Claus Gittinger"
! !

!QuadFloat methodsFor:'double dispatching'!

errorUnsupported
    self error:'QuadFloats not supported on this platform'
!

sumFromQuadFloat:aQuadFloat
    "return the sum of the receiver and the argument, aQuadFloat.
     Sent via double dispatch from #+ when both operands are quadFloats"
%{
#ifdef SUPPORT_QUADFLOAT
    OBJ newFloat;
    float128_t result, myVal, argVal;

    myVal = __quadFloatVal(self);
    argVal = __quadFloatVal(aQuadFloat);
    f128M_add( &myVal, &argVal, &result );
    __qMKQFLOAT(newFloat, result);
    RETURN ( newFloat );
#endif // SUPPORT_QUADFLOAT
%}.
    self errorUnsupported
! !

!QuadFloat methodsFor:'arithmetic'!

* aNumber
    "return the product of the receiver and the argument."

    ^ aNumber productFromQuadFloat:self
!

+ aNumber
    "return the sum of the receiver and the argument, aNumber"

    ^ aNumber sumFromQuadFloat:self
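
    "uses double dispatch: the argument is asked to add a quadFloat
     (see sumFromQuadFloat: in the 'double dispatching' category)"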
!

- aNumber
    "return the difference of the receiver and the argument, aNumber"

    ^ aNumber differenceFromQuadFloat:self
!

/ aNumber
    "return the quotient of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber quotientFromQuadFloat:self
!

rem: aNumber
    "return the floating point remainder of the receiver and the argument, aNumber"

    aNumber isZero ifTrue:[
	"
	 No, you shalt not divide by zero
	"
	^ ZeroDivide raiseRequestWith:thisContext.
    ].
    ^ aNumber remainderFromQuadFloat:self
! !

!QuadFloat class methodsFor:'documentation'!

version_CVS
    ^ '$Header$'
! !