--- a/HalfFloatArray.st Tue Mar 19 11:31:44 2019 +0100
+++ b/HalfFloatArray.st Tue Mar 19 14:07:46 2019 +0100
@@ -26,52 +26,68 @@
typedef unsigned short uint16;
typedef unsigned int uint32;
+#if defined(__GNUC__) || defined(__clang__) || defined(__MINGW32__) // Compilers that ship the F16C intrinsics; NOTE(review): presumably also needs <immintrin.h> and F16C enabled (-mf16c) - confirm
+inline float
+XMConvertHalfToFloat(HALF Value) { // Converts one half-precision bit pattern to a float using the hardware conversion
+ __m128i V1 = _mm_cvtsi32_si128( (uint32) Value ); // Half bits go into the lowest 32-bit lane, upper lanes are zeroed
+ __m128 V2 = _mm_cvtph_ps( V1 ); // Packed half -> packed single conversion
+ return _mm_cvtss_f32( V2 ); // Only the lowest lane carries the result
+}
+
+inline HALF
+XMConvertFloatToHalf(float Value) { // Converts one float to a half-precision bit pattern using the hardware conversion
+ __m128 V1 = _mm_set_ss( Value ); // Value in the lowest lane, upper lanes are zeroed
+ __m128i V2 = _mm_cvtps_ph( V1, 0 ); // Rounding control 0 selects round-to-nearest-even
+ return (HALF) _mm_cvtsi128_si32(V2); // Low 32 bits hold the half; C-style cast keeps this valid in both C and C++
+}
+#endif
+
//
// convert a halffloat (16-bit float) to a float
//
float
__STX_halffloat_to_float(halffloat h) {
- int e;
- uint16 hs, he, hm;
- uint32 xs, xe, xm;
- int32 xes;
- union {
- uint32 u32;
- float f32;
- } u;
+ int e; // Normalization shift count (minus one) for denormal inputs
+ uint16 hs, he, hm; // Half sign, exponent and mantissa fields, kept in their bit positions
+ uint32 xs, xe, xm; // Single sign, exponent and mantissa fields, in their bit positions
+ int32 xes; // Signed working value of the rebiased single exponent
+ union { // Lets the assembled 32-bit pattern be read back as a float
+ uint32 u32;
+ float f32;
+ } u;
- if( (h & 0x7FFFu) == 0 ) { // Signed zero
- u.u32 = ((uint32) h) << 16; // Return the signed zero
- } else { // Not zero
- hs = h & 0x8000u; // Pick off sign bit
- he = h & 0x7C00u; // Pick off exponent bits
- hm = h & 0x03FFu; // Pick off mantissa bits
- if( he == 0 ) { // Denormal will convert to normalized
- e = -1; // The following loop figures out how much extra to adjust the exponent
- do {
- e++;
- hm <<= 1;
- } while( (hm & 0x0400u) == 0 ); // Shift until leading bit overflows into exponent bit
- xs = ((uint32) hs) << 16; // Sign bit
- xes = ((uint32) (he >> 10)) - 15 + 127 - e; // Exponent unbias the halfp, then bias the single
- xe = (uint32) (xes << 23); // Exponent
- xm = ((uint32) (hm & 0x03FFu)) << 13; // Mantissa
- u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
- } else if( he == 0x7C00u ) { // Inf or NaN (all the exponent bits are set)
- if( hm == 0 ) { // If mantissa is zero ...
- u.u32 = (((uint32) hs) << 16) | ((uint32) 0x7F800000u); // Signed Inf
- } else {
- u.u32 = (uint32) 0xFFC00000u; // NaN, only 1st mantissa bit set
- }
- } else { // Normalized number
- xs = ((uint32) hs) << 16; // Sign bit
- xes = ((uint32) (he >> 10)) - 15 + 127; // Exponent unbias the halfp, then bias the single
- xe = (uint32) (xes << 23); // Exponent
- xm = ((uint32) hm) << 13; // Mantissa
- u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
- }
- }
- return u.f32;
+ if( (h & 0x7FFFu) == 0 ) { // Every bit below the sign is clear: signed zero
+ u.u32 = ((uint32) h) << 16; // Move the sign from bit 15 to bit 31; all other bits stay zero
+ } else { // Not zero
+ hs = h & 0x8000u; // Pick off the sign bit (bit 15)
+ he = h & 0x7C00u; // Pick off the 5 exponent bits (bits 14..10)
+ hm = h & 0x03FFu; // Pick off the 10 mantissa bits (bits 9..0)
+ if( he == 0 ) { // Half denormal: becomes a normalized single
+ e = -1; // After the loop below, e is the number of left shifts minus one
+ do {
+ e++;
+ hm <<= 1;
+ } while( (hm & 0x0400u) == 0 ); // Shift until the leading mantissa bit reaches bit 10
+ xs = ((uint32) hs) << 16; // Sign moved to bit 31
+ xes = ((uint32) (he >> 10)) - 15 + 127 - e; // Remove the half bias (15), add the single bias (127), subtract the normalization shift
+ xe = (uint32) (xes << 23); // Exponent field starts at bit 23
+ xm = ((uint32) (hm & 0x03FFu)) << 13; // Mask off the leading bit at bit 10, then widen 10 mantissa bits to 23
+ u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
+ } else if( he == 0x7C00u ) { // Inf or NaN (all the exponent bits are set)
+ if( hm == 0 ) { // Zero mantissa means infinity
+ u.u32 = (((uint32) hs) << 16) | ((uint32) 0x7F800000u); // Signed Inf: input sign plus an all-ones exponent
+ } else {
+ u.u32 = (uint32) 0xFFC00000u; // Every NaN maps to this fixed pattern (sign, all exponent bits, top mantissa bit); input sign and payload are not kept
+ }
+ } else { // Normalized number
+ xs = ((uint32) hs) << 16; // Sign moved to bit 31
+ xes = ((uint32) (he >> 10)) - 15 + 127; // Remove the half bias (15), add the single bias (127)
+ xe = (uint32) (xes << 23); // Exponent field starts at bit 23
+ xm = ((uint32) hm) << 13; // Widen 10 mantissa bits to 23; the low 13 bits are zero
+ u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
+ }
+ }
+ return u.f32; // Read the assembled bit pattern back as a float
}
//
@@ -79,57 +95,57 @@
//
halffloat
__STX_float_to_halffloat(float f32) {
- uint16 hs, he, hm;
- uint32 x, xs, xe, xm;
- int hes;
- union {
- uint32 u32;
- float f32;
- } u;
- halffloat h;
+ uint16 hs, he, hm; // Half sign, exponent and mantissa fields, in their bit positions
+ uint32 x, xs, xe, xm; // Raw single bits, then its sign, exponent and mantissa fields
+ int hes; // Signed rebiased half exponent, checked against the half range before use
+ union { // Exposes the 32-bit pattern of the float argument
+ uint32 u32;
+ float f32;
+ } u;
+ halffloat h; // Resulting half bit pattern
- u.f32 = f32;
- x = u.u32;
- if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero
- h = (uint16) (x >> 16); // Return the signed zero
- } else { // Not zero
- xs = x & 0x80000000u; // Pick off sign bit
- xe = x & 0x7F800000u; // Pick off exponent bits
- xm = x & 0x007FFFFFu; // Pick off mantissa bits
- if( xe == 0 ) { // Denormal will underflow, return a signed zero
- h = (uint16) (xs >> 16);
- } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set)
- if( xm == 0 ) { // If mantissa is zero ...
- h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf
- } else {
- h = (uint16) 0xFE00u; // NaN, only 1st mantissa bit set
- }
- } else { // Normalized number
- hs = (uint16) (xs >> 16); // Sign bit
- hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp
- if( hes >= 0x1F ) { // Overflow
- h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf
- } else if( hes <= 0 ) { // Underflow
- if( (14 - hes) > 24 ) { // Mantissa shifted all the way off & no rounding possibility
- hm = (uint16) 0u; // Set mantissa to zero
- } else {
- xm |= 0x00800000u; // Add the hidden leading bit
- hm = (uint16) (xm >> (14 - hes)); // Mantissa
- if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding
- hm += (uint16) 1u; // Round, might overflow into exp bit, but this is OK
- }
- h = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
- } else {
- he = (uint16) (hes << 10); // Exponent
- hm = (uint16) (xm >> 13); // Mantissa
- if( xm & 0x00001000u ) // Check for rounding
- h = (hs | he | hm) + (uint16) 1u; // Round, might overflow to inf, this is OK
- else
- h = (hs | he | hm); // No rounding
- }
- }
- }
- return h;
+ u.f32 = f32; // Store the float so its bits can be read through the union
+ x = u.u32; // Raw 32-bit pattern of the input
+ if( (x & 0x7FFFFFFFu) == 0 ) { // Every bit below the sign is clear: signed zero
+ h = (uint16) (x >> 16); // Move the sign from bit 31 to bit 15; all other bits are zero
+ } else { // Not zero
+ xs = x & 0x80000000u; // Pick off the sign bit (bit 31)
+ xe = x & 0x7F800000u; // Pick off the 8 exponent bits (bits 30..23)
+ xm = x & 0x007FFFFFu; // Pick off the 23 mantissa bits (bits 22..0)
+ if( xe == 0 ) { // Single denormal is flushed to a signed zero
+ h = (uint16) (xs >> 16); // Keep only the sign, moved to bit 15
+ } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set)
+ if( xm == 0 ) { // Zero mantissa means infinity
+ h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf: input sign plus an all-ones exponent
+ } else {
+ h = (uint16) 0xFE00u; // Every NaN maps to this fixed pattern (sign, all exponent bits, top mantissa bit); input sign and payload are not kept
+ }
+ } else { // Normalized number
+ hs = (uint16) (xs >> 16); // Sign moved to bit 15
+ hes = ((int)(xe >> 23)) - 127 + 15; // Remove the single bias (127), add the half bias (15)
+ if( hes >= 0x1F ) { // Exponent does not fit below the all-ones Inf/NaN code: overflow
+ h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf
+ } else if( hes <= 0 ) { // Too small for a normalized half: result is a denormal or zero
+ if( (14 - hes) > 24 ) { // Shift exceeds the 24 significant bits, so not even a rounding bit survives
+ hm = (uint16) 0u; // Set mantissa to zero
+ } else {
+ xm |= 0x00800000u; // Add the hidden leading bit (bit 23)
+ hm = (uint16) (xm >> (14 - hes)); // Denormal mantissa: the shift grows as the exponent falls further below range
+ if( (xm >> (13 - hes)) & 0x00000001u ) // Highest discarded bit is set: round up
+ hm += (uint16) 1u; // A carry into the exponent field yields the smallest normalized half, which is OK
+ }
+ h = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
+ } else {
+ he = (uint16) (hes << 10); // Exponent field starts at bit 10
+ hm = (uint16) (xm >> 13); // Keep the top 10 of the 23 mantissa bits
+ if( xm & 0x00001000u ) // Highest discarded bit (bit 12) is set: round up, ties included
+ h = (hs | he | hm) + (uint16) 1u; // A carry may ripple into the exponent and reach Inf, this is OK
+ else
+ h = (hs | he | hm); // No rounding
+ }
+ }
+ }
+ return h; // Half bit pattern assembled above
}
%}