--- a/HalfFloatArray.st	Tue Mar 19 11:31:44 2019 +0100
+++ b/HalfFloatArray.st	Tue Mar 19 14:07:46 2019 +0100
@@ -26,52 +26,68 @@
 typedef unsigned short uint16;
 typedef unsigned int   uint32;
 
+#if (defined(__GNUC__) || defined(__clang__)) && defined(__F16C__)  // F16C must be enabled for the target (e.g. -mf16c); relies on <immintrin.h>
+static inline float  // half -> float using the hardware half-to-single conversion
+XMConvertHalfToFloat(HALF Value) {  // NOTE(review): HALF is declared outside this hunk; presumably a 16-bit unsigned type - confirm
+    __m128i V1 = _mm_cvtsi32_si128( (int) Value );  // half bits go into the low lane; plain C cast, this section is compiled as C
+    __m128 V2 = _mm_cvtph_ps( V1 );
+    return _mm_cvtss_f32( V2 );  // only lane 0 carries the converted value
+}
+
+static inline HALF  // float -> half; static so that no external definition is needed at link time
+XMConvertFloatToHalf(float Value) {
+    __m128 V1 = _mm_set_ss( Value );
+    __m128i V2 = _mm_cvtps_ph( V1, 0 );  // immediate 0 selects round-to-nearest-even
+    return (HALF) _mm_cvtsi128_si32( V2 );  // the cast narrows the 32-bit lane to HALF
+}
+#endif
+
 //
 // convert a halffloat (16-bit float) to a float
 //
 float
 __STX_halffloat_to_float(halffloat h) {
-	int e;
-	uint16 hs, he, hm;
-	uint32 xs, xe, xm;
-	int32 xes;
-	union {
-	    uint32 u32;
-	    float f32;
-	} u;
+        int e;  // extra exponent adjustment counted while normalizing a denormal half
+        uint16 hs, he, hm;  // sign, exponent and mantissa fields picked off the half
+        uint32 xs, xe, xm;  // sign, exponent and mantissa fields of the single being built
+        int32 xes;  // rebiased single exponent before it is shifted into position
+        union {  // lets the assembled 32-bit pattern be read back as a float
+            uint32 u32;
+            float f32;
+        } u;
 
-	if( (h & 0x7FFFu) == 0 ) {  // Signed zero
-	    u.u32 = ((uint32) h) << 16;  // Return the signed zero
-	} else { // Not zero
-	    hs = h & 0x8000u;  // Pick off sign bit
-	    he = h & 0x7C00u;  // Pick off exponent bits
-	    hm = h & 0x03FFu;  // Pick off mantissa bits
-	    if( he == 0 ) {  // Denormal will convert to normalized
-		e = -1; // The following loop figures out how much extra to adjust the exponent
-		do {
-		    e++;
-		    hm <<= 1;
-		} while( (hm & 0x0400u) == 0 ); // Shift until leading bit overflows into exponent bit
-		xs = ((uint32) hs) << 16; // Sign bit
-		xes = ((uint32) (he >> 10)) - 15 + 127 - e; // Exponent unbias the halfp, then bias the single
-		xe = (uint32) (xes << 23); // Exponent
-		xm = ((uint32) (hm & 0x03FFu)) << 13; // Mantissa
-		u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
-	    } else if( he == 0x7C00u ) {  // Inf or NaN (all the exponent bits are set)
-		if( hm == 0 ) { // If mantissa is zero ...
-		    u.u32 = (((uint32) hs) << 16) | ((uint32) 0x7F800000u); // Signed Inf
-		} else {
-		    u.u32 = (uint32) 0xFFC00000u; // NaN, only 1st mantissa bit set
-		}
-	    } else { // Normalized number
-		xs = ((uint32) hs) << 16; // Sign bit
-		xes = ((uint32) (he >> 10)) - 15 + 127; // Exponent unbias the halfp, then bias the single
-		xe = (uint32) (xes << 23); // Exponent
-		xm = ((uint32) hm) << 13; // Mantissa
-		u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
-	    }
-	}
-	return u.f32;
+        if( (h & 0x7FFFu) == 0 ) {  // Signed zero: every bit except the sign is clear
+            u.u32 = ((uint32) h) << 16;  // Sign moves from bit 15 to bit 31, all other bits stay zero
+        } else { // Not zero
+            hs = h & 0x8000u;  // Pick off sign bit
+            he = h & 0x7C00u;  // Pick off exponent bits (left in place, bits 10..14)
+            hm = h & 0x03FFu;  // Pick off mantissa bits
+            if( he == 0 ) {  // Denormal half: converted to a normalized single
+                e = -1; // After the loop, e is the number of shifts minus one
+                do {
+                    e++;
+                    hm <<= 1;
+                } while( (hm & 0x0400u) == 0 ); // Shift until the leading 1 reaches bit 10 (hm is non-zero here, so this terminates)
+                xs = ((uint32) hs) << 16; // Sign bit, moved to bit 31
+                xes = ((uint32) (he >> 10)) - 15 + 127 - e; // he is 0 in this branch, so this is 112 - e
+                xe = (uint32) (xes << 23); // Exponent, moved into bits 23..30
+                xm = ((uint32) (hm & 0x03FFu)) << 13; // Mask off the leading 1 at bit 10, then widen 10 mantissa bits to 23
+                u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
+            } else if( he == 0x7C00u ) {  // Inf or NaN (all the exponent bits are set)
+                if( hm == 0 ) { // Zero mantissa means infinity
+                    u.u32 = (((uint32) hs) << 16) | ((uint32) 0x7F800000u); // Inf with the input's sign
+                } else {
+                    u.u32 = (uint32) 0xFFC00000u; // NaN: always this fixed pattern; the input's sign and mantissa bits are not carried over
+                }
+            } else { // Normalized number
+                xs = ((uint32) hs) << 16; // Sign bit, moved to bit 31
+                xes = ((uint32) (he >> 10)) - 15 + 127; // Remove the half bias (15), apply the single bias (127)
+                xe = (uint32) (xes << 23); // Exponent, moved into bits 23..30
+                xm = ((uint32) hm) << 13; // Widen 10 mantissa bits to 23; the low 13 bits are zero
+                u.u32 = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
+            }
+        }
+        return u.f32;
 }
 
 //
@@ -79,57 +95,57 @@
 //
 halffloat
 __STX_float_to_halffloat(float f32) {
-	uint16    hs, he, hm;
-	uint32 x, xs, xe, xm;
-	int hes;
-	union {
-	    uint32 u32;
-	    float f32;
-	} u;
-	halffloat h;
+        uint16    hs, he, hm;  // sign, exponent and mantissa fields of the half being built
+        uint32 x, xs, xe, xm;  // raw bits of the single, and its sign, exponent and mantissa fields
+        int hes;  // rebiased half exponent; may be <= 0 (underflow) or >= 0x1F (overflow)
+        union {  // lets the float argument be read back as a 32-bit pattern
+            uint32 u32;
+            float f32;
+        } u;
+        halffloat h;  // result bit pattern
 
-	u.f32 = f32;
-	x = u.u32;
-	if( (x & 0x7FFFFFFFu) == 0 ) {  // Signed zero
-	    h = (uint16) (x >> 16);  // Return the signed zero
-	} else { // Not zero
-	    xs = x & 0x80000000u;  // Pick off sign bit
-	    xe = x & 0x7F800000u;  // Pick off exponent bits
-	    xm = x & 0x007FFFFFu;  // Pick off mantissa bits
-	    if( xe == 0 ) {  // Denormal will underflow, return a signed zero
-		h = (uint16) (xs >> 16);
-	    } else if( xe == 0x7F800000u ) {  // Inf or NaN (all the exponent bits are set)
-		if( xm == 0 ) { // If mantissa is zero ...
-		    h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf
-		} else {
-		    h = (uint16) 0xFE00u; // NaN, only 1st mantissa bit set
-		}
-	    } else { // Normalized number
-		hs = (uint16) (xs >> 16); // Sign bit
-		hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp
-		if( hes >= 0x1F ) {  // Overflow
-		    h = (uint16) ((xs >> 16) | 0x7C00u); // Signed Inf
-		} else if( hes <= 0 ) {  // Underflow
-		    if( (14 - hes) > 24 ) {  // Mantissa shifted all the way off & no rounding possibility
-			hm = (uint16) 0u;  // Set mantissa to zero
-		    } else {
-			xm |= 0x00800000u;  // Add the hidden leading bit
-			hm = (uint16) (xm >> (14 - hes)); // Mantissa
-			if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding
-			    hm += (uint16) 1u; // Round, might overflow into exp bit, but this is OK
-		    }
-		    h = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
-		} else {
-		    he = (uint16) (hes << 10); // Exponent
-		    hm = (uint16) (xm >> 13); // Mantissa
-		    if( xm & 0x00001000u ) // Check for rounding
-			h = (hs | he | hm) + (uint16) 1u; // Round, might overflow to inf, this is OK
-		    else
-			h = (hs | he | hm);  // No rounding
-		}
-	    }
-	}
-	return h;
+        u.f32 = f32;
+        x = u.u32;  // work on the raw bit pattern of the argument
+        if( (x & 0x7FFFFFFFu) == 0 ) {  // Signed zero: every bit except the sign is clear
+            h = (uint16) (x >> 16);  // Sign moves from bit 31 to bit 15, all other bits stay zero
+        } else { // Not zero
+            xs = x & 0x80000000u;  // Pick off sign bit
+            xe = x & 0x7F800000u;  // Pick off exponent bits (left in place, bits 23..30)
+            xm = x & 0x007FFFFFu;  // Pick off mantissa bits
+            if( xe == 0 ) {  // Denormal single: too small for a half, return a signed zero
+                h = (uint16) (xs >> 16);
+            } else if( xe == 0x7F800000u ) {  // Inf or NaN (all the exponent bits are set)
+                if( xm == 0 ) { // Zero mantissa means infinity
+                    h = (uint16) ((xs >> 16) | 0x7C00u); // Inf with the input's sign
+                } else {
+                    h = (uint16) 0xFE00u; // NaN: always this fixed pattern; the input's sign and mantissa bits are not carried over
+                }
+            } else { // Normalized number
+                hs = (uint16) (xs >> 16); // Sign bit, moved to bit 15
+                hes = ((int)(xe >> 23)) - 127 + 15; // Remove the single bias (127), apply the half bias (15)
+                if( hes >= 0x1F ) {  // Overflow: exponent does not fit in 5 bits
+                    h = (uint16) ((xs >> 16) | 0x7C00u); // Inf with the input's sign
+                } else if( hes <= 0 ) {  // Underflow: result is a denormal half or zero
+                    if( (14 - hes) > 24 ) {  // Shift count exceeds 24: even the hidden bit (bit 23) and its rounding bit are shifted out
+                        hm = (uint16) 0u;  // Set mantissa to zero
+                    } else {
+                        xm |= 0x00800000u;  // Add the hidden leading bit
+                        hm = (uint16) (xm >> (14 - hes)); // Mantissa, shifted right by the extra amount a denormal needs
+                        if( (xm >> (13 - hes)) & 0x00000001u ) // Round up when the highest discarded bit is set; lower discarded bits are not examined
+                            hm += (uint16) 1u; // Round, might overflow into exp bit, but this is OK
+                    }
+                    h = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
+                } else {
+                    he = (uint16) (hes << 10); // Exponent, moved into bits 10..14
+                    hm = (uint16) (xm >> 13); // Keep the top 10 of the 23 mantissa bits
+                    if( xm & 0x00001000u ) // Round up when the highest discarded bit (bit 12) is set; lower bits are not examined
+                        h = (hs | he | hm) + (uint16) 1u; // The carry may ripple into the exponent and up to Inf, which is the intended result
+                    else
+                        h = (hs | he | hm);  // No rounding
+                }
+            }
+        }
+        return h;
 }
 
 %}
changeset 4888	1b19c584754c
parent 4835	8f8949b8bac9
child 4889	7d32114f8359