hg/stx-libbasic2: comparison QDouble.st

equal deleted inserted replaced

-:0a320155d78a
+:879309cae427
 # ifndef NAN
 #  include <bits/nan.h>
 # endif
 #endif
/*
 * x87 precision control.
 *
 * fpu_fix_start(old_cw_ptr) stores the current FPU control word in
 * *old_cw_ptr and switches the precision-control field from extended
 * to double; fpu_fix_end(old_cw_ptr) restores the saved setting.
 * Every fpu_fix_start must be paired with an fpu_fix_end before the
 * primitive returns.
 *
 * On targets without an x87 control word both macros are no-ops, so
 * callers may use them unconditionally.
 */
#if defined(__x86__) || defined(__x86_64__)
# ifndef _FPU_EXTENDED
#  define _FPU_EXTENDED 0x0300
# endif
# ifndef _FPU_DOUBLE
#  define _FPU_DOUBLE 0x0200
# endif
# if defined( __win32__ ) && (defined( __BORLANDC__ ) || defined( __VISUALC__ ))
#  if defined( __VISUALC__ )
/*
 * Microsoft's _control87 takes abstract flag values, not raw hardware
 * bits: 0x0300 would be the rounding-control mask there.
 * The values below are _MCW_PC (mask) and _PC_53 (53-bit precision).
 */
#   define QD_CW87_PC_MASK   0x00030000
#   define QD_CW87_PC_DOUBLE 0x00010000
#  else
/* Borland's _control87 uses the hardware bit layout. */
#   define QD_CW87_PC_MASK   _FPU_EXTENDED
#   define QD_CW87_PC_DOUBLE _FPU_DOUBLE
#  endif
#  if defined( __VISUALC__ ) && (defined( _M_X64 ) || defined( __x86_64__ ))
/*
 * The Microsoft runtime does not support changing the precision
 * control on x64 (doubles are computed in SSE2 there anyway).
 */
#   define fpu_fix_start(old_cw_ptr) { *(old_cw_ptr) = 0; }
#   define fpu_fix_end(old_cw_ptr)   { (void)(old_cw_ptr); }
#  else
#   define fpu_fix_start(old_cw_ptr)\
{\
    *(old_cw_ptr) = _control87(0, 0);\
    _control87(QD_CW87_PC_DOUBLE, QD_CW87_PC_MASK);\
}
#   define fpu_fix_end(old_cw_ptr)\
{\
    _control87(*(old_cw_ptr), QD_CW87_PC_MASK);\
}
#  endif
# else /* assume MINGW, GCC or CLANG */
/*
 * __asm__/__volatile__ instead of asm/volatile: the plain keyword is
 * not available in strict ISO modes (-ansi, -std=c99, -std=c11).
 * No trailing ';' in the definitions, the caller supplies it.
 */
#  ifndef _FPU_GETCW
#   define _FPU_GETCW(x) __asm__ __volatile__ ("fnstcw %0" : "=m" (x))
#  endif
#  ifndef _FPU_SETCW
#   define _FPU_SETCW(x) __asm__ __volatile__ ("fldcw %0" : : "m" (x))
#  endif
#  define fpu_fix_start(old_cw_ptr)\
{\
    volatile unsigned short qd_fpu_cw_, qd_fpu_new_cw_;\
    _FPU_GETCW(qd_fpu_cw_);\
    qd_fpu_new_cw_ = (unsigned short)((qd_fpu_cw_ & ~_FPU_EXTENDED) | _FPU_DOUBLE);\
    _FPU_SETCW(qd_fpu_new_cw_);\
    *(old_cw_ptr) = qd_fpu_cw_;\
}
#  define fpu_fix_end(old_cw_ptr)\
{\
    volatile unsigned short qd_fpu_cw_ = (unsigned short)*(old_cw_ptr);\
    _FPU_SETCW(qd_fpu_cw_);\
}
# endif
#else
/* No x87 control word on this target: nothing to save or restore. */
# define fpu_fix_start(old_cw_ptr) { *(old_cw_ptr) = 0; }
# define fpu_fix_end(old_cw_ptr)   { (void)(old_cw_ptr); }
#endif
 /*
  * Plain C carrier for one qd_real value: four double components,
  * stored in the array x.
  */
 struct qd_real
 {
     /* The Components. */
     double x[4];
 };
 struct __quadDoubleStruct {
 double s = a + b;\
 err = b - (s - a);\
 rslt = s; \
 }
/*
 * Stores s = a - b (as rounded by the FPU) in rslt and the correction
 * term (a - s) - b in err.
 * NOTE(review): presumably, like the "quick" variants of the QD library,
 * this is only exact when |a| >= |b| - confirm against the callers.
 *
 * a and b are evaluated exactly once and copied before anything is
 * stored, so rslt/err may alias a or b, and the arguments may be
 * arbitrary expressions or variables of any name.
 */
#define m_quick_two_diff(rslt, a, b, err) {\
    const double qd_qtd_a_ = (a);\
    const double qd_qtd_b_ = (b);\
    const double qd_qtd_s_ = qd_qtd_a_ - qd_qtd_b_;\
    (err) = (qd_qtd_a_ - qd_qtd_s_) - qd_qtd_b_;\
    (rslt) = qd_qtd_s_;\
}
 /*
  * Stores s = a + b (as rounded by the FPU) in rslt and the correction
  * term (a - (s - bb)) + (b - bb), with bb = s - a, in err.
  *
  * a and b are evaluated exactly once and copied before anything is
  * stored, so calls such as m_two_sum(c, c, e, e) are safe, and the
  * arguments may be arbitrary expressions or variables of any name.
  */
 #define m_two_sum(rslt, a, b, err) {\
     const double qd_ts_a_ = (a);\
     const double qd_ts_b_ = (b);\
     const double qd_ts_s_ = qd_ts_a_ + qd_ts_b_;\
     const double qd_ts_bb_ = qd_ts_s_ - qd_ts_a_;\
     (err) = (qd_ts_a_ - (qd_ts_s_ - qd_ts_bb_)) + (qd_ts_b_ - qd_ts_bb_);\
     (rslt) = qd_ts_s_;\
 }
/*
 * Computes fl(a-b) and err(a-b).
 *
 * rslt receives s = a - b (as rounded by the FPU); err receives
 * (a - (s - bb)) - (b + bb), with bb = s - a.
 *
 * a and b are evaluated exactly once and copied before anything is
 * stored, so rslt/err may alias a or b, and the arguments may be
 * arbitrary expressions or variables of any name.
 */
#define m_two_diff(rslt, a, b, err) {\
    const double qd_td_a_ = (a);\
    const double qd_td_b_ = (b);\
    const double qd_td_s_ = qd_td_a_ - qd_td_b_;\
    const double qd_td_bb_ = qd_td_s_ - qd_td_a_;\
    (err) = (qd_td_a_ - (qd_td_s_ - qd_td_bb_)) - (qd_td_b_ + qd_td_bb_);\
    (rslt) = qd_td_s_;\
}
 #define m_three_sum(a, b, c) { \
 double t1, t2, t3; \
 double b = __floatVal(aFloat);
 double p0, p1, p2, p3;
 double q0, q1, q2;
 double s0, s1, s2, s3, s4;
 OBJ newQD;
+int savedCV;
+fpu_fix_start(&savedCV);
 m_two_prod(p0, a[0], b, q0);
 m_two_prod(p1, a[1], b, q1);
 m_two_prod(p2, a[2], b, q2);
 p3 = a[3] * b;
 s3 = q1;
 s4 = q2 + p2;
 m_renorm5(s0, s1, s2, s3, s4);
+fpu_fix_end(&savedCV);
 __qNew_qdReal(newQD, s0, s1, s2, s3);
 RETURN( newQD );
 }
 %}.
 ^ super productFromFloat:aFloat.
 "
+(QDouble fromFloat:1.0) productFromFloat:2.0
+((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2.0
+((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2e20
 2.0 * (QDouble fromFloat:1.0)
-1e20 * (QDouble fromFloat:1.0)
+2.0 * (QDouble fromFloat:3.0)
+2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0))
+(2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0))) - (QDouble fromFloat:1e20) - (QDouble fromFloat:1e20)
 (2.0 * (QDouble fromFloat:1.0)) asFloat
 (1e20 * (QDouble fromFloat:1.0)) asFloat
 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
 "
 "Created: / 13-06-2017 / 00:58:56 / cg"
+"Modified: / 14-06-2017 / 11:42:57 / cg"
 !
 productFromQDouble:aQDouble
 %{
 if (__Class(aQDouble) == QDouble) {
 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue;
 double *b = __QuadDoubleInstPtr(aQDouble)->d_quadDoubleValue;
 OBJ newQD;
-// sloppy
+// sloppy
 double p0, p1, p2, p3, p4, p5;
 double q0, q1, q2, q3, q4, q5;
 double t0, t1;
 double s0, s1, s2;
+int savedCV;
+fpu_fix_start(&savedCV);
 /*
  * Six partial products of the leading components, via m_two_prod.
  * (The fprintf(stderr, ...) debug traces that followed each group
  * were removed: this primitive must not write to stderr on every
  * multiplication.)
  */
 m_two_prod(p0, a[0], b[0], q0);
 m_two_prod(p1, a[0], b[1], q1);
 m_two_prod(p2, a[1], b[0], q2);
 m_two_prod(p3, a[0], b[2], q3);
 m_two_prod(p4, a[1], b[1], q4);
 m_two_prod(p5, a[2], b[0], q5);
 /* Start Accumulation */
 m_three_sum(p1, p2, q0);
 /* Six-Three Sum  of p2, q1, q2, p3, p4, p5. */
 /* O(eps^3) order terms */
 s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5;
 m_renorm5(p0, p1, s0, s1, s2);
+fpu_fix_end(&savedCV);
 __qNew_qdReal(newQD, p0, p1, s0, s1);
 RETURN( newQD );
 }
 %}.
 ^ super productFromQDouble:aQDouble.
 "
+(QDouble fromFloat:1.0) * 2.0
 2.0 * (QDouble fromFloat:1.0)
+(QDouble fromFloat:1.0) * (QDouble fromFloat:2.0)
 1e20 * (QDouble fromFloat:1.0)
 (2.0 * (QDouble fromFloat:1.0)) asFloat
 (1e20 * (QDouble fromFloat:1.0)) asFloat
 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
 "
 "Created: / 13-06-2017 / 01:06:22 / cg"
+"Modified: / 14-06-2017 / 11:43:28 / cg"
 !
 quotientFromQDouble:aQDouble
 "sloppy"
 r renorm.
 ^ r
 "
 2.0 / (QDouble fromFloat:2.0)
+2.0 / (QDouble fromFloat:1.0)
 1e20 / (QDouble fromFloat:1.0)
 (2.0 / (QDouble fromFloat:1.0)) asFloat
 (1e20 / (QDouble fromFloat:1.0)) asFloat
 (QDouble fromFloat:2.0) / 2.0
 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue;
 double b = __floatVal(aFloat);
 double c0, c1, c2, c3;
 double e;
 OBJ newQD;
+int savedCV;
+fpu_fix_start(&savedCV);
 m_two_sum(c0, a[0], b, e);
 m_two_sum(c1 ,a[1], e, e);
 m_two_sum(c2, a[2], e, e);
 m_two_sum(c3, a[3], e, e);
 m_renorm5(c0, c1, c2, c3, e);
+fpu_fix_end(&savedCV);
 __qNew_qdReal(newQD, c0, c1, c2, c3);
 RETURN( newQD );
 }
 %}.
 ^ super sumFromFloat:aFloat.
 (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
 (1e20 + (QDouble fromFloat:1.0) + 1e-20) asDoubleArray
 "
 "Created: / 12-06-2017 / 17:16:41 / cg"
-"Modified: / 12-06-2017 / 22:57:03 / cg"
+"Modified: / 14-06-2017 / 11:43:47 / cg"
 !
 sumFromQDouble:aQDouble
 %{
 if (__Class(aQDouble) == QDouble) {
 // sloppy_add...
 /*
 double s0, s1, s2, s3;
 double t0, t1, t2, t3;
+int savedCV;
+fpu_fix_start(&savedCV);
 m_two_sum(s0, a[0], b[0], t0);
 m_two_sum(s1, a[1], b[1], t1);
 m_two_sum(s2, a[2], b[2], t2);
 m_two_sum(s3, a[3], b[3], t3);
 m_two_sum(s1, s1, t0, t0);
 m_three_sum(s2, t0, t1);
 m_three_sum2(s3, t0, t2);
 t0 = t0 + t1 + t3;
+fpu_fix_end(&savedCV);
 m_renorm5(s0, s1, s2, s3, t0);
 return qd_real(s0, s1, s2, s3, t0);
 */
 /* Same as above, but addition re-organized to minimize
 data dependency ... unfortunately some compilers are
 not very smart to do this automatically */
 double s0, s1, s2, s3;
 double t0, t1, t2, t3;
 double v0, v1, v2, v3;
 double u0, u1, u2, u3;
 double w0, w1, w2, w3;
+int savedCV;
+fpu_fix_start(&savedCV);
 s0 = a[0] + b[0];
 s1 = a[1] + b[1];
 s2 = a[2] + b[2];
 s3 = a[3] + b[3];
 m_three_sum2(s3, t0, t2);
 t0 = t0 + t1 + t3;
 /* renormalize */
 m_renorm5(s0, s1, s2, s3, t0);
+fpu_fix_end(&savedCV);
 __qNew_qdReal(newQD, s0, s1, s2, s3);
 RETURN(newQD);
 #else
 // ieee_add...
 int i, j, k;
 double s, t;
 double u, v;   /* double-length accumulator */
 double x[4] = {0.0, 0.0, 0.0, 0.0};
+int savedCV;
+fpu_fix_start(&savedCV);
 i = j = k = 0;
 /*
  * Start the merge with whichever leading component has the larger
  * magnitude.  The magnitudes are compared as doubles: C's abs() is
  * the int function, which would truncate the operands (every value
  * below 1.0 becomes 0) and is undefined for values outside int range.
  */
 if ((a[i] < 0.0 ? -a[i] : a[i]) > (b[j] < 0.0 ? -b[j] : b[j])) {
     u = a[i++];
 } else {
     u = b[j++];
 }
 x[3] += a[k];
 for (k = j; k < 4; k++)
 x[3] += b[k];
 m_renorm4(x[0], x[1], x[2], x[3]);
+fpu_fix_end(&savedCV);
 __qNew_qdReal(newQD, x[0], x[1], x[2], x[3]);
 RETURN(newQD);
 #endif
 }
 %}.
 (1e-20 + (QDouble fromFloat:1.0)) asDoubleArray
 (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
 "
 "Created: / 12-06-2017 / 21:15:43 / cg"
-"Modified: / 13-06-2017 / 00:30:45 / cg"
+"Modified: / 14-06-2017 / 11:44:53 / cg"
 ! !
 !QDouble methodsFor:'inspecting'!
 inspectorExtraAttributes

changeset 4387	879309cae427
parent 4386	0a320155d78a
child 4388	742f099741bf