QDouble.st
changeset 4387 879309cae427
parent 4386 0a320155d78a
child 4388 742f099741bf
equal deleted inserted replaced
4386:0a320155d78a 4387:879309cae427
    53 # ifndef NAN
    53 # ifndef NAN
    54 #  include <bits/nan.h>
    54 #  include <bits/nan.h>
    55 # endif
    55 # endif
    56 #endif
    56 #endif
    57 
    57 
       
    58 #if defined(__x86__) || defined(__x86_64__)
       
    59 
       
    60 # ifndef _FPU_EXTENDED
       
    61 #  define _FPU_EXTENDED 0x0300
       
    62 # endif
       
    63 
       
    64 # ifndef _FPU_DOUBLE
       
    65 #  define _FPU_DOUBLE 0x0200
       
    66 # endif
       
    67 
       
    68 # if defined( __win32__ ) && (defined( __BORLANDC__ ) || defined( __VISUALC__ )) 
       
    69 
       
    70 #  define fpu_fix_start(old_cw_ptr)\
       
    71     {\
       
    72         *old_cw_ptr = _control87(0, 0); \        
       
    73         _control87(_FPU_DOUBLE, _FPU_EXTENDED);\
       
    74     }
       
    75 
       
    76 #  define fpu_fix_end(old_cw_ptr)\
       
    77     {\
       
    78         _control87(*old_cw_ptr, _FPU_EXTENDED);\
       
    79     }
       
    80 
       
    81 # else // assume MINGW, GCC or CLANG
       
    82 
       
    83 #  ifndef _FPU_GETCW
       
    84 #   define _FPU_GETCW(x) asm volatile ("fnstcw %0":"=m" (x));
       
    85 #  endif
       
    86 #  ifndef _FPU_SETCW
       
    87 #   define _FPU_SETCW(x) asm volatile ("fldcw %0": :"m" (x));
       
    88 #  endif
       
    89 
       
    90 #  define fpu_fix_start(old_cw_ptr)\
       
    91     {\
       
    92         volatile unsigned short cw, new_cw;\
       
    93         _FPU_GETCW(cw);\
       
    94         new_cw = (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE;\
       
    95         _FPU_SETCW(new_cw);\
       
    96         *old_cw_ptr = cw;\
       
    97     }
       
    98 
       
    99 #  define fpu_fix_end(old_cw_ptr)\
       
   100     {\
       
   101         volatile unsigned short cw = *old_cw_ptr;\
       
   102         _FPU_SETCW(cw);\
       
   103     }
       
   104 
       
   105 # endif
       
   106 
       
   107 #endif
       
   108 
       
   109 
    58 struct qd_real {
   110 struct qd_real {
    59     double x[4];    /* The Components. */
   111     double x[4];    /* The Components. */
    60 };
   112 };
    61 
   113 
    62 struct __quadDoubleStruct {
   114 struct __quadDoubleStruct {
    97   double s = a + b;\
   149   double s = a + b;\
    98   err = b - (s - a);\
   150   err = b - (s - a);\
    99   rslt = s; \
   151   rslt = s; \
   100 }
   152 }
   101 
   153 
       
   154 #define m_quick_two_diff(rslt, a, b, err) {\
       
   155   double s = a - b;\
       
   156   err = (a - s) - b;\
       
   157   rslt = s;\
       
   158 }
       
   159 
   102 #define m_two_sum(rslt, a, b, err) {\
   160 #define m_two_sum(rslt, a, b, err) {\
   103   double s = a + b;\
   161   double s = a + b;\
   104   double bb = s - a;\
   162   double bb = s - a;\
   105   err = (a - (s - bb)) + (b - bb);\
   163   err = (a - (s - bb)) + (b - bb);\
       
   164   rslt = s;\
       
   165 }
       
   166 
       
   167 /* Computes fl(a-b) and err(a-b).  */
       
   168 #define m_two_diff(rslt, a, b, err) {\
       
   169   double s = a - b;\
       
   170   double bb = s - a;\
       
   171   err = (a - (s - bb)) - (b + bb);\
   106   rslt = s;\
   172   rslt = s;\
   107 }
   173 }
   108 
   174 
   109 #define m_three_sum(a, b, c) { \
   175 #define m_three_sum(a, b, c) { \
   110   double t1, t2, t3; \
   176   double t1, t2, t3; \
  1128         double b = __floatVal(aFloat);
  1194         double b = __floatVal(aFloat);
  1129         double p0, p1, p2, p3;
  1195         double p0, p1, p2, p3;
  1130         double q0, q1, q2;
  1196         double q0, q1, q2;
  1131         double s0, s1, s2, s3, s4;
  1197         double s0, s1, s2, s3, s4;
  1132         OBJ newQD;
  1198         OBJ newQD;
       
  1199         int savedCV;
       
  1200         
       
  1201         fpu_fix_start(&savedCV);
  1133         
  1202         
  1134         m_two_prod(p0, a[0], b, q0);
  1203         m_two_prod(p0, a[0], b, q0);
  1135         m_two_prod(p1, a[1], b, q1);
  1204         m_two_prod(p1, a[1], b, q1);
  1136         m_two_prod(p2, a[2], b, q2);
  1205         m_two_prod(p2, a[2], b, q2);
  1137         p3 = a[3] * b;
  1206         p3 = a[3] * b;
  1146         s3 = q1;
  1215         s3 = q1;
  1147 
  1216 
  1148         s4 = q2 + p2;
  1217         s4 = q2 + p2;
  1149 
  1218 
  1150         m_renorm5(s0, s1, s2, s3, s4);
  1219         m_renorm5(s0, s1, s2, s3, s4);
       
  1220         fpu_fix_end(&savedCV);
       
  1221 
  1151         __qNew_qdReal(newQD, s0, s1, s2, s3);
  1222         __qNew_qdReal(newQD, s0, s1, s2, s3);
  1152         RETURN( newQD );
  1223         RETURN( newQD );
  1153     }
  1224     }
  1154 %}.
  1225 %}.
  1155     ^ super productFromFloat:aFloat.
  1226     ^ super productFromFloat:aFloat.
  1156 
  1227 
  1157     "
  1228     "
       
  1229      (QDouble fromFloat:1.0) productFromFloat:2.0
       
  1230      ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2.0
       
  1231      ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2e20
       
  1232 
  1158      2.0 * (QDouble fromFloat:1.0)
  1233      2.0 * (QDouble fromFloat:1.0)
  1159      1e20 * (QDouble fromFloat:1.0)
  1234      2.0 * (QDouble fromFloat:3.0)
       
  1235      
       
  1236      2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) 
       
  1237      (2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0))) - (QDouble fromFloat:1e20) - (QDouble fromFloat:1e20)
       
  1238 
  1160      (2.0 * (QDouble fromFloat:1.0)) asFloat
  1239      (2.0 * (QDouble fromFloat:1.0)) asFloat
  1161      (1e20 * (QDouble fromFloat:1.0)) asFloat
  1240      (1e20 * (QDouble fromFloat:1.0)) asFloat
  1162 
  1241 
  1163      (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
  1242      (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
  1164     "
  1243     "
  1165 
  1244 
  1166     "Created: / 13-06-2017 / 00:58:56 / cg"
  1245     "Created: / 13-06-2017 / 00:58:56 / cg"
       
  1246     "Modified: / 14-06-2017 / 11:42:57 / cg"
  1167 !
  1247 !
  1168 
  1248 
  1169 productFromQDouble:aQDouble
  1249 productFromQDouble:aQDouble
  1170 %{
  1250 %{
  1171     if (__Class(aQDouble) == QDouble) {
  1251     if (__Class(aQDouble) == QDouble) {
  1172         double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue;
  1252         double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue;
  1173         double *b = __QuadDoubleInstPtr(aQDouble)->d_quadDoubleValue;
  1253         double *b = __QuadDoubleInstPtr(aQDouble)->d_quadDoubleValue;
  1174         OBJ newQD;
  1254         OBJ newQD;
  1175 // sloppy        
  1255 
       
  1256         // sloppy        
  1176         double p0, p1, p2, p3, p4, p5;
  1257         double p0, p1, p2, p3, p4, p5;
  1177         double q0, q1, q2, q3, q4, q5;
  1258         double q0, q1, q2, q3, q4, q5;
  1178         double t0, t1;
  1259         double t0, t1;
  1179         double s0, s1, s2;
  1260         double s0, s1, s2;
       
  1261         int savedCV;
       
  1262         
       
  1263         fpu_fix_start(&savedCV);
  1180 
  1264 
  1181         m_two_prod(p0, a[0], b[0], q0);
  1265         m_two_prod(p0, a[0], b[0], q0);
       
  1266 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[0], p0, q0);
  1182 
  1267 
  1183         m_two_prod(p1, a[0], b[1], q1);
  1268         m_two_prod(p1, a[0], b[1], q1);
  1184         m_two_prod(p2, a[1], b[0], q2);
  1269         m_two_prod(p2, a[1], b[0], q2);
       
  1270 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[1], p1, q1);
       
  1271 fprintf(stderr, "%f * %f -> %f, %f\n", a[1], b[0], p2, q2);
  1185 
  1272 
  1186         m_two_prod(p3, a[0], b[2], q3);
  1273         m_two_prod(p3, a[0], b[2], q3);
  1187         m_two_prod(p4, a[1], b[1], q4);
  1274         m_two_prod(p4, a[1], b[1], q4);
  1188         m_two_prod(p5, a[2], b[0], q5);
  1275         m_two_prod(p5, a[2], b[0], q5);
       
  1276 
       
  1277 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[2], p3, q3);
       
  1278 fprintf(stderr, "%f * %f -> %f, %f\n", a[1], b[1], p4, q4);
       
  1279 fprintf(stderr, "%f * %f -> %f, %f\n", a[2], b[0], p5, q5);
  1189 
  1280 
  1190         /* Start Accumulation */
  1281         /* Start Accumulation */
  1191         m_three_sum(p1, p2, q0);
  1282         m_three_sum(p1, p2, q0);
  1192 
  1283 
  1193         /* Six-Three Sum  of p2, q1, q2, p3, p4, p5. */
  1284         /* Six-Three Sum  of p2, q1, q2, p3, p4, p5. */
  1202 
  1293 
  1203         /* O(eps^3) order terms */
  1294         /* O(eps^3) order terms */
  1204         s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5;
  1295         s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5;
  1205         m_renorm5(p0, p1, s0, s1, s2);
  1296         m_renorm5(p0, p1, s0, s1, s2);
  1206 
  1297 
       
  1298         fpu_fix_end(&savedCV);
       
  1299 
  1207         __qNew_qdReal(newQD, p0, p1, s0, s1);
  1300         __qNew_qdReal(newQD, p0, p1, s0, s1);
  1208         RETURN( newQD );
  1301         RETURN( newQD );
  1209     }
  1302     }
  1210 %}.
  1303 %}.
  1211     ^ super productFromQDouble:aQDouble.
  1304     ^ super productFromQDouble:aQDouble.
  1212 
  1305 
  1213     "
  1306     "
       
  1307      (QDouble fromFloat:1.0) * 2.0
  1214      2.0 * (QDouble fromFloat:1.0)
  1308      2.0 * (QDouble fromFloat:1.0)
       
  1309      (QDouble fromFloat:1.0) * (QDouble fromFloat:2.0)
       
  1310 
  1215      1e20 * (QDouble fromFloat:1.0)
  1311      1e20 * (QDouble fromFloat:1.0)
  1216      (2.0 * (QDouble fromFloat:1.0)) asFloat
  1312      (2.0 * (QDouble fromFloat:1.0)) asFloat
  1217      (1e20 * (QDouble fromFloat:1.0)) asFloat
  1313      (1e20 * (QDouble fromFloat:1.0)) asFloat
  1218 
  1314 
  1219      (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
  1315      (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray
  1220     "
  1316     "
  1221 
  1317 
  1222     "Created: / 13-06-2017 / 01:06:22 / cg"
  1318     "Created: / 13-06-2017 / 01:06:22 / cg"
       
  1319     "Modified: / 14-06-2017 / 11:43:28 / cg"
  1223 !
  1320 !
  1224 
  1321 
  1225 quotientFromQDouble:aQDouble
  1322 quotientFromQDouble:aQDouble
  1226     "sloppy"
  1323     "sloppy"
  1227     
  1324     
  1242     r renorm.
  1339     r renorm.
  1243     ^ r
  1340     ^ r
  1244 
  1341 
  1245     "
  1342     "
  1246      2.0 / (QDouble fromFloat:2.0)
  1343      2.0 / (QDouble fromFloat:2.0)
       
  1344      2.0 / (QDouble fromFloat:1.0)
  1247      1e20 / (QDouble fromFloat:1.0)
  1345      1e20 / (QDouble fromFloat:1.0)
  1248      (2.0 / (QDouble fromFloat:1.0)) asFloat
  1346      (2.0 / (QDouble fromFloat:1.0)) asFloat
  1249      (1e20 / (QDouble fromFloat:1.0)) asFloat
  1347      (1e20 / (QDouble fromFloat:1.0)) asFloat
  1250 
  1348 
  1251      (QDouble fromFloat:2.0) / 2.0
  1349      (QDouble fromFloat:2.0) / 2.0
  1265         double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; 
  1363         double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; 
  1266         double b = __floatVal(aFloat);
  1364         double b = __floatVal(aFloat);
  1267         double c0, c1, c2, c3;
  1365         double c0, c1, c2, c3;
  1268         double e;
  1366         double e;
  1269         OBJ newQD;
  1367         OBJ newQD;
       
  1368         int savedCV;
       
  1369         
       
  1370         fpu_fix_start(&savedCV);
  1270 
  1371 
  1271         m_two_sum(c0, a[0], b, e);
  1372         m_two_sum(c0, a[0], b, e);
  1272         m_two_sum(c1 ,a[1], e, e);
  1373         m_two_sum(c1 ,a[1], e, e);
  1273         m_two_sum(c2, a[2], e, e);
  1374         m_two_sum(c2, a[2], e, e);
  1274         m_two_sum(c3, a[3], e, e);
  1375         m_two_sum(c3, a[3], e, e);
  1275 
  1376 
  1276         m_renorm5(c0, c1, c2, c3, e);
  1377         m_renorm5(c0, c1, c2, c3, e);
       
  1378         fpu_fix_end(&savedCV);
  1277         __qNew_qdReal(newQD, c0, c1, c2, c3);
  1379         __qNew_qdReal(newQD, c0, c1, c2, c3);
  1278         RETURN( newQD );
  1380         RETURN( newQD );
  1279     }
  1381     }
  1280 %}.
  1382 %}.
  1281     ^ super sumFromFloat:aFloat.
  1383     ^ super sumFromFloat:aFloat.
  1290      (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
  1392      (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
  1291      (1e20 + (QDouble fromFloat:1.0) + 1e-20) asDoubleArray
  1393      (1e20 + (QDouble fromFloat:1.0) + 1e-20) asDoubleArray
  1292     "
  1394     "
  1293 
  1395 
  1294     "Created: / 12-06-2017 / 17:16:41 / cg"
  1396     "Created: / 12-06-2017 / 17:16:41 / cg"
  1295     "Modified: / 12-06-2017 / 22:57:03 / cg"
  1397     "Modified: / 14-06-2017 / 11:43:47 / cg"
  1296 !
  1398 !
  1297 
  1399 
  1298 sumFromQDouble:aQDouble
  1400 sumFromQDouble:aQDouble
  1299 %{
  1401 %{
  1300     if (__Class(aQDouble) == QDouble) {
  1402     if (__Class(aQDouble) == QDouble) {
  1306         // sloppy_add...
  1408         // sloppy_add...
  1307 
  1409 
  1308         /*
  1410         /*
  1309         double s0, s1, s2, s3;
  1411         double s0, s1, s2, s3;
  1310         double t0, t1, t2, t3;
  1412         double t0, t1, t2, t3;
  1311 
  1413         int savedCV;
       
  1414 
       
  1415         fpu_fix_start(&savedCV);
  1312         m_two_sum(s0, a[0], b[0], t0);
  1416         m_two_sum(s0, a[0], b[0], t0);
  1313         m_two_sum(s1, a[1], b[1], t1);
  1417         m_two_sum(s1, a[1], b[1], t1);
  1314         m_two_sum(s2, a[2], b[2], t2);
  1418         m_two_sum(s2, a[2], b[2], t2);
  1315         m_two_sum(s3, a[3], b[3], t3);
  1419         m_two_sum(s3, a[3], b[3], t3);
  1316 
  1420 
  1317         m_two_sum(s1, s1, t0, t0);
  1421         m_two_sum(s1, s1, t0, t0);
  1318         m_three_sum(s2, t0, t1);
  1422         m_three_sum(s2, t0, t1);
  1319         m_three_sum2(s3, t0, t2);
  1423         m_three_sum2(s3, t0, t2);
  1320         t0 = t0 + t1 + t3;
  1424         t0 = t0 + t1 + t3;
  1321 
  1425 
       
  1426         fpu_fix_end(&savedCV);
  1322         m_renorm5(s0, s1, s2, s3, t0);
  1427         m_renorm5(s0, s1, s2, s3, t0);
  1323         return qd_real(s0, s1, s2, s3, t0);
  1428         return qd_real(s0, s1, s2, s3, t0);
  1324         */
  1429         */
  1325 
  1430 
  1326         /* Same as above, but addition re-organized to minimize
  1431         /* Same as above, but addition re-organized to minimize
  1327            data dependency ... unfortunately some compilers are
  1432            data dependency ... unfortunately some compilers are
  1328            not very smart to do this automatically */
  1433            not very smart to do this automatically */
  1329         double s0, s1, s2, s3;
  1434         double s0, s1, s2, s3;
  1330         double t0, t1, t2, t3;
  1435         double t0, t1, t2, t3;
  1331 
       
  1332         double v0, v1, v2, v3;
  1436         double v0, v1, v2, v3;
  1333         double u0, u1, u2, u3;
  1437         double u0, u1, u2, u3;
  1334         double w0, w1, w2, w3;
  1438         double w0, w1, w2, w3;
       
  1439         int savedCV;
  1335         
  1440         
       
  1441         fpu_fix_start(&savedCV);
  1336         s0 = a[0] + b[0];
  1442         s0 = a[0] + b[0];
  1337         s1 = a[1] + b[1];
  1443         s1 = a[1] + b[1];
  1338         s2 = a[2] + b[2];
  1444         s2 = a[2] + b[2];
  1339         s3 = a[3] + b[3];
  1445         s3 = a[3] + b[3];
  1340 
  1446 
  1368         m_three_sum2(s3, t0, t2);
  1474         m_three_sum2(s3, t0, t2);
  1369         t0 = t0 + t1 + t3;
  1475         t0 = t0 + t1 + t3;
  1370 
  1476 
  1371         /* renormalize */
  1477         /* renormalize */
  1372         m_renorm5(s0, s1, s2, s3, t0);
  1478         m_renorm5(s0, s1, s2, s3, t0);
       
  1479         fpu_fix_end(&savedCV);
  1373         __qNew_qdReal(newQD, s0, s1, s2, s3);
  1480         __qNew_qdReal(newQD, s0, s1, s2, s3);
  1374         RETURN(newQD);                 
  1481         RETURN(newQD);                 
  1375 #else
  1482 #else
  1376         // ieee_add...
  1483         // ieee_add...
  1377         int i, j, k;
  1484         int i, j, k;
  1378         double s, t;
  1485         double s, t;
  1379         double u, v;   /* double-length accumulator */
  1486         double u, v;   /* double-length accumulator */
  1380         double x[4] = {0.0, 0.0, 0.0, 0.0};
  1487         double x[4] = {0.0, 0.0, 0.0, 0.0};
  1381 
  1488         int savedCV;
       
  1489         
       
  1490         fpu_fix_start(&savedCV);
  1382         i = j = k = 0;
  1491         i = j = k = 0;
  1383         if (abs(a[i]) > abs(b[j]))
  1492         if (abs(a[i]) > abs(b[j]))
  1384           u = a[i++];
  1493           u = a[i++];
  1385         else
  1494         else
  1386           u = b[j++];
  1495           u = b[j++];
  1420           x[3] += a[k];
  1529           x[3] += a[k];
  1421         for (k = j; k < 4; k++)
  1530         for (k = j; k < 4; k++)
  1422           x[3] += b[k];
  1531           x[3] += b[k];
  1423 
  1532 
  1424         m_renorm4(x[0], x[1], x[2], x[3]);
  1533         m_renorm4(x[0], x[1], x[2], x[3]);
       
  1534         fpu_fix_end(&savedCV);
  1425         __qNew_qdReal(newQD, x[0], x[1], x[2], x[3]);
  1535         __qNew_qdReal(newQD, x[0], x[1], x[2], x[3]);
  1426         RETURN(newQD);                 
  1536         RETURN(newQD);                 
  1427 #endif
  1537 #endif
  1428     }
  1538     }
  1429 %}.
  1539 %}.
  1440      (1e-20 + (QDouble fromFloat:1.0)) asDoubleArray
  1550      (1e-20 + (QDouble fromFloat:1.0)) asDoubleArray
  1441      (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
  1551      (1e20 + (QDouble fromFloat:1.0)) asDoubleArray
  1442    "
  1552    "
  1443 
  1553 
  1444     "Created: / 12-06-2017 / 21:15:43 / cg"
  1554     "Created: / 12-06-2017 / 21:15:43 / cg"
  1445     "Modified: / 13-06-2017 / 00:30:45 / cg"
  1555     "Modified: / 14-06-2017 / 11:44:53 / cg"
  1446 ! !
  1556 ! !
  1447 
  1557 
  1448 !QDouble methodsFor:'inspecting'!
  1558 !QDouble methodsFor:'inspecting'!
  1449 
  1559 
  1450 inspectorExtraAttributes
  1560 inspectorExtraAttributes