53 # ifndef NAN |
53 # ifndef NAN |
54 # include <bits/nan.h> |
54 # include <bits/nan.h> |
55 # endif |
55 # endif |
56 #endif |
56 #endif |
57 |
57 |
|
58 #if defined(__x86__) || defined(__x86_64__) |
|
59 |
|
60 # ifndef _FPU_EXTENDED |
|
61 # define _FPU_EXTENDED 0x0300 |
|
62 # endif |
|
63 |
|
64 # ifndef _FPU_DOUBLE |
|
65 # define _FPU_DOUBLE 0x0200 |
|
66 # endif |
|
67 |
|
68 # if defined( __win32__ ) && (defined( __BORLANDC__ ) || defined( __VISUALC__ )) |
|
69 |
|
70 # define fpu_fix_start(old_cw_ptr)\ |
|
71 {\ |
|
72 *old_cw_ptr = _control87(0, 0); \ |
|
73 _control87(_FPU_DOUBLE, _FPU_EXTENDED);\ |
|
74 } |
|
75 |
|
76 # define fpu_fix_end(old_cw_ptr)\ |
|
77 {\ |
|
78 _control87(*old_cw_ptr, _FPU_EXTENDED);\ |
|
79 } |
|
80 |
|
81 # else // assume MINGW, GCC or CLANG |
|
82 |
|
83 # ifndef _FPU_GETCW |
|
84 # define _FPU_GETCW(x) asm volatile ("fnstcw %0":"=m" (x)); |
|
85 # endif |
|
86 # ifndef _FPU_SETCW |
|
87 # define _FPU_SETCW(x) asm volatile ("fldcw %0": :"m" (x)); |
|
88 # endif |
|
89 |
|
90 # define fpu_fix_start(old_cw_ptr)\ |
|
91 {\ |
|
92 volatile unsigned short cw, new_cw;\ |
|
93 _FPU_GETCW(cw);\ |
|
94 new_cw = (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE;\ |
|
95 _FPU_SETCW(new_cw);\ |
|
96 *old_cw_ptr = cw;\ |
|
97 } |
|
98 |
|
99 # define fpu_fix_end(old_cw_ptr)\ |
|
100 {\ |
|
101 volatile unsigned short cw = *old_cw_ptr;\ |
|
102 _FPU_SETCW(cw);\ |
|
103 } |
|
104 |
|
105 # endif |
|
106 |
|
107 #endif |
|
108 |
|
109 |
58 struct qd_real { |
110 struct qd_real { |
59 double x[4]; /* The Components. */ |
111 double x[4]; /* The Components. */ |
60 }; |
112 }; |
61 |
113 |
62 struct __quadDoubleStruct { |
114 struct __quadDoubleStruct { |
1128 double b = __floatVal(aFloat); |
1194 double b = __floatVal(aFloat); |
1129 double p0, p1, p2, p3; |
1195 double p0, p1, p2, p3; |
1130 double q0, q1, q2; |
1196 double q0, q1, q2; |
1131 double s0, s1, s2, s3, s4; |
1197 double s0, s1, s2, s3, s4; |
1132 OBJ newQD; |
1198 OBJ newQD; |
|
1199 int savedCV; |
|
1200 |
|
1201 fpu_fix_start(&savedCV); |
1133 |
1202 |
1134 m_two_prod(p0, a[0], b, q0); |
1203 m_two_prod(p0, a[0], b, q0); |
1135 m_two_prod(p1, a[1], b, q1); |
1204 m_two_prod(p1, a[1], b, q1); |
1136 m_two_prod(p2, a[2], b, q2); |
1205 m_two_prod(p2, a[2], b, q2); |
1137 p3 = a[3] * b; |
1206 p3 = a[3] * b; |
1146 s3 = q1; |
1215 s3 = q1; |
1147 |
1216 |
1148 s4 = q2 + p2; |
1217 s4 = q2 + p2; |
1149 |
1218 |
1150 m_renorm5(s0, s1, s2, s3, s4); |
1219 m_renorm5(s0, s1, s2, s3, s4); |
|
1220 fpu_fix_end(&savedCV); |
|
1221 |
1151 __qNew_qdReal(newQD, s0, s1, s2, s3); |
1222 __qNew_qdReal(newQD, s0, s1, s2, s3); |
1152 RETURN( newQD ); |
1223 RETURN( newQD ); |
1153 } |
1224 } |
1154 %}. |
1225 %}. |
1155 ^ super productFromFloat:aFloat. |
1226 ^ super productFromFloat:aFloat. |
1156 |
1227 |
1157 " |
1228 " |
|
1229 (QDouble fromFloat:1.0) productFromFloat:2.0 |
|
1230 ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2.0 |
|
1231 ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) productFromFloat:2e20 |
|
1232 |
1158 2.0 * (QDouble fromFloat:1.0) |
1233 2.0 * (QDouble fromFloat:1.0) |
1159 1e20 * (QDouble fromFloat:1.0) |
1234 2.0 * (QDouble fromFloat:3.0) |
|
1235 |
|
1236 2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0)) |
|
1237 (2.0 * ((QDouble fromFloat:1e20) + (QDouble fromFloat:1.0))) - (QDouble fromFloat:1e20) - (QDouble fromFloat:1e20) |
|
1238 |
1160 (2.0 * (QDouble fromFloat:1.0)) asFloat |
1239 (2.0 * (QDouble fromFloat:1.0)) asFloat |
1161 (1e20 * (QDouble fromFloat:1.0)) asFloat |
1240 (1e20 * (QDouble fromFloat:1.0)) asFloat |
1162 |
1241 |
1163 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray |
1242 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray |
1164 " |
1243 " |
1165 |
1244 |
1166 "Created: / 13-06-2017 / 00:58:56 / cg" |
1245 "Created: / 13-06-2017 / 00:58:56 / cg" |
|
1246 "Modified: / 14-06-2017 / 11:42:57 / cg" |
1167 ! |
1247 ! |
1168 |
1248 |
1169 productFromQDouble:aQDouble |
1249 productFromQDouble:aQDouble |
1170 %{ |
1250 %{ |
1171 if (__Class(aQDouble) == QDouble) { |
1251 if (__Class(aQDouble) == QDouble) { |
1172 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; |
1252 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; |
1173 double *b = __QuadDoubleInstPtr(aQDouble)->d_quadDoubleValue; |
1253 double *b = __QuadDoubleInstPtr(aQDouble)->d_quadDoubleValue; |
1174 OBJ newQD; |
1254 OBJ newQD; |
1175 // sloppy |
1255 |
|
1256 // sloppy |
1176 double p0, p1, p2, p3, p4, p5; |
1257 double p0, p1, p2, p3, p4, p5; |
1177 double q0, q1, q2, q3, q4, q5; |
1258 double q0, q1, q2, q3, q4, q5; |
1178 double t0, t1; |
1259 double t0, t1; |
1179 double s0, s1, s2; |
1260 double s0, s1, s2; |
|
1261 int savedCV; |
|
1262 |
|
1263 fpu_fix_start(&savedCV); |
1180 |
1264 |
1181 m_two_prod(p0, a[0], b[0], q0); |
1265 m_two_prod(p0, a[0], b[0], q0); |
|
1266 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[0], p0, q0); |
1182 |
1267 |
1183 m_two_prod(p1, a[0], b[1], q1); |
1268 m_two_prod(p1, a[0], b[1], q1); |
1184 m_two_prod(p2, a[1], b[0], q2); |
1269 m_two_prod(p2, a[1], b[0], q2); |
|
1270 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[1], p1, q1); |
|
1271 fprintf(stderr, "%f * %f -> %f, %f\n", a[1], b[0], p2, q2); |
1185 |
1272 |
1186 m_two_prod(p3, a[0], b[2], q3); |
1273 m_two_prod(p3, a[0], b[2], q3); |
1187 m_two_prod(p4, a[1], b[1], q4); |
1274 m_two_prod(p4, a[1], b[1], q4); |
1188 m_two_prod(p5, a[2], b[0], q5); |
1275 m_two_prod(p5, a[2], b[0], q5); |
|
1276 |
|
1277 fprintf(stderr, "%f * %f -> %f, %f\n", a[0], b[2], p3, q3); |
|
1278 fprintf(stderr, "%f * %f -> %f, %f\n", a[1], b[1], p4, q4); |
|
1279 fprintf(stderr, "%f * %f -> %f, %f\n", a[2], b[0], p5, q5); |
1189 |
1280 |
1190 /* Start Accumulation */ |
1281 /* Start Accumulation */ |
1191 m_three_sum(p1, p2, q0); |
1282 m_three_sum(p1, p2, q0); |
1192 |
1283 |
1193 /* Six-Three Sum of p2, q1, q2, p3, p4, p5. */ |
1284 /* Six-Three Sum of p2, q1, q2, p3, p4, p5. */ |
1202 |
1293 |
1203 /* O(eps^3) order terms */ |
1294 /* O(eps^3) order terms */ |
1204 s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5; |
1295 s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5; |
1205 m_renorm5(p0, p1, s0, s1, s2); |
1296 m_renorm5(p0, p1, s0, s1, s2); |
1206 |
1297 |
|
1298 fpu_fix_end(&savedCV); |
|
1299 |
1207 __qNew_qdReal(newQD, p0, p1, s0, s1); |
1300 __qNew_qdReal(newQD, p0, p1, s0, s1); |
1208 RETURN( newQD ); |
1301 RETURN( newQD ); |
1209 } |
1302 } |
1210 %}. |
1303 %}. |
1211 ^ super productFromQDouble:aQDouble. |
1304 ^ super productFromQDouble:aQDouble. |
1212 |
1305 |
1213 " |
1306 " |
|
1307 (QDouble fromFloat:1.0) * 2.0 |
1214 2.0 * (QDouble fromFloat:1.0) |
1308 2.0 * (QDouble fromFloat:1.0) |
|
1309 (QDouble fromFloat:1.0) * (QDouble fromFloat:2.0) |
|
1310 |
1215 1e20 * (QDouble fromFloat:1.0) |
1311 1e20 * (QDouble fromFloat:1.0) |
1216 (2.0 * (QDouble fromFloat:1.0)) asFloat |
1312 (2.0 * (QDouble fromFloat:1.0)) asFloat |
1217 (1e20 * (QDouble fromFloat:1.0)) asFloat |
1313 (1e20 * (QDouble fromFloat:1.0)) asFloat |
1218 |
1314 |
1219 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray |
1315 (1e20 * (QDouble fromFloat:1.0) * 1e-20) asDoubleArray |
1220 " |
1316 " |
1221 |
1317 |
1222 "Created: / 13-06-2017 / 01:06:22 / cg" |
1318 "Created: / 13-06-2017 / 01:06:22 / cg" |
|
1319 "Modified: / 14-06-2017 / 11:43:28 / cg" |
1223 ! |
1320 ! |
1224 |
1321 |
1225 quotientFromQDouble:aQDouble |
1322 quotientFromQDouble:aQDouble |
1226 "sloppy" |
1323 "sloppy" |
1227 |
1324 |
1265 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; |
1363 double *a = __QuadDoubleInstPtr(self)->d_quadDoubleValue; |
1266 double b = __floatVal(aFloat); |
1364 double b = __floatVal(aFloat); |
1267 double c0, c1, c2, c3; |
1365 double c0, c1, c2, c3; |
1268 double e; |
1366 double e; |
1269 OBJ newQD; |
1367 OBJ newQD; |
|
1368 int savedCV; |
|
1369 |
|
1370 fpu_fix_start(&savedCV); |
1270 |
1371 |
1271 m_two_sum(c0, a[0], b, e); |
1372 m_two_sum(c0, a[0], b, e); |
1272 m_two_sum(c1 ,a[1], e, e); |
1373 m_two_sum(c1 ,a[1], e, e); |
1273 m_two_sum(c2, a[2], e, e); |
1374 m_two_sum(c2, a[2], e, e); |
1274 m_two_sum(c3, a[3], e, e); |
1375 m_two_sum(c3, a[3], e, e); |
1275 |
1376 |
1276 m_renorm5(c0, c1, c2, c3, e); |
1377 m_renorm5(c0, c1, c2, c3, e); |
|
1378 fpu_fix_end(&savedCV); |
1277 __qNew_qdReal(newQD, c0, c1, c2, c3); |
1379 __qNew_qdReal(newQD, c0, c1, c2, c3); |
1278 RETURN( newQD ); |
1380 RETURN( newQD ); |
1279 } |
1381 } |
1280 %}. |
1382 %}. |
1281 ^ super sumFromFloat:aFloat. |
1383 ^ super sumFromFloat:aFloat. |
1306 // sloppy_add... |
1408 // sloppy_add... |
1307 |
1409 |
1308 /* |
1410 /* |
1309 double s0, s1, s2, s3; |
1411 double s0, s1, s2, s3; |
1310 double t0, t1, t2, t3; |
1412 double t0, t1, t2, t3; |
1311 |
1413 int savedCV; |
|
1414 |
|
1415 fpu_fix_start(&savedCV); |
1312 m_two_sum(s0, a[0], b[0], t0); |
1416 m_two_sum(s0, a[0], b[0], t0); |
1313 m_two_sum(s1, a[1], b[1], t1); |
1417 m_two_sum(s1, a[1], b[1], t1); |
1314 m_two_sum(s2, a[2], b[2], t2); |
1418 m_two_sum(s2, a[2], b[2], t2); |
1315 m_two_sum(s3, a[3], b[3], t3); |
1419 m_two_sum(s3, a[3], b[3], t3); |
1316 |
1420 |
1317 m_two_sum(s1, s1, t0, t0); |
1421 m_two_sum(s1, s1, t0, t0); |
1318 m_three_sum(s2, t0, t1); |
1422 m_three_sum(s2, t0, t1); |
1319 m_three_sum2(s3, t0, t2); |
1423 m_three_sum2(s3, t0, t2); |
1320 t0 = t0 + t1 + t3; |
1424 t0 = t0 + t1 + t3; |
1321 |
1425 |
|
1426 fpu_fix_end(&savedCV); |
1322 m_renorm5(s0, s1, s2, s3, t0); |
1427 m_renorm5(s0, s1, s2, s3, t0); |
1323 return qd_real(s0, s1, s2, s3, t0); |
1428 return qd_real(s0, s1, s2, s3, t0); |
1324 */ |
1429 */ |
1325 |
1430 |
1326 /* Same as above, but addition re-organized to minimize |
1431 /* Same as above, but addition re-organized to minimize |
1327 data dependency ... unfortunately some compilers are |
1432 data dependency ... unfortunately some compilers are |
1328 not very smart to do this automatically */ |
1433 not very smart to do this automatically */ |
1329 double s0, s1, s2, s3; |
1434 double s0, s1, s2, s3; |
1330 double t0, t1, t2, t3; |
1435 double t0, t1, t2, t3; |
1331 |
|
1332 double v0, v1, v2, v3; |
1436 double v0, v1, v2, v3; |
1333 double u0, u1, u2, u3; |
1437 double u0, u1, u2, u3; |
1334 double w0, w1, w2, w3; |
1438 double w0, w1, w2, w3; |
|
1439 int savedCV; |
1335 |
1440 |
|
1441 fpu_fix_start(&savedCV); |
1336 s0 = a[0] + b[0]; |
1442 s0 = a[0] + b[0]; |
1337 s1 = a[1] + b[1]; |
1443 s1 = a[1] + b[1]; |
1338 s2 = a[2] + b[2]; |
1444 s2 = a[2] + b[2]; |
1339 s3 = a[3] + b[3]; |
1445 s3 = a[3] + b[3]; |
1340 |
1446 |
1368 m_three_sum2(s3, t0, t2); |
1474 m_three_sum2(s3, t0, t2); |
1369 t0 = t0 + t1 + t3; |
1475 t0 = t0 + t1 + t3; |
1370 |
1476 |
1371 /* renormalize */ |
1477 /* renormalize */ |
1372 m_renorm5(s0, s1, s2, s3, t0); |
1478 m_renorm5(s0, s1, s2, s3, t0); |
|
1479 fpu_fix_end(&savedCV); |
1373 __qNew_qdReal(newQD, s0, s1, s2, s3); |
1480 __qNew_qdReal(newQD, s0, s1, s2, s3); |
1374 RETURN(newQD); |
1481 RETURN(newQD); |
1375 #else |
1482 #else |
1376 // ieee_add... |
1483 // ieee_add... |
1377 int i, j, k; |
1484 int i, j, k; |
1378 double s, t; |
1485 double s, t; |
1379 double u, v; /* double-length accumulator */ |
1486 double u, v; /* double-length accumulator */ |
1380 double x[4] = {0.0, 0.0, 0.0, 0.0}; |
1487 double x[4] = {0.0, 0.0, 0.0, 0.0}; |
1381 |
1488 int savedCV; |
|
1489 |
|
1490 fpu_fix_start(&savedCV); |
1382 i = j = k = 0; |
1491 i = j = k = 0; |
1383 if (abs(a[i]) > abs(b[j])) |
1492 if (abs(a[i]) > abs(b[j])) |
1384 u = a[i++]; |
1493 u = a[i++]; |
1385 else |
1494 else |
1386 u = b[j++]; |
1495 u = b[j++]; |