1120 _cnt = ((_v1 * 0x01010101) >> 24) + ((_v2 * 0x01010101) >> 24); |
1119 _cnt = ((_v1 * 0x01010101) >> 24) + ((_v2 * 0x01010101) >> 24); |
1121 return __c__._RETURN( STInteger._qnew( (byte)(_cnt) ) ); |
1120 return __c__._RETURN( STInteger._qnew( (byte)(_cnt) ) ); |
1122 #else |
1121 #else |
1123 unsigned int _cnt; |
1122 unsigned int _cnt; |
1124 |
1123 |
1125 // popcnt is actually slower on some cpus; |
1124 // popcnt is actually slower on some (older) cpus; |
1126 // and almost equal to ALG3 on modern intel hardware. |
1125 // and almost equal to ALG3 on modern intel hardware. |
1127 // So ALGO3 is good for all |
1126 // So ALGO3 is good for all |
1128 # if 0 && (defined(__GNUC__) || defined(clang)) && (defined(__x86__) || defined(__x86_64__)) |
1127 # if 0 && (defined(__GNUC__) || defined(clang)) && (defined(__x86__) || defined(__x86_64__)) |
1129 # define ALGORITHM_4 |
1128 # define ALGORITHM_4 |
1130 # else |
1129 # else |
1181 _cnt = (_cnt * 0x01010101) >> 24; |
1180 _cnt = (_cnt * 0x01010101) >> 24; |
1182 # endif |
1181 # endif |
1183 |
1182 |
1184 # elif defined( ALGORITHM_4 ) |
1183 # elif defined( ALGORITHM_4 ) |
1185 |
1184 |
1186 // using the popcnt instruction (x86 only); strange enough, this is slower than ALGO3 |
1185 // using the popcnt instruction (x86 only); |
|
1186 // strange enough, this is slower than ALGO3 on some (older) Pentiums |
1187 |
1187 |
1188 # if __POINTER_SIZE__ == 8 |
1188 # if __POINTER_SIZE__ == 8 |
1189 unsigned INT _v; |
1189 unsigned INT _v; |
1190 |
1190 |
1191 #define _POPCNT(__op) \ |
1191 #define _POPCNT(__op) \ |
1303 bitDeinterleave:n |
1303 bitDeinterleave:n |
1304 "extract count integers from an n-way Morton number as a vector; |
1304 "extract count integers from an n-way Morton number as a vector; |
1305 This is the inverse operation from bitInterleave: - see comment there. |
1305 This is the inverse operation from bitInterleave: - see comment there. |
1306 i.e. if count is 3, |
1306 i.e. if count is 3, |
1307 and the receiver's bits are |
1307 and the receiver's bits are |
1308 cN bN aN ... c2 b2 a2 c1 b1 a1 c0 b0 a0 |
1308 cN bN aN ... c2 b2 a2 c1 b1 a1 c0 b0 a0 |
1309 then the result will be a vector containing the numbers a,b,c with bits: |
1309 then the result will be a vector containing the numbers a,b,c with bits: |
1310 aN ... a2 a1 a0 |
1310 aN ... a2 a1 a0 |
1311 bN ... b2 b1 b0 |
1311 bN ... b2 b1 b0 |
1312 cN ... c2 c1 c0." |
1312 cN ... c2 c1 c0. |
|
1313 |
|
1314 Morton numbers are great to linearize 2D coordinates |
|
1315 eg. to sort 2D points by distances" |
1313 |
1316 |
1314 %{ |
1317 %{ |
1315 #if __POINTER_SIZE__ == 8 |
1318 #if __POINTER_SIZE__ == 8 |
1316 |
1319 |
1317 # define M5555555555555555 0x5555555555555555 |
1320 # define M5555555555555555 0x5555555555555555 |
1347 unsigned INT bits = __intVal(self); |
1350 unsigned INT bits = __intVal(self); |
1348 unsigned INT a, b; |
1351 unsigned INT a, b; |
1349 |
1352 |
1350 if (n == __MKSMALLINT(2)) { |
1353 if (n == __MKSMALLINT(2)) { |
1351 # define morton2(x,dst) \ |
1354 # define morton2(x,dst) \ |
1352 { \ |
1355 { \ |
1353 unsigned INT t; \ |
1356 unsigned INT t; \ |
1354 t = (x) & M5555555555555555; \ |
1357 t = (x) & M5555555555555555; \ |
1355 t = (t | (t >> 1)) & M3333333333333333; \ |
1358 t = (t | (t >> 1)) & M3333333333333333; \ |
1356 t = (t | (t >> 2)) & M0f0f0f0f0f0f0f0f; \ |
1359 t = (t | (t >> 2)) & M0f0f0f0f0f0f0f0f; \ |
1357 t = (t | (t >> 4)) & M00ff00ff00ff00ff; \ |
1360 t = (t | (t >> 4)) & M00ff00ff00ff00ff; \ |
1358 t = (t | (t >> 8)) & M0000ffff0000ffff; \ |
1361 t = (t | (t >> 8)) & M0000ffff0000ffff; \ |
1359 t = (t | (t >> 16)) & M00000000ffffffff; \ |
1362 t = (t | (t >> 16)) & M00000000ffffffff; \ |
1360 dst = t; \ |
1363 dst = t; \ |
1361 } |
1364 } |
1362 |
1365 |
1363 morton2(bits, a); |
1366 morton2(bits, a); |
1364 morton2(bits>>1, b); |
1367 morton2(bits>>1, b); |
1365 RETURN (__ARRAY_WITH2(__MKSMALLINT(a), __MKSMALLINT(b))); |
1368 RETURN (__ARRAY_WITH2(__MKSMALLINT(a), __MKSMALLINT(b))); |
1366 } |
1369 } |
1367 |
1370 |
1368 if (n == __MKSMALLINT(3)) { |
1371 if (n == __MKSMALLINT(3)) { |
1369 unsigned INT c; |
1372 unsigned INT c; |
1370 |
1373 |
1371 # define morton3(x,dst) \ |
1374 # define morton3(x,dst) \ |
1372 { \ |
1375 { \ |
1373 unsigned INT t; \ |
1376 unsigned INT t; \ |
1374 t = (x) & M9249249249249249; \ |
1377 t = (x) & M9249249249249249; \ |
1375 t = (t | (t >> 2)) & M30c30c30c30c30c3; \ |
1378 t = (t | (t >> 2)) & M30c30c30c30c30c3; \ |
1376 t = (t | (t >> 4)) & Mf00f00f00f00f00f; \ |
1379 t = (t | (t >> 4)) & Mf00f00f00f00f00f; \ |
1377 t = (t | (t >> 8)) & M00ff0000ff0000ff; \ |
1380 t = (t | (t >> 8)) & M00ff0000ff0000ff; \ |
1378 t = (t | (t >> 16)) & Mffff00000000ffff; \ |
1381 t = (t | (t >> 16)) & Mffff00000000ffff; \ |
1379 t = (t | (t >> 32)) & M00000000ffffffff; \ |
1382 t = (t | (t >> 32)) & M00000000ffffffff; \ |
1380 dst = t; \ |
1383 dst = t; \ |
1381 } |
1384 } |
1382 |
1385 |
1383 morton3(bits, a); |
1386 morton3(bits, a); |
1384 morton3(bits>>1, b); |
1387 morton3(bits>>1, b); |
1385 morton3(bits>>2, c); |
1388 morton3(bits>>2, c); |
1386 RETURN (__ARRAY_WITH3(__MKSMALLINT(a), __MKSMALLINT(b), __MKSMALLINT(c))); |
1389 RETURN (__ARRAY_WITH3(__MKSMALLINT(a), __MKSMALLINT(b), __MKSMALLINT(c))); |
1387 } |
1390 } |
1388 done: ; |
1391 done: ; |
1389 %}. |
1392 %}. |
1390 ^ super bitDeinterleave:n |
1393 ^ super bitDeinterleave:n |
1391 |
1394 |
1395 |
1398 |
1396 (197 bitInterleaveWith:144 and:62) bitDeinterleave:3 -> #(197 144 62) |
1399 (197 bitInterleaveWith:144 and:62) bitDeinterleave:3 -> #(197 144 62) |
1397 |
1400 |
1398 |a b| |
1401 |a b| |
1399 (0 to:31) do:[:bitA | |
1402 (0 to:31) do:[:bitA | |
1400 a := 1 << bitA. |
1403 a := 1 << bitA. |
1401 (0 to:31) do:[:bitB | |
1404 (0 to:31) do:[:bitB | |
1402 b := 1 << bitB. |
1405 b := 1 << bitB. |
1403 self assert:( (a bitInterleaveWith:b) bitDeinterleave:2 ) = {a . b } |
1406 self assert:( (a bitInterleaveWith:b) bitDeinterleave:2 ) = {a . b } |
1404 ]. |
1407 ]. |
1405 ]. |
1408 ]. |
1406 |
1409 |
1407 |a b c| |
1410 |a b c| |
1408 (0 to:31) do:[:bitA | |
1411 (0 to:31) do:[:bitA | |
1409 a := 1 << bitA. |
1412 a := 1 << bitA. |
1410 (0 to:31) do:[:bitB | |
1413 (0 to:31) do:[:bitB | |
1411 b := 1 << bitB. |
1414 b := 1 << bitB. |
1412 (0 to:31) do:[:bitC | |
1415 (0 to:31) do:[:bitC | |
1413 c := 1 << bitC. |
1416 c := 1 << bitC. |
1414 self assert:( (a bitInterleaveWith:b and:c) bitDeinterleave:3 ) = {a . b . c} |
1417 self assert:( (a bitInterleaveWith:b and:c) bitDeinterleave:3 ) = {a . b . c} |
1415 ]. |
1418 ]. |
1416 ]. |
1419 ]. |
1417 ]. |
1420 ]. |
1418 " |
1421 " |
1419 |
1422 |
1420 "Created: / 28-08-2017 / 18:32:48 / cg" |
1423 "Created: / 28-08-2017 / 18:32:48 / cg" |
1421 ! |
1424 ! |