2026-02-19 02:36:36 +01:00
|
|
|
/*
|
|
|
|
|
Crafter®.Math
|
|
|
|
|
Copyright (C) 2026 Catcrafts®
|
|
|
|
|
catcrafts.net
|
|
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
|
License version 3.0 as published by the Free Software Foundation;
|
|
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
export module Crafter.Math:Intersection;
|
2026-05-18 18:03:20 +02:00
|
|
|
import :VectorF32;
|
2026-03-04 15:57:34 +01:00
|
|
|
import :MatrixRowMajor;
|
2026-02-19 02:36:36 +01:00
|
|
|
import std;
|
|
|
|
|
|
|
|
|
|
namespace Crafter {
|
2026-05-18 18:03:20 +02:00
|
|
|
// All intersection tests are batched over four primitives at a time so they
|
|
|
|
|
// feed the VectorF32<3,1>::Dot / Cross / Length / Normalize four-pair
|
|
|
|
|
// overloads directly. The single-primitive case is just "pass the same
|
|
|
|
|
// primitive four times and read lane 0" - there is no single-vector
|
|
|
|
|
// fast-path because the SIMD pipelines want full lanes.
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// Möller-Trumbore against four triangles sharing one ray. Returns ray
|
|
|
|
|
// parameter t per triangle, or float max where the ray misses.
|
|
|
|
|
export inline VectorF32<1, 4> IntersectionTestRayTriangle(
|
|
|
|
|
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
|
|
|
|
|
VectorF32<3, 1> aV0, VectorF32<3, 1> aV1, VectorF32<3, 1> aV2,
|
|
|
|
|
VectorF32<3, 1> bV0, VectorF32<3, 1> bV1, VectorF32<3, 1> bV2,
|
|
|
|
|
VectorF32<3, 1> cV0, VectorF32<3, 1> cV1, VectorF32<3, 1> cV2,
|
|
|
|
|
VectorF32<3, 1> dV0, VectorF32<3, 1> dV1, VectorF32<3, 1> dV2
|
|
|
|
|
) {
|
|
|
|
|
VectorF32<3, 1> aE1 = aV1 - aV0;
|
|
|
|
|
VectorF32<3, 1> aE2 = aV2 - aV0;
|
|
|
|
|
VectorF32<3, 1> bE1 = bV1 - bV0;
|
|
|
|
|
VectorF32<3, 1> bE2 = bV2 - bV0;
|
|
|
|
|
VectorF32<3, 1> cE1 = cV1 - cV0;
|
|
|
|
|
VectorF32<3, 1> cE2 = cV2 - cV0;
|
|
|
|
|
VectorF32<3, 1> dE1 = dV1 - dV0;
|
|
|
|
|
VectorF32<3, 1> dE2 = dV2 - dV0;
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<3, 1> aH = VectorF32<3, 1>::Cross(rayDir, aE2);
|
|
|
|
|
VectorF32<3, 1> bH = VectorF32<3, 1>::Cross(rayDir, bE2);
|
|
|
|
|
VectorF32<3, 1> cH = VectorF32<3, 1>::Cross(rayDir, cE2);
|
|
|
|
|
VectorF32<3, 1> dH = VectorF32<3, 1>::Cross(rayDir, dE2);
|
|
|
|
|
|
|
|
|
|
VectorF32<3, 1> aS = rayOrigin - aV0;
|
|
|
|
|
VectorF32<3, 1> bS = rayOrigin - bV0;
|
|
|
|
|
VectorF32<3, 1> cS = rayOrigin - cV0;
|
|
|
|
|
VectorF32<3, 1> dS = rayOrigin - dV0;
|
|
|
|
|
|
|
|
|
|
VectorF32<3, 1> aQ = VectorF32<3, 1>::Cross(aS, aE1);
|
|
|
|
|
VectorF32<3, 1> bQ = VectorF32<3, 1>::Cross(bS, bE1);
|
|
|
|
|
VectorF32<3, 1> cQ = VectorF32<3, 1>::Cross(cS, cE1);
|
|
|
|
|
VectorF32<3, 1> dQ = VectorF32<3, 1>::Cross(dS, dE1);
|
|
|
|
|
|
|
|
|
|
// Four 3-component dots packed into one __m128 per call.
|
|
|
|
|
VectorF32<1, 4> det = VectorF32<3, 1>::Dot(
|
|
|
|
|
aE1, aH, bE1, bH, cE1, cH, dE1, dH);
|
|
|
|
|
VectorF32<1, 4> uNum = VectorF32<3, 1>::Dot(
|
|
|
|
|
aS, aH, bS, bH, cS, cH, dS, dH);
|
|
|
|
|
VectorF32<1, 4> vNum = VectorF32<3, 1>::Dot(
|
|
|
|
|
rayDir, aQ, rayDir, bQ, rayDir, cQ, rayDir, dQ);
|
|
|
|
|
VectorF32<1, 4> tNum = VectorF32<3, 1>::Dot(
|
|
|
|
|
aE2, aQ, bE2, bQ, cE2, cQ, dE2, dQ);
|
|
|
|
|
|
|
|
|
|
std::array<float, 4> detArr = det.template Store<float>();
|
|
|
|
|
std::array<float, 4> uArr = uNum.template Store<float>();
|
|
|
|
|
std::array<float, 4> vArr = vNum.template Store<float>();
|
|
|
|
|
std::array<float, 4> tArr = tNum.template Store<float>();
|
|
|
|
|
|
|
|
|
|
constexpr float eps = std::numeric_limits<float>::epsilon();
|
|
|
|
|
constexpr float maxF = std::numeric_limits<float>::max();
|
|
|
|
|
alignas(16) std::array<float, 4> out{};
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
float d = detArr[i];
|
|
|
|
|
if (d <= eps) { out[i] = maxF; continue; }
|
|
|
|
|
float invD = 1.0f / d;
|
|
|
|
|
float u = uArr[i] * invD;
|
|
|
|
|
if (u < 0.0f || u > 1.0f) { out[i] = maxF; continue; }
|
|
|
|
|
float v = vArr[i] * invD;
|
|
|
|
|
if (v < 0.0f || u + v > 1.0f) { out[i] = maxF; continue; }
|
|
|
|
|
out[i] = tArr[i] * invD;
|
2026-02-19 02:36:36 +01:00
|
|
|
}
|
2026-05-18 18:03:20 +02:00
|
|
|
return VectorF32<1, 4>(out.data());
|
2026-02-19 02:36:36 +01:00
|
|
|
}
|
|
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// One ray against four spheres. radii must hold {rA, rB, rC, rD} in lanes
|
|
|
|
|
// 0..3.
|
|
|
|
|
export inline VectorF32<1, 4> IntersectionTestRaySphere(
|
|
|
|
|
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
|
|
|
|
|
VectorF32<3, 1> posA, VectorF32<3, 1> posB,
|
|
|
|
|
VectorF32<3, 1> posC, VectorF32<3, 1> posD,
|
|
|
|
|
VectorF32<1, 4> radii
|
|
|
|
|
) {
|
|
|
|
|
VectorF32<3, 1> sA = rayOrigin - posA;
|
|
|
|
|
VectorF32<3, 1> sB = rayOrigin - posB;
|
|
|
|
|
VectorF32<3, 1> sC = rayOrigin - posC;
|
|
|
|
|
VectorF32<3, 1> sD = rayOrigin - posD;
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// dirDotS_i = rayDir · (rayOrigin - pos_i)
|
|
|
|
|
VectorF32<1, 4> dirDotS = VectorF32<3, 1>::Dot(
|
|
|
|
|
rayDir, sA, rayDir, sB, rayDir, sC, rayDir, sD);
|
|
|
|
|
// sqDist_i = |rayOrigin - pos_i|² (a.k.a. LengthSq of the s vectors)
|
|
|
|
|
VectorF32<1, 4> sqDist = VectorF32<3, 1>::LengthSq(sA, sB, sC, sD);
|
|
|
|
|
// aScalar = rayDir · rayDir, broadcast across four lanes.
|
|
|
|
|
VectorF32<1, 4> aScalar = VectorF32<3, 1>::LengthSq(
|
|
|
|
|
rayDir, rayDir, rayDir, rayDir);
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<1, 4> two(2.0f);
|
|
|
|
|
VectorF32<1, 4> four(4.0f);
|
|
|
|
|
VectorF32<1, 4> b = two * dirDotS;
|
|
|
|
|
VectorF32<1, 4> c = sqDist - radii * radii;
|
|
|
|
|
// discriminant = b² - 4·a·c
|
|
|
|
|
VectorF32<1, 4> disc = b * b - four * aScalar * c;
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
std::array<float, 4> discArr = disc.template Store<float>();
|
|
|
|
|
std::array<float, 4> bArr = b.template Store<float>();
|
|
|
|
|
std::array<float, 4> aArr = aScalar.template Store<float>();
|
|
|
|
|
|
|
|
|
|
constexpr float maxF = std::numeric_limits<float>::max();
|
|
|
|
|
alignas(16) std::array<float, 4> out{};
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
float d = discArr[i];
|
|
|
|
|
if (d < 0.0f) { out[i] = maxF; continue; }
|
|
|
|
|
float sqrtD = std::sqrt(d);
|
|
|
|
|
float t = -0.5f * (bArr[i] + sqrtD) / aArr[i];
|
|
|
|
|
out[i] = (t > 0.0f) ? t : maxF;
|
2026-02-19 02:36:36 +01:00
|
|
|
}
|
2026-05-18 18:03:20 +02:00
|
|
|
return VectorF32<1, 4>(out.data());
|
2026-02-19 02:36:36 +01:00
|
|
|
}
|
|
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// One ray against four OBBs. Each box is described by world-space position,
|
|
|
|
|
// half-extent vector (per-axis sizes), and a unit quaternion rotation.
|
|
|
|
|
export inline VectorF32<1, 4> IntersectionTestRayOrientedBox(
|
|
|
|
|
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
|
|
|
|
|
VectorF32<3, 1> posA, VectorF32<3, 1> sizeA, VectorF32<4, 1> rotA,
|
|
|
|
|
VectorF32<3, 1> posB, VectorF32<3, 1> sizeB, VectorF32<4, 1> rotB,
|
|
|
|
|
VectorF32<3, 1> posC, VectorF32<3, 1> sizeC, VectorF32<4, 1> rotC,
|
|
|
|
|
VectorF32<3, 1> posD, VectorF32<3, 1> sizeD, VectorF32<4, 1> rotD
|
|
|
|
|
) {
|
|
|
|
|
// Conjugate quaternion: negate xyz, keep w. Negate<{true,true,true,false}>
|
|
|
|
|
// is constant-folded into a single XOR with a mask vector.
|
|
|
|
|
VectorF32<4, 1> invRotA = rotA.template Negate<{{true, true, true, false}}>();
|
|
|
|
|
VectorF32<4, 1> invRotB = rotB.template Negate<{{true, true, true, false}}>();
|
|
|
|
|
VectorF32<4, 1> invRotC = rotC.template Negate<{{true, true, true, false}}>();
|
|
|
|
|
VectorF32<4, 1> invRotD = rotD.template Negate<{{true, true, true, false}}>();
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<3, 1> localOriginA = VectorF32<3, 1>::Rotate(rayOrigin - posA, invRotA);
|
|
|
|
|
VectorF32<3, 1> localOriginB = VectorF32<3, 1>::Rotate(rayOrigin - posB, invRotB);
|
|
|
|
|
VectorF32<3, 1> localOriginC = VectorF32<3, 1>::Rotate(rayOrigin - posC, invRotC);
|
|
|
|
|
VectorF32<3, 1> localOriginD = VectorF32<3, 1>::Rotate(rayOrigin - posD, invRotD);
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<3, 1> localDirA = VectorF32<3, 1>::Rotate(rayDir, invRotA);
|
|
|
|
|
VectorF32<3, 1> localDirB = VectorF32<3, 1>::Rotate(rayDir, invRotB);
|
|
|
|
|
VectorF32<3, 1> localDirC = VectorF32<3, 1>::Rotate(rayDir, invRotC);
|
|
|
|
|
VectorF32<3, 1> localDirD = VectorF32<3, 1>::Rotate(rayDir, invRotD);
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<3, 1> halfA = sizeA * 0.5f;
|
|
|
|
|
VectorF32<3, 1> halfB = sizeB * 0.5f;
|
|
|
|
|
VectorF32<3, 1> halfC = sizeC * 0.5f;
|
|
|
|
|
VectorF32<3, 1> halfD = sizeD * 0.5f;
|
2026-02-19 02:36:36 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
std::array<std::array<float, 4>, 4> origLanes{
|
|
|
|
|
localOriginA.template Store<float>(),
|
|
|
|
|
localOriginB.template Store<float>(),
|
|
|
|
|
localOriginC.template Store<float>(),
|
|
|
|
|
localOriginD.template Store<float>(),
|
|
|
|
|
};
|
|
|
|
|
std::array<std::array<float, 4>, 4> dirLanes{
|
|
|
|
|
localDirA.template Store<float>(),
|
|
|
|
|
localDirB.template Store<float>(),
|
|
|
|
|
localDirC.template Store<float>(),
|
|
|
|
|
localDirD.template Store<float>(),
|
|
|
|
|
};
|
|
|
|
|
std::array<std::array<float, 4>, 4> halfLanes{
|
|
|
|
|
halfA.template Store<float>(),
|
|
|
|
|
halfB.template Store<float>(),
|
|
|
|
|
halfC.template Store<float>(),
|
|
|
|
|
halfD.template Store<float>(),
|
2026-03-04 15:57:34 +01:00
|
|
|
};
|
|
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
constexpr float eps = std::numeric_limits<float>::epsilon();
|
|
|
|
|
constexpr float maxF = std::numeric_limits<float>::max();
|
|
|
|
|
alignas(16) std::array<float, 4> out{};
|
|
|
|
|
for (std::uint8_t b = 0; b < 4; ++b) {
|
|
|
|
|
float tMin = 0.0f;
|
|
|
|
|
float tMax = maxF;
|
|
|
|
|
bool miss = false;
|
|
|
|
|
for (std::uint8_t i = 0; i < 3; ++i) {
|
|
|
|
|
float d = dirLanes[b][i];
|
|
|
|
|
float o = origLanes[b][i];
|
|
|
|
|
float h = halfLanes[b][i];
|
|
|
|
|
if (std::abs(d) < eps) {
|
|
|
|
|
if (o < -h || o > h) { miss = true; break; }
|
|
|
|
|
} else {
|
|
|
|
|
float invD = 1.0f / d;
|
|
|
|
|
float t1 = (-h - o) * invD;
|
|
|
|
|
float t2 = ( h - o) * invD;
|
|
|
|
|
if (t1 > t2) std::swap(t1, t2);
|
|
|
|
|
tMin = std::max(tMin, t1);
|
|
|
|
|
tMax = std::min(tMax, t2);
|
|
|
|
|
if (tMin > tMax) { miss = true; break; }
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out[b] = miss ? maxF : (tMin >= 0.0f ? tMin : tMax);
|
|
|
|
|
}
|
|
|
|
|
return VectorF32<1, 4>(out.data());
|
2026-03-04 15:57:34 +01:00
|
|
|
}
|
|
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// One sphere against four OBBs. boxMatrix encodes rotation in m[r][0..2]
|
|
|
|
|
// and translation in m[r][3].
|
|
|
|
|
export inline VectorF32<1, 4> IntersectionTestSphereOrientedBox(
|
|
|
|
|
VectorF32<3, 1> sphereCenter, VectorF32<1, 4> radii,
|
|
|
|
|
VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
|
|
|
|
|
VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB,
|
|
|
|
|
VectorF32<3, 1> sizeC, MatrixRowMajor<float, 4, 3, 1> boxC,
|
|
|
|
|
VectorF32<3, 1> sizeD, MatrixRowMajor<float, 4, 3, 1> boxD
|
|
|
|
|
) {
|
|
|
|
|
auto perBox = [&](MatrixRowMajor<float, 4, 3, 1> const& m,
|
|
|
|
|
VectorF32<3, 1> const& size,
|
|
|
|
|
VectorF32<3, 1>& xAxis,
|
|
|
|
|
VectorF32<3, 1>& yAxis,
|
|
|
|
|
VectorF32<3, 1>& zAxis,
|
|
|
|
|
VectorF32<3, 1>& delta) {
|
|
|
|
|
// Existing semantics: the OBB axes are read from the rows of the
|
|
|
|
|
// upper 3x3 block, and the translation column is gathered from the
|
|
|
|
|
// w lane of each row.
|
|
|
|
|
std::array<float, 4> r0 = m.rows[0].template Store<float>();
|
|
|
|
|
std::array<float, 4> r1 = m.rows[1].template Store<float>();
|
|
|
|
|
std::array<float, 4> r2 = m.rows[2].template Store<float>();
|
|
|
|
|
alignas(16) float xBuf[4] = { r0[0], r0[1], r0[2], 0.0f };
|
|
|
|
|
alignas(16) float yBuf[4] = { r1[0], r1[1], r1[2], 0.0f };
|
|
|
|
|
alignas(16) float zBuf[4] = { r2[0], r2[1], r2[2], 0.0f };
|
|
|
|
|
alignas(16) float oBuf[4] = { r0[3], r1[3], r2[3], 0.0f };
|
|
|
|
|
xAxis = VectorF32<3, 1>(xBuf);
|
|
|
|
|
yAxis = VectorF32<3, 1>(yBuf);
|
|
|
|
|
zAxis = VectorF32<3, 1>(zBuf);
|
|
|
|
|
VectorF32<3, 1> origin(oBuf);
|
|
|
|
|
delta = sphereCenter - origin;
|
|
|
|
|
(void)size;
|
|
|
|
|
};
|
2026-03-04 15:57:34 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
VectorF32<3, 1> xA, yA, zA, dA;
|
|
|
|
|
VectorF32<3, 1> xB, yB, zB, dB;
|
|
|
|
|
VectorF32<3, 1> xC, yC, zC, dC;
|
|
|
|
|
VectorF32<3, 1> xD, yD, zD, dD;
|
|
|
|
|
perBox(boxA, sizeA, xA, yA, zA, dA);
|
|
|
|
|
perBox(boxB, sizeB, xB, yB, zB, dB);
|
|
|
|
|
perBox(boxC, sizeC, xC, yC, zC, dC);
|
|
|
|
|
perBox(boxD, sizeD, xD, yD, zD, dD);
|
2026-03-04 15:57:34 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
// Local sphere center per box: project delta onto each box axis. We
|
|
|
|
|
// produce {lx, ly, lz, lx, ly, lz, lx, ly, lz, lx, ly, lz} as three
|
|
|
|
|
// packed 4-wide Dot results (one Dot per axis).
|
|
|
|
|
VectorF32<1, 4> locX = VectorF32<3, 1>::Dot(
|
|
|
|
|
dA, xA, dB, xB, dC, xC, dD, xD);
|
|
|
|
|
VectorF32<1, 4> locY = VectorF32<3, 1>::Dot(
|
|
|
|
|
dA, yA, dB, yB, dC, yC, dD, yD);
|
|
|
|
|
VectorF32<1, 4> locZ = VectorF32<3, 1>::Dot(
|
|
|
|
|
dA, zA, dB, zB, dC, zC, dD, zD);
|
|
|
|
|
|
|
|
|
|
std::array<float, 4> lxArr = locX.template Store<float>();
|
|
|
|
|
std::array<float, 4> lyArr = locY.template Store<float>();
|
|
|
|
|
std::array<float, 4> lzArr = locZ.template Store<float>();
|
|
|
|
|
std::array<float, 4> rArr = radii.template Store<float>();
|
|
|
|
|
std::array<std::array<float, 4>, 4> sizeLanes{
|
|
|
|
|
sizeA.template Store<float>(),
|
|
|
|
|
sizeB.template Store<float>(),
|
|
|
|
|
sizeC.template Store<float>(),
|
|
|
|
|
sizeD.template Store<float>(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
alignas(16) std::array<float, 4> out{};
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
float lx = lxArr[i], ly = lyArr[i], lz = lzArr[i];
|
|
|
|
|
float sx = sizeLanes[i][0], sy = sizeLanes[i][1], sz = sizeLanes[i][2];
|
|
|
|
|
float cx = std::clamp(lx, -sx, sx);
|
|
|
|
|
float cy = std::clamp(ly, -sy, sy);
|
|
|
|
|
float cz = std::clamp(lz, -sz, sz);
|
|
|
|
|
float dx = lx - cx, dy = ly - cy, dz = lz - cz;
|
|
|
|
|
float distSq = dx * dx + dy * dy + dz * dz;
|
|
|
|
|
float r = rArr[i];
|
|
|
|
|
// Returns 0.0 on hit, max on miss - keeps a consistent
|
|
|
|
|
// "t-like" output signature with the other intersection tests.
|
|
|
|
|
out[i] = (distSq <= r * r) ? 0.0f : std::numeric_limits<float>::max();
|
|
|
|
|
}
|
|
|
|
|
return VectorF32<1, 4>(out.data());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Eight local corners of a unit OBB transformed by `matrix`. Uses one
|
|
|
|
|
// batched 4-pair Dot per result row (x, y, z), reproducing the corners in
|
|
|
|
|
// two groups of four.
|
|
|
|
|
export inline std::array<VectorF32<3, 1>, 8> GetOBBCorners(
|
|
|
|
|
VectorF32<3, 1> size, MatrixRowMajor<float, 4, 3, 1> matrix
|
|
|
|
|
) {
|
|
|
|
|
std::array<float, 4> sz = size.template Store<float>();
|
|
|
|
|
const float sx = sz[0], sy = sz[1], sz_ = sz[2];
|
|
|
|
|
|
|
|
|
|
VectorF32<4, 1> mx = matrix.rows[0];
|
|
|
|
|
VectorF32<4, 1> my = matrix.rows[1];
|
|
|
|
|
VectorF32<4, 1> mz = matrix.rows[2];
|
|
|
|
|
|
|
|
|
|
// Eight homogeneous corner vectors with w=1 so the translation column
|
|
|
|
|
// of `matrix` participates in the dot product.
|
|
|
|
|
alignas(16) float c0[4] = { -sx, -sy, -sz_, 1.0f };
|
|
|
|
|
alignas(16) float c1[4] = { sx, -sy, -sz_, 1.0f };
|
|
|
|
|
alignas(16) float c2[4] = { -sx, sy, -sz_, 1.0f };
|
|
|
|
|
alignas(16) float c3[4] = { sx, sy, -sz_, 1.0f };
|
|
|
|
|
alignas(16) float c4[4] = { -sx, -sy, sz_, 1.0f };
|
|
|
|
|
alignas(16) float c5[4] = { sx, -sy, sz_, 1.0f };
|
|
|
|
|
alignas(16) float c6[4] = { -sx, sy, sz_, 1.0f };
|
|
|
|
|
alignas(16) float c7[4] = { sx, sy, sz_, 1.0f };
|
|
|
|
|
VectorF32<4, 1> v0(c0), v1(c1), v2(c2), v3(c3);
|
|
|
|
|
VectorF32<4, 1> v4(c4), v5(c5), v6(c6), v7(c7);
|
|
|
|
|
|
|
|
|
|
// First four corners (0..3): batch x, y, z dots.
|
|
|
|
|
VectorF32<1, 4> xLo = VectorF32<4, 1>::Dot(mx, v0, mx, v1, mx, v2, mx, v3);
|
|
|
|
|
VectorF32<1, 4> yLo = VectorF32<4, 1>::Dot(my, v0, my, v1, my, v2, my, v3);
|
|
|
|
|
VectorF32<1, 4> zLo = VectorF32<4, 1>::Dot(mz, v0, mz, v1, mz, v2, mz, v3);
|
|
|
|
|
// Second four corners (4..7).
|
|
|
|
|
VectorF32<1, 4> xHi = VectorF32<4, 1>::Dot(mx, v4, mx, v5, mx, v6, mx, v7);
|
|
|
|
|
VectorF32<1, 4> yHi = VectorF32<4, 1>::Dot(my, v4, my, v5, my, v6, my, v7);
|
|
|
|
|
VectorF32<1, 4> zHi = VectorF32<4, 1>::Dot(mz, v4, mz, v5, mz, v6, mz, v7);
|
|
|
|
|
|
|
|
|
|
std::array<float, 4> xLoA = xLo.template Store<float>();
|
|
|
|
|
std::array<float, 4> yLoA = yLo.template Store<float>();
|
|
|
|
|
std::array<float, 4> zLoA = zLo.template Store<float>();
|
|
|
|
|
std::array<float, 4> xHiA = xHi.template Store<float>();
|
|
|
|
|
std::array<float, 4> yHiA = yHi.template Store<float>();
|
|
|
|
|
std::array<float, 4> zHiA = zHi.template Store<float>();
|
|
|
|
|
|
|
|
|
|
std::array<VectorF32<3, 1>, 8> result;
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
alignas(16) float buf[4] = { xLoA[i], yLoA[i], zLoA[i], 0.0f };
|
|
|
|
|
result[i] = VectorF32<3, 1>(buf);
|
|
|
|
|
}
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
alignas(16) float buf[4] = { xHiA[i], yHiA[i], zHiA[i], 0.0f };
|
|
|
|
|
result[4 + i] = VectorF32<3, 1>(buf);
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SAT against fifteen separating axes (3 box-A, 3 box-B, 9 cross products).
|
|
|
|
|
// We compute every corner projection with batched 4-pair Dots: each axis
|
|
|
|
|
// projects four corners per call, two calls per axis covers the 8 corners.
|
|
|
|
|
export inline bool IntersectionTestOrientedBoxOrientedBox(
|
|
|
|
|
VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
|
|
|
|
|
VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB
|
|
|
|
|
) {
|
|
|
|
|
std::array<VectorF32<3, 1>, 8> cornersA = GetOBBCorners(sizeA, boxA);
|
|
|
|
|
std::array<VectorF32<3, 1>, 8> cornersB = GetOBBCorners(sizeB, boxB);
|
|
|
|
|
|
|
|
|
|
// Axes are the upper-3 lanes of each matrix row (same convention as
|
|
|
|
|
// SphereOrientedBox). ExtractLo<3> just retypes the SIMD register; the
|
|
|
|
|
// 4th lane is ignored by the Len=3 ops below.
|
|
|
|
|
std::array<VectorF32<3, 1>, 3> axesA = {
|
|
|
|
|
boxA.rows[0].template ExtractLo<3>(),
|
|
|
|
|
boxA.rows[1].template ExtractLo<3>(),
|
|
|
|
|
boxA.rows[2].template ExtractLo<3>(),
|
|
|
|
|
};
|
|
|
|
|
std::array<VectorF32<3, 1>, 3> axesB = {
|
|
|
|
|
boxB.rows[0].template ExtractLo<3>(),
|
|
|
|
|
boxB.rows[1].template ExtractLo<3>(),
|
|
|
|
|
boxB.rows[2].template ExtractLo<3>(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::array<VectorF32<3, 1>, 15> axes{};
|
|
|
|
|
axes[0] = axesA[0]; axes[1] = axesA[1]; axes[2] = axesA[2];
|
|
|
|
|
axes[3] = axesB[0]; axes[4] = axesB[1]; axes[5] = axesB[2];
|
|
|
|
|
// Normalize all nine cross axes together with a single batched
|
|
|
|
|
// Normalize call (Packing=3 not in the API, so two calls of four +
|
|
|
|
|
// one of one would be needed; for now just normalize in two batches
|
|
|
|
|
// of four and the trailing one inline).
|
|
|
|
|
std::array<VectorF32<3, 1>, 9> crossAxes{};
|
|
|
|
|
std::uint8_t k = 0;
|
|
|
|
|
for (std::uint8_t i = 0; i < 3; ++i) {
|
|
|
|
|
for (std::uint8_t j = 0; j < 3; ++j) {
|
|
|
|
|
crossAxes[k++] = VectorF32<3, 1>::Cross(axesA[i], axesB[j]);
|
2026-03-04 15:57:34 +01:00
|
|
|
}
|
|
|
|
|
}
|
2026-05-18 18:03:20 +02:00
|
|
|
auto norm0 = VectorF32<3, 1>::Normalize(crossAxes[0], crossAxes[1], crossAxes[2], crossAxes[3]);
|
|
|
|
|
auto norm1 = VectorF32<3, 1>::Normalize(crossAxes[4], crossAxes[5], crossAxes[6], crossAxes[7]);
|
|
|
|
|
auto norm2 = VectorF32<3, 1>::Normalize(crossAxes[8], crossAxes[8], crossAxes[8], crossAxes[8]);
|
|
|
|
|
axes[6] = std::get<0>(norm0);
|
|
|
|
|
axes[7] = std::get<1>(norm0);
|
|
|
|
|
axes[8] = std::get<2>(norm0);
|
|
|
|
|
axes[9] = std::get<3>(norm0);
|
|
|
|
|
axes[10] = std::get<0>(norm1);
|
|
|
|
|
axes[11] = std::get<1>(norm1);
|
|
|
|
|
axes[12] = std::get<2>(norm1);
|
|
|
|
|
axes[13] = std::get<3>(norm1);
|
|
|
|
|
axes[14] = std::get<0>(norm2);
|
2026-03-04 15:57:34 +01:00
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
for (std::uint8_t axisIdx = 0; axisIdx < 15; ++axisIdx) {
|
|
|
|
|
VectorF32<3, 1> axis = axes[axisIdx];
|
|
|
|
|
// Project all 8 corners of each box onto `axis` using two batched
|
|
|
|
|
// 4-pair Dot calls (lo and hi corners).
|
|
|
|
|
VectorF32<1, 4> projA_lo = VectorF32<3, 1>::Dot(
|
|
|
|
|
cornersA[0], axis, cornersA[1], axis,
|
|
|
|
|
cornersA[2], axis, cornersA[3], axis);
|
|
|
|
|
VectorF32<1, 4> projA_hi = VectorF32<3, 1>::Dot(
|
|
|
|
|
cornersA[4], axis, cornersA[5], axis,
|
|
|
|
|
cornersA[6], axis, cornersA[7], axis);
|
|
|
|
|
VectorF32<1, 4> projB_lo = VectorF32<3, 1>::Dot(
|
|
|
|
|
cornersB[0], axis, cornersB[1], axis,
|
|
|
|
|
cornersB[2], axis, cornersB[3], axis);
|
|
|
|
|
VectorF32<1, 4> projB_hi = VectorF32<3, 1>::Dot(
|
|
|
|
|
cornersB[4], axis, cornersB[5], axis,
|
|
|
|
|
cornersB[6], axis, cornersB[7], axis);
|
|
|
|
|
|
|
|
|
|
std::array<float, 4> aLo = projA_lo.template Store<float>();
|
|
|
|
|
std::array<float, 4> aHi = projA_hi.template Store<float>();
|
|
|
|
|
std::array<float, 4> bLo = projB_lo.template Store<float>();
|
|
|
|
|
std::array<float, 4> bHi = projB_hi.template Store<float>();
|
|
|
|
|
|
|
|
|
|
float minA = aLo[0], maxA = aLo[0];
|
|
|
|
|
for (std::uint8_t i = 1; i < 4; ++i) {
|
|
|
|
|
minA = std::min(minA, aLo[i]);
|
|
|
|
|
maxA = std::max(maxA, aLo[i]);
|
|
|
|
|
}
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
minA = std::min(minA, aHi[i]);
|
|
|
|
|
maxA = std::max(maxA, aHi[i]);
|
|
|
|
|
}
|
|
|
|
|
float minB = bLo[0], maxB = bLo[0];
|
|
|
|
|
for (std::uint8_t i = 1; i < 4; ++i) {
|
|
|
|
|
minB = std::min(minB, bLo[i]);
|
|
|
|
|
maxB = std::max(maxB, bLo[i]);
|
|
|
|
|
}
|
|
|
|
|
for (std::uint8_t i = 0; i < 4; ++i) {
|
|
|
|
|
minB = std::min(minB, bHi[i]);
|
|
|
|
|
maxB = std::max(maxB, bHi[i]);
|
2026-03-04 15:57:34 +01:00
|
|
|
}
|
|
|
|
|
|
2026-05-18 18:03:20 +02:00
|
|
|
if (maxA < minB || maxB < minA) return false;
|
2026-03-04 15:57:34 +01:00
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2026-05-18 18:03:20 +02:00
|
|
|
}
|