matrix operations

This commit is contained in:
Jorijn van der Graaf 2026-05-18 18:03:20 +02:00
commit ad5ba21b4d
6 changed files with 1433 additions and 613 deletions

View file

@ -18,207 +18,432 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
export module Crafter.Math:Intersection;
import :Vector;
import :VectorF32;
import :MatrixRowMajor;
import std;
namespace Crafter {
export template<typename T>
constexpr T IntersectionTestRayTriangle(Vector<T, 3, 0> vert0, Vector<T, 3, 0> vert1, Vector<T, 3, 0> vert2, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) {
Vector<T, 3, 0> edge1 = vert1 - vert0;
Vector<T, 3, 0> edge2 = vert2 - vert0;
// All intersection tests are batched over four primitives at a time so they
// feed the VectorF32<3,1>::Dot / Cross / Length / Normalize four-pair
// overloads directly. The single-primitive case is just "pass the same
// primitive four times and read lane 0" - there is no single-vector
// fast-path because the SIMD pipelines want full lanes.
Vector<T, 3, 0> h = Vector<T, 3, 0>::Cross(rayDir, edge2);
T determinant = Vector<T, 3, 0>::Dot(edge1, h);
// Möller-Trumbore against four triangles sharing one ray. Returns ray
// parameter t per triangle, or float max where the ray misses.
export inline VectorF32<1, 4> IntersectionTestRayTriangle(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> aV0, VectorF32<3, 1> aV1, VectorF32<3, 1> aV2,
VectorF32<3, 1> bV0, VectorF32<3, 1> bV1, VectorF32<3, 1> bV2,
VectorF32<3, 1> cV0, VectorF32<3, 1> cV1, VectorF32<3, 1> cV2,
VectorF32<3, 1> dV0, VectorF32<3, 1> dV1, VectorF32<3, 1> dV2
) {
VectorF32<3, 1> aE1 = aV1 - aV0;
VectorF32<3, 1> aE2 = aV2 - aV0;
VectorF32<3, 1> bE1 = bV1 - bV0;
VectorF32<3, 1> bE2 = bV2 - bV0;
VectorF32<3, 1> cE1 = cV1 - cV0;
VectorF32<3, 1> cE2 = cV2 - cV0;
VectorF32<3, 1> dE1 = dV1 - dV0;
VectorF32<3, 1> dE2 = dV2 - dV0;
if (determinant <= std::numeric_limits<T>::epsilon()) {
return std::numeric_limits<T>::max();
VectorF32<3, 1> aH = VectorF32<3, 1>::Cross(rayDir, aE2);
VectorF32<3, 1> bH = VectorF32<3, 1>::Cross(rayDir, bE2);
VectorF32<3, 1> cH = VectorF32<3, 1>::Cross(rayDir, cE2);
VectorF32<3, 1> dH = VectorF32<3, 1>::Cross(rayDir, dE2);
VectorF32<3, 1> aS = rayOrigin - aV0;
VectorF32<3, 1> bS = rayOrigin - bV0;
VectorF32<3, 1> cS = rayOrigin - cV0;
VectorF32<3, 1> dS = rayOrigin - dV0;
VectorF32<3, 1> aQ = VectorF32<3, 1>::Cross(aS, aE1);
VectorF32<3, 1> bQ = VectorF32<3, 1>::Cross(bS, bE1);
VectorF32<3, 1> cQ = VectorF32<3, 1>::Cross(cS, cE1);
VectorF32<3, 1> dQ = VectorF32<3, 1>::Cross(dS, dE1);
// Four 3-component dots packed into one __m128 per call.
VectorF32<1, 4> det = VectorF32<3, 1>::Dot(
aE1, aH, bE1, bH, cE1, cH, dE1, dH);
VectorF32<1, 4> uNum = VectorF32<3, 1>::Dot(
aS, aH, bS, bH, cS, cH, dS, dH);
VectorF32<1, 4> vNum = VectorF32<3, 1>::Dot(
rayDir, aQ, rayDir, bQ, rayDir, cQ, rayDir, dQ);
VectorF32<1, 4> tNum = VectorF32<3, 1>::Dot(
aE2, aQ, bE2, bQ, cE2, cQ, dE2, dQ);
std::array<float, 4> detArr = det.template Store<float>();
std::array<float, 4> uArr = uNum.template Store<float>();
std::array<float, 4> vArr = vNum.template Store<float>();
std::array<float, 4> tArr = tNum.template Store<float>();
constexpr float eps = std::numeric_limits<float>::epsilon();
constexpr float maxF = std::numeric_limits<float>::max();
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float d = detArr[i];
if (d <= eps) { out[i] = maxF; continue; }
float invD = 1.0f / d;
float u = uArr[i] * invD;
if (u < 0.0f || u > 1.0f) { out[i] = maxF; continue; }
float v = vArr[i] * invD;
if (v < 0.0f || u + v > 1.0f) { out[i] = maxF; continue; }
out[i] = tArr[i] * invD;
}
T inverse_determinant = T(1) / determinant;
Vector<T, 3, 0> origins_diff_vector = rayOrigin - vert0;
T u = Vector<T, 3, 0>::Dot(origins_diff_vector, h) * inverse_determinant;
if (u < 0.0 || u > 1.0)
{
return std::numeric_limits<T>::max();
}
Vector<T, 3, 0> q = Vector<T, 3, 0>::Cross(origins_diff_vector, edge1);
T v = inverse_determinant * Vector<T, 3, 0>::Dot(rayDir, q);
if (v < 0.0 || u + v > 1.0) {
return std::numeric_limits<T>::max();
}
return inverse_determinant * Vector<T, 3, 0>::Dot(edge2, q);
return VectorF32<1, 4>(out.data());
}
export template<typename T>
constexpr T IntersectionTestRaySphere(Vector<T, 3, 0> position, T radius, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) {
T a = Vector<T, 3, 0>::Dot(rayDir, rayDir);
T b = Vector<T, 3, 0>::Dot(rayDir, (T(2) * (rayOrigin - position)));
T c = Vector<T, 3, 0>::Dot(position, position) + Vector<T, 3, 0>::Dot(rayOrigin, rayOrigin) - T(2) * Vector<T, 3, 0>::Dot(rayOrigin, position) - radius * radius;
T d = b * b + (T(-4)) * a * c;
// One ray against four spheres. radii must hold {rA, rB, rC, rD} in lanes
// 0..3.
export inline VectorF32<1, 4> IntersectionTestRaySphere(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> posA, VectorF32<3, 1> posB,
VectorF32<3, 1> posC, VectorF32<3, 1> posD,
VectorF32<1, 4> radii
) {
VectorF32<3, 1> sA = rayOrigin - posA;
VectorF32<3, 1> sB = rayOrigin - posB;
VectorF32<3, 1> sC = rayOrigin - posC;
VectorF32<3, 1> sD = rayOrigin - posD;
if (d < 0) {
return std::numeric_limits<T>::max();
}
// dirDotS_i = rayDir · (rayOrigin - pos_i)
VectorF32<1, 4> dirDotS = VectorF32<3, 1>::Dot(
rayDir, sA, rayDir, sB, rayDir, sC, rayDir, sD);
// sqDist_i = |rayOrigin - pos_i|² (a.k.a. LengthSq of the s vectors)
VectorF32<1, 4> sqDist = VectorF32<3, 1>::LengthSq(sA, sB, sC, sD);
// aScalar = rayDir · rayDir, broadcast across four lanes.
VectorF32<1, 4> aScalar = VectorF32<3, 1>::LengthSq(
rayDir, rayDir, rayDir, rayDir);
d = std::sqrt(d);
VectorF32<1, 4> two(2.0f);
VectorF32<1, 4> four(4.0f);
VectorF32<1, 4> b = two * dirDotS;
VectorF32<1, 4> c = sqDist - radii * radii;
// discriminant = b² - 4·a·c
VectorF32<1, 4> disc = b * b - four * aScalar * c;
T t = (T(-0.5)) * (b + d) / a;
if (t > T(0)) {
return t;
} else {
return std::numeric_limits<T>::max();
std::array<float, 4> discArr = disc.template Store<float>();
std::array<float, 4> bArr = b.template Store<float>();
std::array<float, 4> aArr = aScalar.template Store<float>();
constexpr float maxF = std::numeric_limits<float>::max();
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float d = discArr[i];
if (d < 0.0f) { out[i] = maxF; continue; }
float sqrtD = std::sqrt(d);
float t = -0.5f * (bArr[i] + sqrtD) / aArr[i];
out[i] = (t > 0.0f) ? t : maxF;
}
return VectorF32<1, 4>(out.data());
}
export template<typename T>
constexpr T IntersectionTestRayOrientedBox(Vector<T, 3, 0> boxPosition, Vector<T, 3, 0> boxSize, Vector<T, 4, 0> boxRotation, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) {
Vector<T, 4, 0> invRot(
-boxRotation.x,
-boxRotation.y,
-boxRotation.z,
boxRotation.w
);
// One ray against four OBBs. Each box is described by world-space position,
// half-extent vector (per-axis sizes), and a unit quaternion rotation.
export inline VectorF32<1, 4> IntersectionTestRayOrientedBox(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> posA, VectorF32<3, 1> sizeA, VectorF32<4, 1> rotA,
VectorF32<3, 1> posB, VectorF32<3, 1> sizeB, VectorF32<4, 1> rotB,
VectorF32<3, 1> posC, VectorF32<3, 1> sizeC, VectorF32<4, 1> rotC,
VectorF32<3, 1> posD, VectorF32<3, 1> sizeD, VectorF32<4, 1> rotD
) {
// Conjugate quaternion: negate xyz, keep w. Negate<{true,true,true,false}>
// is constant-folded into a single XOR with a mask vector.
VectorF32<4, 1> invRotA = rotA.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotB = rotB.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotC = rotC.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotD = rotD.template Negate<{{true, true, true, false}}>();
Vector<T, 3, 0> localOrigin = Vector<T, 3, 0>::Rotate(rayOrigin - boxPosition, invRot);
Vector<T, 3, 0> localDir = Vector<T, 3, 0>::Rotate(rayDir, invRot);
VectorF32<3, 1> localOriginA = VectorF32<3, 1>::Rotate(rayOrigin - posA, invRotA);
VectorF32<3, 1> localOriginB = VectorF32<3, 1>::Rotate(rayOrigin - posB, invRotB);
VectorF32<3, 1> localOriginC = VectorF32<3, 1>::Rotate(rayOrigin - posC, invRotC);
VectorF32<3, 1> localOriginD = VectorF32<3, 1>::Rotate(rayOrigin - posD, invRotD);
Vector<T,3,0> halfExtents = boxSize * T(0.5);
VectorF32<3, 1> localDirA = VectorF32<3, 1>::Rotate(rayDir, invRotA);
VectorF32<3, 1> localDirB = VectorF32<3, 1>::Rotate(rayDir, invRotB);
VectorF32<3, 1> localDirC = VectorF32<3, 1>::Rotate(rayDir, invRotC);
VectorF32<3, 1> localDirD = VectorF32<3, 1>::Rotate(rayDir, invRotD);
T tMin = T(0);
T tMax = std::numeric_limits<T>::max();
VectorF32<3, 1> halfA = sizeA * 0.5f;
VectorF32<3, 1> halfB = sizeB * 0.5f;
VectorF32<3, 1> halfC = sizeC * 0.5f;
VectorF32<3, 1> halfD = sizeD * 0.5f;
for (std::uint32_t i = 0; i < 3; ++i)
{
if (std::abs(localDir.v[i]) < std::numeric_limits<T>::epsilon())
{
if (localOrigin.v[i] < -halfExtents.v[i] || localOrigin.v[i] > halfExtents.v[i]) {
return std::numeric_limits<T>::max();
}
}
else
{
T invD = T(1) / localDir.v[i];
T t1 = (-halfExtents.v[i] - localOrigin.v[i]) * invD;
T t2 = ( halfExtents.v[i] - localOrigin.v[i]) * invD;
if (t1 > t2) {
std::swap(t1, t2);
}
tMin = std::max(tMin, t1);
tMax = std::min(tMax, t2);
if (tMin > tMax) {
return std::numeric_limits<T>::max();
}
}
}
return (tMin >= T(0)) ? tMin : tMax;
}
export template<typename T>
std::vector<Vector<T, 3, 0>> getOBBCorners(Vector<T, 3, 0> size, MatrixRowMajor<T, 4, 3, 1> matrix) {
std::vector<Vector<T, 3, 0>> localCorners = {
Vector<T, 3, 0>(-size.x, -size.y, -size.z),
Vector<T, 3, 0>( size.x, -size.y, -size.z),
Vector<T, 3, 0>(-size.x, size.y, -size.z),
Vector<T, 3, 0>( size.x, size.y, -size.z),
Vector<T, 3, 0>(-size.x, -size.y, size.z),
Vector<T, 3, 0>( size.x, -size.y, size.z),
Vector<T, 3, 0>(-size.x, size.y, size.z),
Vector<T, 3, 0>( size.x, size.y, size.z)
std::array<std::array<float, 4>, 4> origLanes{
localOriginA.template Store<float>(),
localOriginB.template Store<float>(),
localOriginC.template Store<float>(),
localOriginD.template Store<float>(),
};
std::array<std::array<float, 4>, 4> dirLanes{
localDirA.template Store<float>(),
localDirB.template Store<float>(),
localDirC.template Store<float>(),
localDirD.template Store<float>(),
};
std::array<std::array<float, 4>, 4> halfLanes{
halfA.template Store<float>(),
halfB.template Store<float>(),
halfC.template Store<float>(),
halfD.template Store<float>(),
};
std::vector<Vector<T, 3, 0>> worldCorners;
for (Vector<T, 3, 0> localCorner : localCorners) {
Vector<T, 3, 0> rotatedCorner = matrix * localCorner;
worldCorners.push_back(rotatedCorner);
}
return worldCorners;
constexpr float eps = std::numeric_limits<float>::epsilon();
constexpr float maxF = std::numeric_limits<float>::max();
alignas(16) std::array<float, 4> out{};
for (std::uint8_t b = 0; b < 4; ++b) {
float tMin = 0.0f;
float tMax = maxF;
bool miss = false;
for (std::uint8_t i = 0; i < 3; ++i) {
float d = dirLanes[b][i];
float o = origLanes[b][i];
float h = halfLanes[b][i];
if (std::abs(d) < eps) {
if (o < -h || o > h) { miss = true; break; }
} else {
float invD = 1.0f / d;
float t1 = (-h - o) * invD;
float t2 = ( h - o) * invD;
if (t1 > t2) std::swap(t1, t2);
tMin = std::max(tMin, t1);
tMax = std::min(tMax, t2);
if (tMin > tMax) { miss = true; break; }
}
}
out[b] = miss ? maxF : (tMin >= 0.0f ? tMin : tMax);
}
return VectorF32<1, 4>(out.data());
}
export template<typename T>
constexpr bool IntersectionTestOrientedBoxOrientedBox(Vector<T, 3, 0> sizeA, MatrixRowMajor<T, 4, 3, 1> boxA, Vector<T, 3, 0> sizeB, MatrixRowMajor<T, 4, 3, 1> boxB) {
std::vector<Vector<T, 3, 0>> axes;
// One sphere against four OBBs. boxMatrix encodes rotation in m[r][0..2]
// and translation in m[r][3].
export inline VectorF32<1, 4> IntersectionTestSphereOrientedBox(
VectorF32<3, 1> sphereCenter, VectorF32<1, 4> radii,
VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB,
VectorF32<3, 1> sizeC, MatrixRowMajor<float, 4, 3, 1> boxC,
VectorF32<3, 1> sizeD, MatrixRowMajor<float, 4, 3, 1> boxD
) {
auto perBox = [&](MatrixRowMajor<float, 4, 3, 1> const& m,
VectorF32<3, 1> const& size,
VectorF32<3, 1>& xAxis,
VectorF32<3, 1>& yAxis,
VectorF32<3, 1>& zAxis,
VectorF32<3, 1>& delta) {
// Existing semantics: the OBB axes are read from the rows of the
// upper 3x3 block, and the translation column is gathered from the
// w lane of each row.
std::array<float, 4> r0 = m.rows[0].template Store<float>();
std::array<float, 4> r1 = m.rows[1].template Store<float>();
std::array<float, 4> r2 = m.rows[2].template Store<float>();
alignas(16) float xBuf[4] = { r0[0], r0[1], r0[2], 0.0f };
alignas(16) float yBuf[4] = { r1[0], r1[1], r1[2], 0.0f };
alignas(16) float zBuf[4] = { r2[0], r2[1], r2[2], 0.0f };
alignas(16) float oBuf[4] = { r0[3], r1[3], r2[3], 0.0f };
xAxis = VectorF32<3, 1>(xBuf);
yAxis = VectorF32<3, 1>(yBuf);
zAxis = VectorF32<3, 1>(zBuf);
VectorF32<3, 1> origin(oBuf);
delta = sphereCenter - origin;
(void)size;
};
std::vector<Vector<T, 3, 0>> box1Corners = getOBBCorners(sizeA, boxA);
std::vector<Vector<T, 3, 0>> box2Corners = getOBBCorners(sizeB, boxB);
VectorF32<3, 1> xA, yA, zA, dA;
VectorF32<3, 1> xB, yB, zB, dB;
VectorF32<3, 1> xC, yC, zC, dC;
VectorF32<3, 1> xD, yD, zD, dD;
perBox(boxA, sizeA, xA, yA, zA, dA);
perBox(boxB, sizeB, xB, yB, zB, dB);
perBox(boxC, sizeC, xC, yC, zC, dC);
perBox(boxD, sizeD, xD, yD, zD, dD);
axes.push_back(Vector<T, 3, 0>(boxA.m[0][0], boxA.m[0][1], boxA.m[0][2]));
axes.push_back(Vector<T, 3, 0>(boxA.m[1][0], boxA.m[1][1], boxA.m[1][2]));
axes.push_back(Vector<T, 3, 0>(boxA.m[2][0], boxA.m[2][1], boxA.m[2][2]));
axes.push_back(Vector<T, 3, 0>(boxB.m[0][0], boxB.m[0][1], boxB.m[0][2]));
axes.push_back(Vector<T, 3, 0>(boxB.m[1][0], boxB.m[1][1], boxB.m[1][2]));
axes.push_back(Vector<T, 3, 0>(boxB.m[2][0], boxB.m[2][1], boxB.m[2][2]));
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
axes.push_back(Vector<T, 3, 0>::Normalize(Vector<T, 3, 0>::Cross(Vector<T, 3, 0>(boxA.m[i][0], boxA.m[i][1], boxA.m[i][2]), Vector<T, 3, 0>(boxB.m[j][0], boxB.m[j][1], boxB.m[j][2]))));
// Local sphere center per box: project delta onto each box axis. We
// produce {lx, ly, lz, lx, ly, lz, lx, ly, lz, lx, ly, lz} as three
// packed 4-wide Dot results (one Dot per axis).
VectorF32<1, 4> locX = VectorF32<3, 1>::Dot(
dA, xA, dB, xB, dC, xC, dD, xD);
VectorF32<1, 4> locY = VectorF32<3, 1>::Dot(
dA, yA, dB, yB, dC, yC, dD, yD);
VectorF32<1, 4> locZ = VectorF32<3, 1>::Dot(
dA, zA, dB, zB, dC, zC, dD, zD);
std::array<float, 4> lxArr = locX.template Store<float>();
std::array<float, 4> lyArr = locY.template Store<float>();
std::array<float, 4> lzArr = locZ.template Store<float>();
std::array<float, 4> rArr = radii.template Store<float>();
std::array<std::array<float, 4>, 4> sizeLanes{
sizeA.template Store<float>(),
sizeB.template Store<float>(),
sizeC.template Store<float>(),
sizeD.template Store<float>(),
};
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float lx = lxArr[i], ly = lyArr[i], lz = lzArr[i];
float sx = sizeLanes[i][0], sy = sizeLanes[i][1], sz = sizeLanes[i][2];
float cx = std::clamp(lx, -sx, sx);
float cy = std::clamp(ly, -sy, sy);
float cz = std::clamp(lz, -sz, sz);
float dx = lx - cx, dy = ly - cy, dz = lz - cz;
float distSq = dx * dx + dy * dy + dz * dz;
float r = rArr[i];
// Returns 0.0 on hit, max on miss - keeps a consistent
// "t-like" output signature with the other intersection tests.
out[i] = (distSq <= r * r) ? 0.0f : std::numeric_limits<float>::max();
}
return VectorF32<1, 4>(out.data());
}
// Eight local corners of a unit OBB transformed by `matrix`. Uses one
// batched 4-pair Dot per result row (x, y, z), reproducing the corners in
// two groups of four.
export inline std::array<VectorF32<3, 1>, 8> GetOBBCorners(
VectorF32<3, 1> size, MatrixRowMajor<float, 4, 3, 1> matrix
) {
std::array<float, 4> sz = size.template Store<float>();
const float sx = sz[0], sy = sz[1], sz_ = sz[2];
VectorF32<4, 1> mx = matrix.rows[0];
VectorF32<4, 1> my = matrix.rows[1];
VectorF32<4, 1> mz = matrix.rows[2];
// Eight homogeneous corner vectors with w=1 so the translation column
// of `matrix` participates in the dot product.
alignas(16) float c0[4] = { -sx, -sy, -sz_, 1.0f };
alignas(16) float c1[4] = { sx, -sy, -sz_, 1.0f };
alignas(16) float c2[4] = { -sx, sy, -sz_, 1.0f };
alignas(16) float c3[4] = { sx, sy, -sz_, 1.0f };
alignas(16) float c4[4] = { -sx, -sy, sz_, 1.0f };
alignas(16) float c5[4] = { sx, -sy, sz_, 1.0f };
alignas(16) float c6[4] = { -sx, sy, sz_, 1.0f };
alignas(16) float c7[4] = { sx, sy, sz_, 1.0f };
VectorF32<4, 1> v0(c0), v1(c1), v2(c2), v3(c3);
VectorF32<4, 1> v4(c4), v5(c5), v6(c6), v7(c7);
// First four corners (0..3): batch x, y, z dots.
VectorF32<1, 4> xLo = VectorF32<4, 1>::Dot(mx, v0, mx, v1, mx, v2, mx, v3);
VectorF32<1, 4> yLo = VectorF32<4, 1>::Dot(my, v0, my, v1, my, v2, my, v3);
VectorF32<1, 4> zLo = VectorF32<4, 1>::Dot(mz, v0, mz, v1, mz, v2, mz, v3);
// Second four corners (4..7).
VectorF32<1, 4> xHi = VectorF32<4, 1>::Dot(mx, v4, mx, v5, mx, v6, mx, v7);
VectorF32<1, 4> yHi = VectorF32<4, 1>::Dot(my, v4, my, v5, my, v6, my, v7);
VectorF32<1, 4> zHi = VectorF32<4, 1>::Dot(mz, v4, mz, v5, mz, v6, mz, v7);
std::array<float, 4> xLoA = xLo.template Store<float>();
std::array<float, 4> yLoA = yLo.template Store<float>();
std::array<float, 4> zLoA = zLo.template Store<float>();
std::array<float, 4> xHiA = xHi.template Store<float>();
std::array<float, 4> yHiA = yHi.template Store<float>();
std::array<float, 4> zHiA = zHi.template Store<float>();
std::array<VectorF32<3, 1>, 8> result;
for (std::uint8_t i = 0; i < 4; ++i) {
alignas(16) float buf[4] = { xLoA[i], yLoA[i], zLoA[i], 0.0f };
result[i] = VectorF32<3, 1>(buf);
}
for (std::uint8_t i = 0; i < 4; ++i) {
alignas(16) float buf[4] = { xHiA[i], yHiA[i], zHiA[i], 0.0f };
result[4 + i] = VectorF32<3, 1>(buf);
}
return result;
}
// SAT against fifteen separating axes (3 box-A, 3 box-B, 9 cross products).
// We compute every corner projection with batched 4-pair Dots: each axis
// projects four corners per call, two calls per axis covers the 8 corners.
export inline bool IntersectionTestOrientedBoxOrientedBox(
VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB
) {
std::array<VectorF32<3, 1>, 8> cornersA = GetOBBCorners(sizeA, boxA);
std::array<VectorF32<3, 1>, 8> cornersB = GetOBBCorners(sizeB, boxB);
// Axes are the upper-3 lanes of each matrix row (same convention as
// SphereOrientedBox). ExtractLo<3> just retypes the SIMD register; the
// 4th lane is ignored by the Len=3 ops below.
std::array<VectorF32<3, 1>, 3> axesA = {
boxA.rows[0].template ExtractLo<3>(),
boxA.rows[1].template ExtractLo<3>(),
boxA.rows[2].template ExtractLo<3>(),
};
std::array<VectorF32<3, 1>, 3> axesB = {
boxB.rows[0].template ExtractLo<3>(),
boxB.rows[1].template ExtractLo<3>(),
boxB.rows[2].template ExtractLo<3>(),
};
std::array<VectorF32<3, 1>, 15> axes{};
axes[0] = axesA[0]; axes[1] = axesA[1]; axes[2] = axesA[2];
axes[3] = axesB[0]; axes[4] = axesB[1]; axes[5] = axesB[2];
// Normalize all nine cross axes together with a single batched
// Normalize call (Packing=3 not in the API, so two calls of four +
// one of one would be needed; for now just normalize in two batches
// of four and the trailing one inline).
std::array<VectorF32<3, 1>, 9> crossAxes{};
std::uint8_t k = 0;
for (std::uint8_t i = 0; i < 3; ++i) {
for (std::uint8_t j = 0; j < 3; ++j) {
crossAxes[k++] = VectorF32<3, 1>::Cross(axesA[i], axesB[j]);
}
}
auto norm0 = VectorF32<3, 1>::Normalize(crossAxes[0], crossAxes[1], crossAxes[2], crossAxes[3]);
auto norm1 = VectorF32<3, 1>::Normalize(crossAxes[4], crossAxes[5], crossAxes[6], crossAxes[7]);
auto norm2 = VectorF32<3, 1>::Normalize(crossAxes[8], crossAxes[8], crossAxes[8], crossAxes[8]);
axes[6] = std::get<0>(norm0);
axes[7] = std::get<1>(norm0);
axes[8] = std::get<2>(norm0);
axes[9] = std::get<3>(norm0);
axes[10] = std::get<0>(norm1);
axes[11] = std::get<1>(norm1);
axes[12] = std::get<2>(norm1);
axes[13] = std::get<3>(norm1);
axes[14] = std::get<0>(norm2);
for (Vector<T, 3, 0> axis : axes) {
T min1 = Vector<T, 3, 0>::Dot(box1Corners[0], axis);
T max1 = min1;
for (Vector<T, 3, 0> corner : box1Corners) {
T projection = Vector<T, 3, 0>::Dot(corner, axis);
min1 = std::min(min1, projection);
max1 = std::max(max1, projection);
for (std::uint8_t axisIdx = 0; axisIdx < 15; ++axisIdx) {
VectorF32<3, 1> axis = axes[axisIdx];
// Project all 8 corners of each box onto `axis` using two batched
// 4-pair Dot calls (lo and hi corners).
VectorF32<1, 4> projA_lo = VectorF32<3, 1>::Dot(
cornersA[0], axis, cornersA[1], axis,
cornersA[2], axis, cornersA[3], axis);
VectorF32<1, 4> projA_hi = VectorF32<3, 1>::Dot(
cornersA[4], axis, cornersA[5], axis,
cornersA[6], axis, cornersA[7], axis);
VectorF32<1, 4> projB_lo = VectorF32<3, 1>::Dot(
cornersB[0], axis, cornersB[1], axis,
cornersB[2], axis, cornersB[3], axis);
VectorF32<1, 4> projB_hi = VectorF32<3, 1>::Dot(
cornersB[4], axis, cornersB[5], axis,
cornersB[6], axis, cornersB[7], axis);
std::array<float, 4> aLo = projA_lo.template Store<float>();
std::array<float, 4> aHi = projA_hi.template Store<float>();
std::array<float, 4> bLo = projB_lo.template Store<float>();
std::array<float, 4> bHi = projB_hi.template Store<float>();
float minA = aLo[0], maxA = aLo[0];
for (std::uint8_t i = 1; i < 4; ++i) {
minA = std::min(minA, aLo[i]);
maxA = std::max(maxA, aLo[i]);
}
for (std::uint8_t i = 0; i < 4; ++i) {
minA = std::min(minA, aHi[i]);
maxA = std::max(maxA, aHi[i]);
}
float minB = bLo[0], maxB = bLo[0];
for (std::uint8_t i = 1; i < 4; ++i) {
minB = std::min(minB, bLo[i]);
maxB = std::max(maxB, bLo[i]);
}
for (std::uint8_t i = 0; i < 4; ++i) {
minB = std::min(minB, bHi[i]);
maxB = std::max(maxB, bHi[i]);
}
T min2 = Vector<T, 3, 0>::Dot(box2Corners[0], axis);
T max2 = min2;
for (Vector<T, 3, 0> corner : box2Corners) {
T projection = Vector<T, 3, 0>::Dot(corner, axis);
min2 = std::min(min2, projection);
max2 = std::max(max2, projection);
}
if (max1 < min2 || max2 < min1) {
return false;
}
if (maxA < minB || maxB < minA) return false;
}
return true;
}
export template<typename T>
constexpr bool IntersectionTestSphereOrientedBox(Vector<T, 3, 0> sphereCenter, T sphereRadius, Vector<T, 3, 0> boxSize, MatrixRowMajor<T, 4, 3, 1> boxMatrix) {
// Extract the OBB's axes (columns of the rotation matrix)
Vector<T, 3, 0> xAxis(boxMatrix.m[0][0], boxMatrix.m[0][1], boxMatrix.m[0][2]);
Vector<T, 3, 0> yAxis(boxMatrix.m[1][0], boxMatrix.m[1][1], boxMatrix.m[1][2]);
Vector<T, 3, 0> zAxis(boxMatrix.m[2][0], boxMatrix.m[2][1], boxMatrix.m[2][2]);
// Translate the sphere center into the OBB's local space
Vector<T, 3, 0> localCenter = Vector<T, 3, 0>(
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), xAxis),
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), yAxis),
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), zAxis)
);
// Clamp the local center to the OBB's extents
Vector<T, 3, 0> closestPoint = Vector<T, 3, 0>(
std::max(-boxSize.x, std::min(localCenter.x, boxSize.x)),
std::max(-boxSize.y, std::min(localCenter.y, boxSize.y)),
std::max(-boxSize.z, std::min(localCenter.z, boxSize.z))
);
// Calculate the distance between the closest point and the local center
Vector<T, 3, 0> delta = localCenter - closestPoint;
T distanceSquared = Vector<T, 3, 0>::Dot(delta, delta);
// Check if the distance is less than or equal to the sphere's radius squared
return distanceSquared <= (sphereRadius * sphereRadius);
}
}
}

View file

@ -1,433 +1,330 @@
/*
Crafter®.Math
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
export module Crafter.Math:MatrixRowMajor;
import :Basic;
import :Vector;
import std;
namespace Crafter {
export template <typename T, std::uint32_t CollumSize, std::uint32_t RowSize, std::uint32_t Repeats>
class MatrixRowMajor {
public:
T m[RowSize][CollumSize*Repeats];
MatrixRowMajor() = default;
MatrixRowMajor(
float x0, float y0, float z0, float w0,
float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2,
float x3, float y3, float z3, float w3
) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
m[0][0] = x0;
m[0][1] = y0;
m[0][2] = z0;
m[0][3] = w0;
m[1][0] = x1;
m[1][1] = y1;
m[1][2] = z1;
m[1][3] = w1;
m[2][0] = x2;
m[2][1] = y2;
m[2][2] = z2;
m[2][3] = w2;
m[3][0] = x3;
m[3][1] = y3;
m[3][2] = z3;
m[3][3] = w3;
}
MatrixRowMajor(
float x0, float y0, float z0, float w0,
float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2
) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
m[0][0] = x0;
m[0][1] = y0;
m[0][2] = z0;
m[0][3] = w0;
m[1][0] = x1;
m[1][1] = y1;
m[1][2] = z1;
m[1][3] = w1;
m[2][0] = x2;
m[2][1] = y2;
m[2][2] = z2;
m[2][3] = w2;
}
template <std::uint32_t VAligment>
Vector<T, 3, VAligment> operator*(Vector<T, 3, VAligment> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return Vector<T, 3, VAligment>(
b.x * m[0][0] + b.y * m[0][1] + b.z * m[0][2] + m[0][3],
b.x * m[1][0] + b.y * m[1][1] + b.z * m[1][2] + m[1][3],
b.x * m[2][0] + b.y * m[2][1] + b.z * m[2][2] + m[2][3]
);
}
MatrixRowMajor<T, CollumSize, RowSize, Repeats> operator*(MatrixRowMajor<T, CollumSize, RowSize, Repeats> b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor<T, CollumSize, RowSize, Repeats> result;
result.m[0][0] = b.m[0][0] * m[0][0] + b.m[0][1] * m[1][0] + b.m[0][2] * m[2][0] + b.m[0][3] * m[3][0];
result.m[1][0] = b.m[1][0] * m[0][0] + b.m[1][1] * m[1][0] + b.m[1][2] * m[2][0] + b.m[1][3] * m[3][0];
result.m[2][0] = b.m[2][0] * m[0][0] + b.m[2][1] * m[1][0] + b.m[2][2] * m[2][0] + b.m[2][3] * m[3][0];
result.m[3][0] = b.m[3][0] * m[0][0] + b.m[3][1] * m[1][0] + b.m[3][2] * m[2][0] + b.m[3][3] * m[3][0];
result.m[0][1] = b.m[0][0] * m[0][1] + b.m[0][1] * m[1][1] + b.m[0][2] * m[2][1] + b.m[0][3] * m[3][1];
result.m[1][1] = b.m[1][0] * m[0][1] + b.m[1][1] * m[1][1] + b.m[1][2] * m[2][1] + b.m[1][3] * m[3][1];
result.m[2][1] = b.m[2][0] * m[0][1] + b.m[2][1] * m[1][1] + b.m[2][2] * m[2][1] + b.m[2][3] * m[3][1];
result.m[3][1] = b.m[3][0] * m[0][1] + b.m[3][1] * m[1][1] + b.m[3][2] * m[2][1] + b.m[3][3] * m[3][1];
result.m[0][2] = b.m[0][0] * m[0][2] + b.m[0][1] * m[1][2] + b.m[0][2] * m[2][2] + b.m[0][3] * m[3][2];
result.m[1][2] = b.m[1][0] * m[0][2] + b.m[1][1] * m[1][2] + b.m[1][2] * m[2][2] + b.m[1][3] * m[3][2];
result.m[2][2] = b.m[2][0] * m[0][2] + b.m[2][1] * m[1][2] + b.m[2][2] * m[2][2] + b.m[2][3] * m[3][2];
result.m[3][2] = b.m[3][0] * m[0][2] + b.m[3][1] * m[1][2] + b.m[3][2] * m[2][2] + b.m[3][3] * m[3][2];
result.m[0][3] = b.m[0][0] * m[0][3] + b.m[0][1] * m[1][3] + b.m[0][2] * m[2][3] + b.m[0][3] * m[3][3];
result.m[1][3] = b.m[1][0] * m[0][3] + b.m[1][1] * m[1][3] + b.m[1][2] * m[2][3] + b.m[1][3] * m[3][3];
result.m[2][3] = b.m[2][0] * m[0][3] + b.m[2][1] * m[1][3] + b.m[2][2] * m[2][3] + b.m[2][3] * m[3][3];
result.m[3][3] = b.m[3][0] * m[0][3] + b.m[3][1] * m[1][3] + b.m[3][2] * m[2][3] + b.m[3][3] * m[3][3];
return result;
}
MatrixRowMajor<T, CollumSize, RowSize, Repeats> operator*(MatrixRowMajor<T, CollumSize, RowSize, Repeats> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor<T, CollumSize, RowSize, Repeats> result;
// Column 0
result.m[0][0] = b.m[0][0]*m[0][0] + b.m[0][1]*m[1][0] + b.m[0][2]*m[2][0];
result.m[1][0] = b.m[1][0]*m[0][0] + b.m[1][1]*m[1][0] + b.m[1][2]*m[2][0];
result.m[2][0] = b.m[2][0]*m[0][0] + b.m[2][1]*m[1][0] + b.m[2][2]*m[2][0];
// Column 1
result.m[0][1] = b.m[0][0]*m[0][1] + b.m[0][1]*m[1][1] + b.m[0][2]*m[2][1];
result.m[1][1] = b.m[1][0]*m[0][1] + b.m[1][1]*m[1][1] + b.m[1][2]*m[2][1];
result.m[2][1] = b.m[2][0]*m[0][1] + b.m[2][1]*m[1][1] + b.m[2][2]*m[2][1];
// Column 2
result.m[0][2] = b.m[0][0]*m[0][2] + b.m[0][1]*m[1][2] + b.m[0][2]*m[2][2];
result.m[1][2] = b.m[1][0]*m[0][2] + b.m[1][1]*m[1][2] + b.m[1][2]*m[2][2];
result.m[2][2] = b.m[2][0]*m[0][2] + b.m[2][1]*m[1][2] + b.m[2][2]*m[2][2];
// Translation column
result.m[0][3] = b.m[0][0]*m[0][3] + b.m[0][1]*m[1][3] + b.m[0][2]*m[2][3] + b.m[0][3];
result.m[1][3] = b.m[1][0]*m[0][3] + b.m[1][1]*m[1][3] + b.m[1][2]*m[2][3] + b.m[1][3];
result.m[2][3] = b.m[2][0]*m[0][3] + b.m[2][1]*m[1][3] + b.m[2][2]*m[2][3] + b.m[2][3];
return result;
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0
);
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>(
x, 0, 0, 0,
0, y, 0, 0,
0, 0, z, 0,
0, 0, 0, 1
);
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>(
x, 0, 0, 0,
0, y, 0, 0,
0, 0, z, 0
);
}
template <std::uint32_t VAligment>
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(Vector<float, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return Scaling(vector.x, vector.y, vector.z);
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
x, y, z, 1
);
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>(
1, 0, 0, x,
0, 1, 0, y,
0, 0, 1, z
);
}
template <std::uint32_t VAligment>
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(Vector<T, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return Translation(vector.x, vector.y, vector.z);
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
float cp = std::cosf(Pitch);
float sp = std::sinf(Pitch);
float cy = std::cosf(Yaw);
float sy = std::sinf(Yaw);
float cr = std::cosf(Roll);
float sr = std::sinf(Roll);
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
}
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
float cp = std::cosf(Pitch);
float sp = std::sinf(Pitch);
float cy = std::cosf(Yaw);
float sy = std::sinf(Yaw);
float cr = std::cosf(Roll);
float sr = std::sinf(Roll);
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
return M;
}
template <std::uint32_t VAligment>
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> LookAt(Vector<T, 3, VAligment> eyePosition, Vector<T, 3, VAligment> focusPosition, Vector<T, 3, VAligment> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
Vector<T, 3, VAligment> negEyeDirection = eyePosition - focusPosition;
return LookTo(eyePosition, negEyeDirection, upDirection);
return M;
}
template <std::uint32_t VAligment>
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> LookTo(Vector<T, 3, VAligment> eyePosition, Vector<T, 3, VAligment> eyeDirection, Vector<T, 3, VAligment> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
Vector<T, 3, 3> R2 = eyeDirection.Normalize();
Vector<T, 3, 3> R0 = upDirection.Cross(R2);
R0 = R0.Normalize();
Vector<T, 3, 3> R1 = R2.Cross(R0);
Vector<T, 3, 3> NegEyePosition = -eyePosition;
float D0 = R0.Dot(NegEyePosition);
float D1 = R1.Dot(NegEyePosition);
float D2 = R2.Dot(NegEyePosition);
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = R0.v[0];
M.m[1][0] = R0.v[1];
M.m[2][0] = R0.v[2];
M.m[3][0] = D0;
M.m[0][1] = R1.v[0];
M.m[1][1] = R1.v[1];
M.m[2][1] = R1.v[2];
M.m[3][1] = D1;
M.m[0][2] = R2.v[0];
M.m[1][2] = R2.v[1];
M.m[2][2] = R2.v[2];
M.m[3][2] = D2;
M.m[0][3] = 0;
M.m[1][3] = 0;
M.m[2][3] = 0;
M.m[3][3] = 1;
return M;
}
template <std::uint32_t VAligment>
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(Vector<T, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return Rotation(vector.x, vector.y, vector.z);
}
template <std::uint32_t VAligment>
Vector<T, 3, VAligment> TransformNormal(Vector<T, 3, VAligment> V) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
Vector<T, 3, 3> Z = Vector<T, 3, 3>(V.z, V.z, V.z);
Vector<T, 3, 3> Y = Vector<T, 3, 3>(V.y, V.y, V.y);
Vector<T, 3, 3> X = Vector<T, 3, 3>(V.x, V.x, V.x);
Vector<T, 3, VAligment> Result = Z * Vector<T, 3, VAligment>(m[2][0], m[2][1], m[2][2]);
Result = Y * Vector<T, 3, VAligment>(m[1][0], m[1][1], m[1][2]) + Result;
Result = X * Vector<T, 3, VAligment>(m[0][0], m[0][1], m[0][2]) + Result;
return Result;
}
// MatrixRowMajor<T, CollumSize, RowSize, Repeats> Inverse() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
// Vector<float, 4> V0[4], V1[4];
// V0[0] = Vector<float, 4>(m[0][2], m[0][2], m[1][2], m[1][2]);
// V1[0] = Vector<float, 4>(m[2][3], m[3][3], m[2][3], m[3][3]);
// V0[1] = Vector<float, 4>(m[0][0], m[0][0], m[1][0], m[1][0]);
// V1[1] = Vector<float, 4>(m[2][1], m[3][1], m[2][1], m[3][1]);
// V0[2] = Vector<float, 4>(m[0][2], m[2][2], m[0][0], m[2][0]);
// V1[2] = Vector<float, 4>(m[1][3], m[3][3], m[1][1], m[3][1]);
// Vector<float, 4> D0 = V0[0] * V1[0];
// Vector<float, 4> D1 = V0[1] * V1[1];
// Vector<float, 4> D2 = V0[2] * V1[2];
// V0[0] = Vector<float, 4>(m[2][2], m[3][2], m[2][2], m[3][2]);
// V1[0] = Vector<float, 4>(m[0][3], m[0][3], m[1][3], m[1][3]);
// V0[1] = Vector<float, 4>(m[2][0], m[3][0], m[2][0], m[3][0]);
// V1[1] = Vector<float, 4>(m[0][1], m[0][1], m[1][1], m[1][1]);
// V0[2] = Vector<float, 4>(m[1][2], m[3][2], m[1][0], m[3][0]);
// V1[2] = Vector<float, 4>(m[0][3], m[2][3], m[0][1], m[2][1]);
// D0 = Vector<float, 4>::NegativeMultiplySubtract(V0[0], V1[0], D0);
// D1 = Vector<float, 4>::NegativeMultiplySubtract(V0[1], V1[1], D1);
// D2 = Vector<float, 4>::NegativeMultiplySubtract(V0[2], V1[2], D2);
// V0[0] = Vector<float, 4>(m[1][1], m[2][1], m[0][1], m[1][1]);
// V1[0] = Vector<float, 4>(D2.v[1], D0.v[1], D0.v[3], D0.v[0]);
// V0[1] = Vector<float, 4>(m[2][0], m[0][0], m[1][0], m[0][0]);
// V1[1] = Vector<float, 4>(D0.v[3], D2.v[1], D0.v[1], D0.v[2]);
// V0[2] = Vector<float, 4>(m[1][3], m[2][3], m[0][3], m[1][3]);
// V1[2] = Vector<float, 4>(D2.v[3], D1.v[1], D1.v[3], D1.v[0]);
// V0[3] = Vector<float, 4>(m[2][2], m[0][2], m[1][2], m[0][2]);
// V1[3] = Vector<float, 4>(D1.v[3], D2.v[3], D1.v[1], D1.v[2]);
// Vector<float, 4> C0 = V0[0] * V1[0];
// Vector<float, 4> C2 = V0[1] * V1[1];
// Vector<float, 4> C4 = V0[2] * V1[2];
// Vector<float, 4> C6 = V0[3] * V1[3];
// V0[0] = Vector<float, 4>(m[2][1], m[3][1], m[1][1], m[2][1]);
// V1[0] = Vector<float, 4>(D0.v[3], D0.v[0], D0.v[1], D2.v[0]);
// V0[1] = Vector<float, 4>(m[3][0], m[2][0], m[3][0], m[1][0]);
// V1[1] = Vector<float, 4>(D0.v[2], D0.v[1], D2.v[0], D0.v[0]);
// V0[2] = Vector<float, 4>(m[2][3], m[3][3], m[1][3], m[2][3]);
// V1[2] = Vector<float, 4>(D1.v[3], D1.v[0], D1.v[1], D2.v[2]);
// V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
// V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
// C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
// C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
// C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
// C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
// V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
// V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
// V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
// V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
// V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
// V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
// V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
// V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
// XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
// C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
// XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
// C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
// XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
// C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
// XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
// C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
// XMMATRIX R;
// R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
// R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
// R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
// R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
// XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
// XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
// XMMATRIX Result;
// Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
// Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
// Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
// Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
// return Result;
// }
};
}
template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 4, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 4, 1>& obj, format_context& ctx) const {
return std::formatter<std::string>::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3],
obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3],
obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3],
obj.m[3][0], obj.m[3][1], obj.m[3][2], obj.m[3][3]
), ctx);
}
};
template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 3, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 3, 1>& obj, format_context& ctx) const {
return std::formatter<std::string>::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3],
obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3],
obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3]
), ctx);
}
};
/*
Crafter®.Math
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
export module Crafter.Math:MatrixRowMajor;
import :Basic;
import :VectorF32;
import std;
namespace Crafter {
// Row-major matrix whose rows live in optimized SIMD vectors. All
// multiplications are expressed as broadcast + fused multiply-add against
// these row vectors so the heavy work stays in __m128/__m256/__m512 land.
//
// CollumSize is the column count; only CollumSize == 4 is implemented (the
// matrix gets one SIMD row per row). Repeats is reserved for future
// SoA-style batching.
export template <typename T, std::uint32_t CollumSize, std::uint32_t RowSize, std::uint32_t Repeats>
class MatrixRowMajor {
public:
// Rows are exposed publicly so users can compose with VectorF32 ops
// directly without going through an accessor. Each row is a single
// SIMD vector covering all columns.
VectorF32<static_cast<std::uint8_t>(CollumSize), 1> rows[RowSize];
MatrixRowMajor() = default;
MatrixRowMajor(
float x0, float y0, float z0, float w0,
float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2,
float x3, float y3, float z3, float w3
) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float r0[4] = { x0, y0, z0, w0 };
alignas(16) float r1[4] = { x1, y1, z1, w1 };
alignas(16) float r2[4] = { x2, y2, z2, w2 };
alignas(16) float r3[4] = { x3, y3, z3, w3 };
rows[0] = VectorF32<4, 1>(r0);
rows[1] = VectorF32<4, 1>(r1);
rows[2] = VectorF32<4, 1>(r2);
rows[3] = VectorF32<4, 1>(r3);
}
MatrixRowMajor(
float x0, float y0, float z0, float w0,
float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2
) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float r0[4] = { x0, y0, z0, w0 };
alignas(16) float r1[4] = { x1, y1, z1, w1 };
alignas(16) float r2[4] = { x2, y2, z2, w2 };
rows[0] = VectorF32<4, 1>(r0);
rows[1] = VectorF32<4, 1>(r1);
rows[2] = VectorF32<4, 1>(r2);
}
// Flatten to RowSize*CollumSize contiguous floats (row-major). Replaces
// the old `m[i][j]` raw array access for callers that need a packed
// float buffer (e.g. GPU upload via memcpy).
constexpr void Store(float* dst) const requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
for (std::uint32_t i = 0; i < RowSize; ++i) {
rows[i].Store(dst + i * 4);
}
}
constexpr std::array<float, RowSize * 4> Store() const requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, RowSize * 4> out{};
Store(out.data());
return out;
}
// Affine transform: extend `b` with implicit w=1 (translation) and dot
// each row against it. Three row-dots packed into one batched 4-pair
// Dot call (lane 3 of the result is the duplicated row 0 and gets
// discarded).
VectorF32<3, 1> operator*(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> bArr = b.template Store<float>();
alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 1.0f };
VectorF32<4, 1> bh(bhBuf);
VectorF32<1, 4> dots = VectorF32<4, 1>::Dot(
rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh);
std::array<float, 4> dotsArr = dots.template Store<float>();
alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f };
return VectorF32<3, 1>(outBuf);
}
// Linear transform (no translation): same as the affine version but
// with bh.w = 0 so the translation column does not contribute. Useful
// for direction vectors and normals.
VectorF32<3, 1> TransformNormal(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> bArr = b.template Store<float>();
alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 0.0f };
VectorF32<4, 1> bh(bhBuf);
VectorF32<1, 4> dots = VectorF32<4, 1>::Dot(
rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh);
std::array<float, 4> dotsArr = dots.template Store<float>();
alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f };
return VectorF32<3, 1>(outBuf);
}
// 4×4 matrix product via broadcast + FMA. Each result row is
// b[i][0]·rows[0] + b[i][1]·rows[1] + b[i][2]·rows[2] + b[i][3]·rows[3]
// produced with four shuffle-broadcasts and three fused multiply-adds.
MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor result;
for (std::uint32_t i = 0; i < 4; ++i) {
VectorF32<4, 1> bi = b.rows[i];
VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>();
VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>();
VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>();
VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>();
VectorF32<4, 1> row = bx * rows[0];
row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row);
row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row);
row = VectorF32<4, 1>::MulitplyAdd(bw, rows[3], row);
result.rows[i] = row;
}
return result;
}
// 4×3 affine product. Same broadcast + FMA pattern, but the implicit
// 4th row of both matrices is [0, 0, 0, 1] so the b.w · row3 term
// contributes only to the translation slot.
MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float wRowBuf[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
VectorF32<4, 1> wRow(wRowBuf);
MatrixRowMajor result;
for (std::uint32_t i = 0; i < 3; ++i) {
VectorF32<4, 1> bi = b.rows[i];
VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>();
VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>();
VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>();
VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>();
VectorF32<4, 1> row = bx * rows[0];
row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row);
row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row);
row = VectorF32<4, 1>::MulitplyAdd(bw, wRow, row);
result.rows[i] = row;
}
return result;
}
static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0
);
}
static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1
);
}
static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
x, 0, 0, 0,
0, y, 0, 0,
0, 0, z, 0,
0, 0, 0, 1
);
}
static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
x, 0, 0, 0,
0, y, 0, 0,
0, 0, z, 0
);
}
static MatrixRowMajor Scaling(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> a = vector.template Store<float>();
return Scaling(a[0], a[1], a[2]);
}
static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
x, y, z, 1
);
}
static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
1, 0, 0, x,
0, 1, 0, y,
0, 0, 1, z
);
}
static MatrixRowMajor Translation(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> a = vector.template Store<float>();
return Translation(a[0], a[1], a[2]);
}
// Pitch/yaw/roll Euler rotation. Computes all three sin/cos pairs as a
// single batched SinCos on a VectorF32<3, 1>, then assembles the rows.
static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f };
VectorF32<3, 1> v(angles);
std::tuple<VectorF32<3, 1>, VectorF32<3, 1>> sc = v.SinCos();
std::array<float, 4> s = std::get<0>(sc).template Store<float>();
std::array<float, 4> c = std::get<1>(sc).template Store<float>();
const float sp = s[0], cp = c[0];
const float sy = s[1], cy = c[1];
const float sr = s[2], cr = c[2];
return MatrixRowMajor(
cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f,
cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f,
cp * sy, -sp, cp * cy, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f
);
}
static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f };
VectorF32<3, 1> v(angles);
std::tuple<VectorF32<3, 1>, VectorF32<3, 1>> sc = v.SinCos();
std::array<float, 4> s = std::get<0>(sc).template Store<float>();
std::array<float, 4> c = std::get<1>(sc).template Store<float>();
const float sp = s[0], cp = c[0];
const float sy = s[1], cy = c[1];
const float sr = s[2], cr = c[2];
return MatrixRowMajor(
cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f,
cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f,
cp * sy, -sp, cp * cy, 0.0f
);
}
static MatrixRowMajor Rotation(VectorF32<3, 1> v) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> a = v.template Store<float>();
return Rotation(a[0], a[1], a[2]);
}
// View matrix: builds the basis from a forward (negated) direction and
// an up reference, then dots each basis vector with -eye for the
// translation column. The four dots needed are produced by a single
// batched 4-pair Dot.
static MatrixRowMajor LookTo(VectorF32<3, 1> eyePosition, VectorF32<3, 1> eyeDirection, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
// R0 = up × R2 is linear in R2, so its normalized direction does
// not depend on whether we hand R2 in before or after its own
// normalize. Computing R0_raw from the un-normalized R2 lets us
// satisfy the 4-input Normalize requirement with one batched call
// (duplicating R2 and R0 in the padding slots).
VectorF32<3, 1> R0Raw = VectorF32<3, 1>::Cross(upDirection, eyeDirection);
auto normalized = VectorF32<3, 1>::Normalize(eyeDirection, R0Raw, eyeDirection, R0Raw);
VectorF32<3, 1> R2 = std::get<0>(normalized);
VectorF32<3, 1> R0 = std::get<1>(normalized);
VectorF32<3, 1> R1 = VectorF32<3, 1>::Cross(R2, R0);
VectorF32<3, 1> negEye = -eyePosition;
VectorF32<1, 4> dots = VectorF32<3, 1>::Dot(
R0, negEye, R1, negEye, R2, negEye, R0, negEye);
std::array<float, 4> d = dots.template Store<float>();
std::array<float, 4> r0a = R0.template Store<float>();
std::array<float, 4> r1a = R1.template Store<float>();
std::array<float, 4> r2a = R2.template Store<float>();
return MatrixRowMajor(
r0a[0], r1a[0], r2a[0], 0.0f,
r0a[1], r1a[1], r2a[1], 0.0f,
r0a[2], r1a[2], r2a[2], 0.0f,
d[0], d[1], d[2], 1.0f
);
}
static MatrixRowMajor LookAt(VectorF32<3, 1> eyePosition, VectorF32<3, 1> focusPosition, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return LookTo(eyePosition, eyePosition - focusPosition, upDirection);
}
};
}
// Pretty printer using Store() so it does not depend on the legacy m[i][j]
// access pattern.
template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 4, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 4, 1>& obj, format_context& ctx) const {
std::array<float, 16> v = obj.Store();
return std::formatter<std::string>::format(std::format(
"{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
v[0], v[1], v[2], v[3],
v[4], v[5], v[6], v[7],
v[8], v[9], v[10], v[11],
v[12], v[13], v[14], v[15]
), ctx);
}
};
template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 3, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 3, 1>& obj, format_context& ctx) const {
std::array<float, 12> v = obj.Store();
return std::formatter<std::string>::format(std::format(
"{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
v[0], v[1], v[2], v[3],
v[4], v[5], v[6], v[7],
v[8], v[9], v[10], v[11]
), ctx);
}
};