matrix operations

This commit is contained in:
Jorijn van der Graaf 2026-05-18 18:03:20 +02:00
commit ad5ba21b4d
6 changed files with 1433 additions and 613 deletions

65
README.md Normal file
View file

@ -0,0 +1,65 @@
# Crafter.Math
A C++23 math library for the Crafter engine, distributed as a set of C++ modules. Provides generic vector and matrix types alongside SIMD-specialized fixed-size vectors for `float` and `_Float16`, plus a small set of ray intersection routines.
The library is hardware-aware: it picks the widest SIMD path the target supports (SSE / AVX / AVX-512 on x86_64, SIMD128 on WebAssembly) and falls back to scalar code elsewhere.
## Modules
`import Crafter.Math;` re-exports the following partitions:
| Partition | Contents |
| --- | --- |
| `:Basic` | `ToRadian`, plus the `VectorF16 → VectorF32` alias on x86_64 without AVX512-FP16. |
| `:Vector` | Generic `Vector<T, Len, Alignment>` with `.x/.y/.z/.w` and `.r/.g/.b/.a` accessors, arithmetic, comparison, and conversion operators. |
| `:MatrixRowMajor` | `MatrixRowMajor<T, ColumnSize, RowSize, Repeats>` — row-major matrices with packed-batch support via `Repeats`. |
| `:VectorF32` | `VectorF32<Len, Packing>` — SIMD-specialized 32-bit float vector with `Load`/`Store`, FMA, `Cross`, `Dot`, `Length`, `Normalize`, `Sin`/`Cos`, `Shuffle`, etc. |
| `:VectorF16` | `VectorF16<Len, Packing>` — same surface as `VectorF32` for `_Float16` when the target supports it. |
| `:Intersection` | `IntersectionTestRayTriangle`, `IntersectionTestRaySphere`, `IntersectionTestRayOrientedBox`. |
| `:Common` | Internal SIMD type selection (`__m128/256/512`, `__m128h/256h/512h`, `v128_t`, or scalar array) used by the F16/F32 vectors. |
`Len` is the logical vector width (14 for most graphics use) and `Packing` is the number of vectors packed into one SIMD register — e.g. `VectorF32<3, 4>` holds four 3-vectors in a single 512-bit register, which is what the batched `Dot`, `Length`, and `Normalize` overloads operate on.
## Example
```cpp
import Crafter.Math;
using namespace Crafter;
// Generic 3-vector of floats.
Vector<float, 3> a(1.0f, 2.0f, 3.0f);
Vector<float, 3> b(4.0f, 5.0f, 6.0f);
auto c = a + b;
// SIMD-specialized 4-vector with one element of padding.
VectorF32<4, 1> p(1.0f);
VectorF32<4, 1> q(2.0f);
auto r = VectorF32<3, 1>::Cross(p, q);
// Ray vs. triangle.
float t = IntersectionTestRayTriangle<float>(
{0,0,0}, {1,0,0}, {0,1,0},
{0.25f, 0.25f, -1.0f}, {0, 0, 1});
```
## Building
This project uses [Crafter.Build](https://forgejo.catcrafts.net/Catcrafts/Crafter.Build). The build script in [project.cpp](project.cpp) declares the library as a static C++ module library and registers three test executables targeting different ISAs:
- `Vector-sapphirerapids` — AVX-512 with FP16 (`-march=sapphirerapids`)
- `Vector-x86-64-v4` — AVX-512 baseline
- `Vector-x86-64-v3` — AVX2 baseline
Build and run tests via your usual Crafter.Build entry point; `build/` and `bin/` are git-ignored.
## Target support
- **x86_64**: SSE / AVX / AVX-512F selected per target; AVX512-FP16 is required for native `VectorF16` (otherwise it aliases `VectorF32`). F16C is used for fp16 ↔ fp32 conversion when available.
- **WebAssembly**: `wasm_simd128.h` 128-bit path; SIMD width is capped at 16 bytes.
- **Other targets**: scalar fallback via `std::array<T, N>`.
## License
LGPL-3.0-only. See [LICENSE](LICENSE).
Copyright © 2026 Catcrafts — [catcrafts.net](https://catcrafts.net)

View file

@ -18,207 +18,432 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
export module Crafter.Math:Intersection; export module Crafter.Math:Intersection;
import :Vector; import :VectorF32;
import :MatrixRowMajor; import :MatrixRowMajor;
import std; import std;
namespace Crafter { namespace Crafter {
export template<typename T> // All intersection tests are batched over four primitives at a time so they
constexpr T IntersectionTestRayTriangle(Vector<T, 3, 0> vert0, Vector<T, 3, 0> vert1, Vector<T, 3, 0> vert2, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) { // feed the VectorF32<3,1>::Dot / Cross / Length / Normalize four-pair
Vector<T, 3, 0> edge1 = vert1 - vert0; // overloads directly. The single-primitive case is just "pass the same
Vector<T, 3, 0> edge2 = vert2 - vert0; // primitive four times and read lane 0" - there is no single-vector
// fast-path because the SIMD pipelines want full lanes.
Vector<T, 3, 0> h = Vector<T, 3, 0>::Cross(rayDir, edge2); // Möller-Trumbore against four triangles sharing one ray. Returns ray
T determinant = Vector<T, 3, 0>::Dot(edge1, h); // parameter t per triangle, or float max where the ray misses.
export inline VectorF32<1, 4> IntersectionTestRayTriangle(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> aV0, VectorF32<3, 1> aV1, VectorF32<3, 1> aV2,
VectorF32<3, 1> bV0, VectorF32<3, 1> bV1, VectorF32<3, 1> bV2,
VectorF32<3, 1> cV0, VectorF32<3, 1> cV1, VectorF32<3, 1> cV2,
VectorF32<3, 1> dV0, VectorF32<3, 1> dV1, VectorF32<3, 1> dV2
) {
VectorF32<3, 1> aE1 = aV1 - aV0;
VectorF32<3, 1> aE2 = aV2 - aV0;
VectorF32<3, 1> bE1 = bV1 - bV0;
VectorF32<3, 1> bE2 = bV2 - bV0;
VectorF32<3, 1> cE1 = cV1 - cV0;
VectorF32<3, 1> cE2 = cV2 - cV0;
VectorF32<3, 1> dE1 = dV1 - dV0;
VectorF32<3, 1> dE2 = dV2 - dV0;
if (determinant <= std::numeric_limits<T>::epsilon()) { VectorF32<3, 1> aH = VectorF32<3, 1>::Cross(rayDir, aE2);
return std::numeric_limits<T>::max(); VectorF32<3, 1> bH = VectorF32<3, 1>::Cross(rayDir, bE2);
VectorF32<3, 1> cH = VectorF32<3, 1>::Cross(rayDir, cE2);
VectorF32<3, 1> dH = VectorF32<3, 1>::Cross(rayDir, dE2);
VectorF32<3, 1> aS = rayOrigin - aV0;
VectorF32<3, 1> bS = rayOrigin - bV0;
VectorF32<3, 1> cS = rayOrigin - cV0;
VectorF32<3, 1> dS = rayOrigin - dV0;
VectorF32<3, 1> aQ = VectorF32<3, 1>::Cross(aS, aE1);
VectorF32<3, 1> bQ = VectorF32<3, 1>::Cross(bS, bE1);
VectorF32<3, 1> cQ = VectorF32<3, 1>::Cross(cS, cE1);
VectorF32<3, 1> dQ = VectorF32<3, 1>::Cross(dS, dE1);
// Four 3-component dots packed into one __m128 per call.
VectorF32<1, 4> det = VectorF32<3, 1>::Dot(
aE1, aH, bE1, bH, cE1, cH, dE1, dH);
VectorF32<1, 4> uNum = VectorF32<3, 1>::Dot(
aS, aH, bS, bH, cS, cH, dS, dH);
VectorF32<1, 4> vNum = VectorF32<3, 1>::Dot(
rayDir, aQ, rayDir, bQ, rayDir, cQ, rayDir, dQ);
VectorF32<1, 4> tNum = VectorF32<3, 1>::Dot(
aE2, aQ, bE2, bQ, cE2, cQ, dE2, dQ);
std::array<float, 4> detArr = det.template Store<float>();
std::array<float, 4> uArr = uNum.template Store<float>();
std::array<float, 4> vArr = vNum.template Store<float>();
std::array<float, 4> tArr = tNum.template Store<float>();
constexpr float eps = std::numeric_limits<float>::epsilon();
constexpr float maxF = std::numeric_limits<float>::max();
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float d = detArr[i];
if (d <= eps) { out[i] = maxF; continue; }
float invD = 1.0f / d;
float u = uArr[i] * invD;
if (u < 0.0f || u > 1.0f) { out[i] = maxF; continue; }
float v = vArr[i] * invD;
if (v < 0.0f || u + v > 1.0f) { out[i] = maxF; continue; }
out[i] = tArr[i] * invD;
}
return VectorF32<1, 4>(out.data());
} }
T inverse_determinant = T(1) / determinant; // One ray against four spheres. radii must hold {rA, rB, rC, rD} in lanes
// 0..3.
export inline VectorF32<1, 4> IntersectionTestRaySphere(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> posA, VectorF32<3, 1> posB,
VectorF32<3, 1> posC, VectorF32<3, 1> posD,
VectorF32<1, 4> radii
) {
VectorF32<3, 1> sA = rayOrigin - posA;
VectorF32<3, 1> sB = rayOrigin - posB;
VectorF32<3, 1> sC = rayOrigin - posC;
VectorF32<3, 1> sD = rayOrigin - posD;
Vector<T, 3, 0> origins_diff_vector = rayOrigin - vert0; // dirDotS_i = rayDir · (rayOrigin - pos_i)
T u = Vector<T, 3, 0>::Dot(origins_diff_vector, h) * inverse_determinant; VectorF32<1, 4> dirDotS = VectorF32<3, 1>::Dot(
rayDir, sA, rayDir, sB, rayDir, sC, rayDir, sD);
// sqDist_i = |rayOrigin - pos_i|² (a.k.a. LengthSq of the s vectors)
VectorF32<1, 4> sqDist = VectorF32<3, 1>::LengthSq(sA, sB, sC, sD);
// aScalar = rayDir · rayDir, broadcast across four lanes.
VectorF32<1, 4> aScalar = VectorF32<3, 1>::LengthSq(
rayDir, rayDir, rayDir, rayDir);
if (u < 0.0 || u > 1.0) VectorF32<1, 4> two(2.0f);
{ VectorF32<1, 4> four(4.0f);
return std::numeric_limits<T>::max(); VectorF32<1, 4> b = two * dirDotS;
VectorF32<1, 4> c = sqDist - radii * radii;
// discriminant = b² - 4·a·c
VectorF32<1, 4> disc = b * b - four * aScalar * c;
std::array<float, 4> discArr = disc.template Store<float>();
std::array<float, 4> bArr = b.template Store<float>();
std::array<float, 4> aArr = aScalar.template Store<float>();
constexpr float maxF = std::numeric_limits<float>::max();
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float d = discArr[i];
if (d < 0.0f) { out[i] = maxF; continue; }
float sqrtD = std::sqrt(d);
float t = -0.5f * (bArr[i] + sqrtD) / aArr[i];
out[i] = (t > 0.0f) ? t : maxF;
}
return VectorF32<1, 4>(out.data());
} }
Vector<T, 3, 0> q = Vector<T, 3, 0>::Cross(origins_diff_vector, edge1); // One ray against four OBBs. Each box is described by world-space position,
T v = inverse_determinant * Vector<T, 3, 0>::Dot(rayDir, q); // half-extent vector (per-axis sizes), and a unit quaternion rotation.
export inline VectorF32<1, 4> IntersectionTestRayOrientedBox(
VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir,
VectorF32<3, 1> posA, VectorF32<3, 1> sizeA, VectorF32<4, 1> rotA,
VectorF32<3, 1> posB, VectorF32<3, 1> sizeB, VectorF32<4, 1> rotB,
VectorF32<3, 1> posC, VectorF32<3, 1> sizeC, VectorF32<4, 1> rotC,
VectorF32<3, 1> posD, VectorF32<3, 1> sizeD, VectorF32<4, 1> rotD
) {
// Conjugate quaternion: negate xyz, keep w. Negate<{true,true,true,false}>
// is constant-folded into a single XOR with a mask vector.
VectorF32<4, 1> invRotA = rotA.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotB = rotB.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotC = rotC.template Negate<{{true, true, true, false}}>();
VectorF32<4, 1> invRotD = rotD.template Negate<{{true, true, true, false}}>();
if (v < 0.0 || u + v > 1.0) { VectorF32<3, 1> localOriginA = VectorF32<3, 1>::Rotate(rayOrigin - posA, invRotA);
return std::numeric_limits<T>::max(); VectorF32<3, 1> localOriginB = VectorF32<3, 1>::Rotate(rayOrigin - posB, invRotB);
} VectorF32<3, 1> localOriginC = VectorF32<3, 1>::Rotate(rayOrigin - posC, invRotC);
VectorF32<3, 1> localOriginD = VectorF32<3, 1>::Rotate(rayOrigin - posD, invRotD);
return inverse_determinant * Vector<T, 3, 0>::Dot(edge2, q); VectorF32<3, 1> localDirA = VectorF32<3, 1>::Rotate(rayDir, invRotA);
} VectorF32<3, 1> localDirB = VectorF32<3, 1>::Rotate(rayDir, invRotB);
VectorF32<3, 1> localDirC = VectorF32<3, 1>::Rotate(rayDir, invRotC);
VectorF32<3, 1> localDirD = VectorF32<3, 1>::Rotate(rayDir, invRotD);
export template<typename T> VectorF32<3, 1> halfA = sizeA * 0.5f;
constexpr T IntersectionTestRaySphere(Vector<T, 3, 0> position, T radius, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) { VectorF32<3, 1> halfB = sizeB * 0.5f;
T a = Vector<T, 3, 0>::Dot(rayDir, rayDir); VectorF32<3, 1> halfC = sizeC * 0.5f;
T b = Vector<T, 3, 0>::Dot(rayDir, (T(2) * (rayOrigin - position))); VectorF32<3, 1> halfD = sizeD * 0.5f;
T c = Vector<T, 3, 0>::Dot(position, position) + Vector<T, 3, 0>::Dot(rayOrigin, rayOrigin) - T(2) * Vector<T, 3, 0>::Dot(rayOrigin, position) - radius * radius;
T d = b * b + (T(-4)) * a * c;
if (d < 0) { std::array<std::array<float, 4>, 4> origLanes{
return std::numeric_limits<T>::max(); localOriginA.template Store<float>(),
} localOriginB.template Store<float>(),
localOriginC.template Store<float>(),
d = std::sqrt(d); localOriginD.template Store<float>(),
};
T t = (T(-0.5)) * (b + d) / a; std::array<std::array<float, 4>, 4> dirLanes{
if (t > T(0)) { localDirA.template Store<float>(),
return t; localDirB.template Store<float>(),
} else { localDirC.template Store<float>(),
return std::numeric_limits<T>::max(); localDirD.template Store<float>(),
} };
} std::array<std::array<float, 4>, 4> halfLanes{
halfA.template Store<float>(),
export template<typename T> halfB.template Store<float>(),
constexpr T IntersectionTestRayOrientedBox(Vector<T, 3, 0> boxPosition, Vector<T, 3, 0> boxSize, Vector<T, 4, 0> boxRotation, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) { halfC.template Store<float>(),
Vector<T, 4, 0> invRot( halfD.template Store<float>(),
-boxRotation.x,
-boxRotation.y,
-boxRotation.z,
boxRotation.w
);
Vector<T, 3, 0> localOrigin = Vector<T, 3, 0>::Rotate(rayOrigin - boxPosition, invRot);
Vector<T, 3, 0> localDir = Vector<T, 3, 0>::Rotate(rayDir, invRot);
Vector<T,3,0> halfExtents = boxSize * T(0.5);
T tMin = T(0);
T tMax = std::numeric_limits<T>::max();
for (std::uint32_t i = 0; i < 3; ++i)
{
if (std::abs(localDir.v[i]) < std::numeric_limits<T>::epsilon())
{
if (localOrigin.v[i] < -halfExtents.v[i] || localOrigin.v[i] > halfExtents.v[i]) {
return std::numeric_limits<T>::max();
}
}
else
{
T invD = T(1) / localDir.v[i];
T t1 = (-halfExtents.v[i] - localOrigin.v[i]) * invD;
T t2 = ( halfExtents.v[i] - localOrigin.v[i]) * invD;
if (t1 > t2) {
std::swap(t1, t2);
}
tMin = std::max(tMin, t1);
tMax = std::min(tMax, t2);
if (tMin > tMax) {
return std::numeric_limits<T>::max();
}
}
}
return (tMin >= T(0)) ? tMin : tMax;
}
export template<typename T>
std::vector<Vector<T, 3, 0>> getOBBCorners(Vector<T, 3, 0> size, MatrixRowMajor<T, 4, 3, 1> matrix) {
std::vector<Vector<T, 3, 0>> localCorners = {
Vector<T, 3, 0>(-size.x, -size.y, -size.z),
Vector<T, 3, 0>( size.x, -size.y, -size.z),
Vector<T, 3, 0>(-size.x, size.y, -size.z),
Vector<T, 3, 0>( size.x, size.y, -size.z),
Vector<T, 3, 0>(-size.x, -size.y, size.z),
Vector<T, 3, 0>( size.x, -size.y, size.z),
Vector<T, 3, 0>(-size.x, size.y, size.z),
Vector<T, 3, 0>( size.x, size.y, size.z)
}; };
std::vector<Vector<T, 3, 0>> worldCorners; constexpr float eps = std::numeric_limits<float>::epsilon();
constexpr float maxF = std::numeric_limits<float>::max();
for (Vector<T, 3, 0> localCorner : localCorners) { alignas(16) std::array<float, 4> out{};
Vector<T, 3, 0> rotatedCorner = matrix * localCorner; for (std::uint8_t b = 0; b < 4; ++b) {
worldCorners.push_back(rotatedCorner); float tMin = 0.0f;
} float tMax = maxF;
bool miss = false;
return worldCorners; for (std::uint8_t i = 0; i < 3; ++i) {
} float d = dirLanes[b][i];
float o = origLanes[b][i];
export template<typename T> float h = halfLanes[b][i];
constexpr bool IntersectionTestOrientedBoxOrientedBox(Vector<T, 3, 0> sizeA, MatrixRowMajor<T, 4, 3, 1> boxA, Vector<T, 3, 0> sizeB, MatrixRowMajor<T, 4, 3, 1> boxB) { if (std::abs(d) < eps) {
std::vector<Vector<T, 3, 0>> axes; if (o < -h || o > h) { miss = true; break; }
} else {
std::vector<Vector<T, 3, 0>> box1Corners = getOBBCorners(sizeA, boxA); float invD = 1.0f / d;
std::vector<Vector<T, 3, 0>> box2Corners = getOBBCorners(sizeB, boxB); float t1 = (-h - o) * invD;
float t2 = ( h - o) * invD;
axes.push_back(Vector<T, 3, 0>(boxA.m[0][0], boxA.m[0][1], boxA.m[0][2])); if (t1 > t2) std::swap(t1, t2);
axes.push_back(Vector<T, 3, 0>(boxA.m[1][0], boxA.m[1][1], boxA.m[1][2])); tMin = std::max(tMin, t1);
axes.push_back(Vector<T, 3, 0>(boxA.m[2][0], boxA.m[2][1], boxA.m[2][2])); tMax = std::min(tMax, t2);
if (tMin > tMax) { miss = true; break; }
axes.push_back(Vector<T, 3, 0>(boxB.m[0][0], boxB.m[0][1], boxB.m[0][2]));
axes.push_back(Vector<T, 3, 0>(boxB.m[1][0], boxB.m[1][1], boxB.m[1][2]));
axes.push_back(Vector<T, 3, 0>(boxB.m[2][0], boxB.m[2][1], boxB.m[2][2]));
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
axes.push_back(Vector<T, 3, 0>::Normalize(Vector<T, 3, 0>::Cross(Vector<T, 3, 0>(boxA.m[i][0], boxA.m[i][1], boxA.m[i][2]), Vector<T, 3, 0>(boxB.m[j][0], boxB.m[j][1], boxB.m[j][2]))));
} }
} }
out[b] = miss ? maxF : (tMin >= 0.0f ? tMin : tMax);
for (Vector<T, 3, 0> axis : axes) { }
T min1 = Vector<T, 3, 0>::Dot(box1Corners[0], axis); return VectorF32<1, 4>(out.data());
T max1 = min1;
for (Vector<T, 3, 0> corner : box1Corners) {
T projection = Vector<T, 3, 0>::Dot(corner, axis);
min1 = std::min(min1, projection);
max1 = std::max(max1, projection);
} }
T min2 = Vector<T, 3, 0>::Dot(box2Corners[0], axis); // One sphere against four OBBs. boxMatrix encodes rotation in m[r][0..2]
T max2 = min2; // and translation in m[r][3].
for (Vector<T, 3, 0> corner : box2Corners) { export inline VectorF32<1, 4> IntersectionTestSphereOrientedBox(
T projection = Vector<T, 3, 0>::Dot(corner, axis); VectorF32<3, 1> sphereCenter, VectorF32<1, 4> radii,
min2 = std::min(min2, projection); VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
max2 = std::max(max2, projection); VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB,
VectorF32<3, 1> sizeC, MatrixRowMajor<float, 4, 3, 1> boxC,
VectorF32<3, 1> sizeD, MatrixRowMajor<float, 4, 3, 1> boxD
) {
auto perBox = [&](MatrixRowMajor<float, 4, 3, 1> const& m,
VectorF32<3, 1> const& size,
VectorF32<3, 1>& xAxis,
VectorF32<3, 1>& yAxis,
VectorF32<3, 1>& zAxis,
VectorF32<3, 1>& delta) {
// Existing semantics: the OBB axes are read from the rows of the
// upper 3x3 block, and the translation column is gathered from the
// w lane of each row.
std::array<float, 4> r0 = m.rows[0].template Store<float>();
std::array<float, 4> r1 = m.rows[1].template Store<float>();
std::array<float, 4> r2 = m.rows[2].template Store<float>();
alignas(16) float xBuf[4] = { r0[0], r0[1], r0[2], 0.0f };
alignas(16) float yBuf[4] = { r1[0], r1[1], r1[2], 0.0f };
alignas(16) float zBuf[4] = { r2[0], r2[1], r2[2], 0.0f };
alignas(16) float oBuf[4] = { r0[3], r1[3], r2[3], 0.0f };
xAxis = VectorF32<3, 1>(xBuf);
yAxis = VectorF32<3, 1>(yBuf);
zAxis = VectorF32<3, 1>(zBuf);
VectorF32<3, 1> origin(oBuf);
delta = sphereCenter - origin;
(void)size;
};
VectorF32<3, 1> xA, yA, zA, dA;
VectorF32<3, 1> xB, yB, zB, dB;
VectorF32<3, 1> xC, yC, zC, dC;
VectorF32<3, 1> xD, yD, zD, dD;
perBox(boxA, sizeA, xA, yA, zA, dA);
perBox(boxB, sizeB, xB, yB, zB, dB);
perBox(boxC, sizeC, xC, yC, zC, dC);
perBox(boxD, sizeD, xD, yD, zD, dD);
// Local sphere center per box: project delta onto each box axis. We
// produce {lx, ly, lz, lx, ly, lz, lx, ly, lz, lx, ly, lz} as three
// packed 4-wide Dot results (one Dot per axis).
VectorF32<1, 4> locX = VectorF32<3, 1>::Dot(
dA, xA, dB, xB, dC, xC, dD, xD);
VectorF32<1, 4> locY = VectorF32<3, 1>::Dot(
dA, yA, dB, yB, dC, yC, dD, yD);
VectorF32<1, 4> locZ = VectorF32<3, 1>::Dot(
dA, zA, dB, zB, dC, zC, dD, zD);
std::array<float, 4> lxArr = locX.template Store<float>();
std::array<float, 4> lyArr = locY.template Store<float>();
std::array<float, 4> lzArr = locZ.template Store<float>();
std::array<float, 4> rArr = radii.template Store<float>();
std::array<std::array<float, 4>, 4> sizeLanes{
sizeA.template Store<float>(),
sizeB.template Store<float>(),
sizeC.template Store<float>(),
sizeD.template Store<float>(),
};
alignas(16) std::array<float, 4> out{};
for (std::uint8_t i = 0; i < 4; ++i) {
float lx = lxArr[i], ly = lyArr[i], lz = lzArr[i];
float sx = sizeLanes[i][0], sy = sizeLanes[i][1], sz = sizeLanes[i][2];
float cx = std::clamp(lx, -sx, sx);
float cy = std::clamp(ly, -sy, sy);
float cz = std::clamp(lz, -sz, sz);
float dx = lx - cx, dy = ly - cy, dz = lz - cz;
float distSq = dx * dx + dy * dy + dz * dz;
float r = rArr[i];
// Returns 0.0 on hit, max on miss - keeps a consistent
// "t-like" output signature with the other intersection tests.
out[i] = (distSq <= r * r) ? 0.0f : std::numeric_limits<float>::max();
}
return VectorF32<1, 4>(out.data());
} }
if (max1 < min2 || max2 < min1) { // Eight local corners of a unit OBB transformed by `matrix`. Uses one
return false; // batched 4-pair Dot per result row (x, y, z), reproducing the corners in
// two groups of four.
export inline std::array<VectorF32<3, 1>, 8> GetOBBCorners(
VectorF32<3, 1> size, MatrixRowMajor<float, 4, 3, 1> matrix
) {
std::array<float, 4> sz = size.template Store<float>();
const float sx = sz[0], sy = sz[1], sz_ = sz[2];
VectorF32<4, 1> mx = matrix.rows[0];
VectorF32<4, 1> my = matrix.rows[1];
VectorF32<4, 1> mz = matrix.rows[2];
// Eight homogeneous corner vectors with w=1 so the translation column
// of `matrix` participates in the dot product.
alignas(16) float c0[4] = { -sx, -sy, -sz_, 1.0f };
alignas(16) float c1[4] = { sx, -sy, -sz_, 1.0f };
alignas(16) float c2[4] = { -sx, sy, -sz_, 1.0f };
alignas(16) float c3[4] = { sx, sy, -sz_, 1.0f };
alignas(16) float c4[4] = { -sx, -sy, sz_, 1.0f };
alignas(16) float c5[4] = { sx, -sy, sz_, 1.0f };
alignas(16) float c6[4] = { -sx, sy, sz_, 1.0f };
alignas(16) float c7[4] = { sx, sy, sz_, 1.0f };
VectorF32<4, 1> v0(c0), v1(c1), v2(c2), v3(c3);
VectorF32<4, 1> v4(c4), v5(c5), v6(c6), v7(c7);
// First four corners (0..3): batch x, y, z dots.
VectorF32<1, 4> xLo = VectorF32<4, 1>::Dot(mx, v0, mx, v1, mx, v2, mx, v3);
VectorF32<1, 4> yLo = VectorF32<4, 1>::Dot(my, v0, my, v1, my, v2, my, v3);
VectorF32<1, 4> zLo = VectorF32<4, 1>::Dot(mz, v0, mz, v1, mz, v2, mz, v3);
// Second four corners (4..7).
VectorF32<1, 4> xHi = VectorF32<4, 1>::Dot(mx, v4, mx, v5, mx, v6, mx, v7);
VectorF32<1, 4> yHi = VectorF32<4, 1>::Dot(my, v4, my, v5, my, v6, my, v7);
VectorF32<1, 4> zHi = VectorF32<4, 1>::Dot(mz, v4, mz, v5, mz, v6, mz, v7);
std::array<float, 4> xLoA = xLo.template Store<float>();
std::array<float, 4> yLoA = yLo.template Store<float>();
std::array<float, 4> zLoA = zLo.template Store<float>();
std::array<float, 4> xHiA = xHi.template Store<float>();
std::array<float, 4> yHiA = yHi.template Store<float>();
std::array<float, 4> zHiA = zHi.template Store<float>();
std::array<VectorF32<3, 1>, 8> result;
for (std::uint8_t i = 0; i < 4; ++i) {
alignas(16) float buf[4] = { xLoA[i], yLoA[i], zLoA[i], 0.0f };
result[i] = VectorF32<3, 1>(buf);
} }
for (std::uint8_t i = 0; i < 4; ++i) {
alignas(16) float buf[4] = { xHiA[i], yHiA[i], zHiA[i], 0.0f };
result[4 + i] = VectorF32<3, 1>(buf);
}
return result;
} }
// SAT against fifteen separating axes (3 box-A, 3 box-B, 9 cross products).
// We compute every corner projection with batched 4-pair Dots: each axis
// projects four corners per call, two calls per axis covers the 8 corners.
export inline bool IntersectionTestOrientedBoxOrientedBox(
VectorF32<3, 1> sizeA, MatrixRowMajor<float, 4, 3, 1> boxA,
VectorF32<3, 1> sizeB, MatrixRowMajor<float, 4, 3, 1> boxB
) {
std::array<VectorF32<3, 1>, 8> cornersA = GetOBBCorners(sizeA, boxA);
std::array<VectorF32<3, 1>, 8> cornersB = GetOBBCorners(sizeB, boxB);
// Axes are the upper-3 lanes of each matrix row (same convention as
// SphereOrientedBox). ExtractLo<3> just retypes the SIMD register; the
// 4th lane is ignored by the Len=3 ops below.
std::array<VectorF32<3, 1>, 3> axesA = {
boxA.rows[0].template ExtractLo<3>(),
boxA.rows[1].template ExtractLo<3>(),
boxA.rows[2].template ExtractLo<3>(),
};
std::array<VectorF32<3, 1>, 3> axesB = {
boxB.rows[0].template ExtractLo<3>(),
boxB.rows[1].template ExtractLo<3>(),
boxB.rows[2].template ExtractLo<3>(),
};
std::array<VectorF32<3, 1>, 15> axes{};
axes[0] = axesA[0]; axes[1] = axesA[1]; axes[2] = axesA[2];
axes[3] = axesB[0]; axes[4] = axesB[1]; axes[5] = axesB[2];
// Normalize all nine cross axes together with a single batched
// Normalize call (Packing=3 not in the API, so two calls of four +
// one of one would be needed; for now just normalize in two batches
// of four and the trailing one inline).
std::array<VectorF32<3, 1>, 9> crossAxes{};
std::uint8_t k = 0;
for (std::uint8_t i = 0; i < 3; ++i) {
for (std::uint8_t j = 0; j < 3; ++j) {
crossAxes[k++] = VectorF32<3, 1>::Cross(axesA[i], axesB[j]);
}
}
auto norm0 = VectorF32<3, 1>::Normalize(crossAxes[0], crossAxes[1], crossAxes[2], crossAxes[3]);
auto norm1 = VectorF32<3, 1>::Normalize(crossAxes[4], crossAxes[5], crossAxes[6], crossAxes[7]);
auto norm2 = VectorF32<3, 1>::Normalize(crossAxes[8], crossAxes[8], crossAxes[8], crossAxes[8]);
axes[6] = std::get<0>(norm0);
axes[7] = std::get<1>(norm0);
axes[8] = std::get<2>(norm0);
axes[9] = std::get<3>(norm0);
axes[10] = std::get<0>(norm1);
axes[11] = std::get<1>(norm1);
axes[12] = std::get<2>(norm1);
axes[13] = std::get<3>(norm1);
axes[14] = std::get<0>(norm2);
for (std::uint8_t axisIdx = 0; axisIdx < 15; ++axisIdx) {
VectorF32<3, 1> axis = axes[axisIdx];
// Project all 8 corners of each box onto `axis` using two batched
// 4-pair Dot calls (lo and hi corners).
VectorF32<1, 4> projA_lo = VectorF32<3, 1>::Dot(
cornersA[0], axis, cornersA[1], axis,
cornersA[2], axis, cornersA[3], axis);
VectorF32<1, 4> projA_hi = VectorF32<3, 1>::Dot(
cornersA[4], axis, cornersA[5], axis,
cornersA[6], axis, cornersA[7], axis);
VectorF32<1, 4> projB_lo = VectorF32<3, 1>::Dot(
cornersB[0], axis, cornersB[1], axis,
cornersB[2], axis, cornersB[3], axis);
VectorF32<1, 4> projB_hi = VectorF32<3, 1>::Dot(
cornersB[4], axis, cornersB[5], axis,
cornersB[6], axis, cornersB[7], axis);
std::array<float, 4> aLo = projA_lo.template Store<float>();
std::array<float, 4> aHi = projA_hi.template Store<float>();
std::array<float, 4> bLo = projB_lo.template Store<float>();
std::array<float, 4> bHi = projB_hi.template Store<float>();
float minA = aLo[0], maxA = aLo[0];
for (std::uint8_t i = 1; i < 4; ++i) {
minA = std::min(minA, aLo[i]);
maxA = std::max(maxA, aLo[i]);
}
for (std::uint8_t i = 0; i < 4; ++i) {
minA = std::min(minA, aHi[i]);
maxA = std::max(maxA, aHi[i]);
}
float minB = bLo[0], maxB = bLo[0];
for (std::uint8_t i = 1; i < 4; ++i) {
minB = std::min(minB, bLo[i]);
maxB = std::max(maxB, bLo[i]);
}
for (std::uint8_t i = 0; i < 4; ++i) {
minB = std::min(minB, bHi[i]);
maxB = std::max(maxB, bHi[i]);
}
if (maxA < minB || maxB < minA) return false;
}
return true; return true;
} }
export template<typename T>
constexpr bool IntersectionTestSphereOrientedBox(Vector<T, 3, 0> sphereCenter, T sphereRadius, Vector<T, 3, 0> boxSize, MatrixRowMajor<T, 4, 3, 1> boxMatrix) {
// Extract the OBB's axes (columns of the rotation matrix)
Vector<T, 3, 0> xAxis(boxMatrix.m[0][0], boxMatrix.m[0][1], boxMatrix.m[0][2]);
Vector<T, 3, 0> yAxis(boxMatrix.m[1][0], boxMatrix.m[1][1], boxMatrix.m[1][2]);
Vector<T, 3, 0> zAxis(boxMatrix.m[2][0], boxMatrix.m[2][1], boxMatrix.m[2][2]);
// Translate the sphere center into the OBB's local space
Vector<T, 3, 0> localCenter = Vector<T, 3, 0>(
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), xAxis),
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), yAxis),
Vector<T, 3, 0>::Dot(sphereCenter - Vector<T, 3, 0>(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), zAxis)
);
// Clamp the local center to the OBB's extents
Vector<T, 3, 0> closestPoint = Vector<T, 3, 0>(
std::max(-boxSize.x, std::min(localCenter.x, boxSize.x)),
std::max(-boxSize.y, std::min(localCenter.y, boxSize.y)),
std::max(-boxSize.z, std::min(localCenter.z, boxSize.z))
);
// Calculate the distance between the closest point and the local center
Vector<T, 3, 0> delta = localCenter - closestPoint;
T distanceSquared = Vector<T, 3, 0>::Dot(delta, delta);
// Check if the distance is less than or equal to the sphere's radius squared
return distanceSquared <= (sphereRadius * sphereRadius);
}
} }

View file

@ -20,14 +20,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
export module Crafter.Math:MatrixRowMajor; export module Crafter.Math:MatrixRowMajor;
import :Basic; import :Basic;
import :Vector; import :VectorF32;
import std; import std;
namespace Crafter { namespace Crafter {
// Row-major matrix whose rows live in optimized SIMD vectors. All
// multiplications are expressed as broadcast + fused multiply-add against
// these row vectors so the heavy work stays in __m128/__m256/__m512 land.
//
// CollumSize is the column count; only CollumSize == 4 is implemented (the
// matrix gets one SIMD row per row). Repeats is reserved for future
// SoA-style batching.
export template <typename T, std::uint32_t CollumSize, std::uint32_t RowSize, std::uint32_t Repeats> export template <typename T, std::uint32_t CollumSize, std::uint32_t RowSize, std::uint32_t Repeats>
class MatrixRowMajor { class MatrixRowMajor {
public: public:
T m[RowSize][CollumSize*Repeats]; // Rows are exposed publicly so users can compose with VectorF32 ops
// directly without going through an accessor. Each row is a single
// SIMD vector covering all columns.
VectorF32<static_cast<std::uint8_t>(CollumSize), 1> rows[RowSize];
MatrixRowMajor() = default; MatrixRowMajor() = default;
@ -37,25 +47,14 @@ namespace Crafter {
float x2, float y2, float z2, float w2, float x2, float y2, float z2, float w2,
float x3, float y3, float z3, float w3 float x3, float y3, float z3, float w3
) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { ) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
m[0][0] = x0; alignas(16) float r0[4] = { x0, y0, z0, w0 };
m[0][1] = y0; alignas(16) float r1[4] = { x1, y1, z1, w1 };
m[0][2] = z0; alignas(16) float r2[4] = { x2, y2, z2, w2 };
m[0][3] = w0; alignas(16) float r3[4] = { x3, y3, z3, w3 };
rows[0] = VectorF32<4, 1>(r0);
m[1][0] = x1; rows[1] = VectorF32<4, 1>(r1);
m[1][1] = y1; rows[2] = VectorF32<4, 1>(r2);
m[1][2] = z1; rows[3] = VectorF32<4, 1>(r3);
m[1][3] = w1;
m[2][0] = x2;
m[2][1] = y2;
m[2][2] = z2;
m[2][3] = w2;
m[3][0] = x3;
m[3][1] = y3;
m[3][2] = z3;
m[3][3] = w3;
} }
MatrixRowMajor( MatrixRowMajor(
@ -63,360 +62,256 @@ namespace Crafter {
float x1, float y1, float z1, float w1, float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2 float x2, float y2, float z2, float w2
) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { ) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
m[0][0] = x0; alignas(16) float r0[4] = { x0, y0, z0, w0 };
m[0][1] = y0; alignas(16) float r1[4] = { x1, y1, z1, w1 };
m[0][2] = z0; alignas(16) float r2[4] = { x2, y2, z2, w2 };
m[0][3] = w0; rows[0] = VectorF32<4, 1>(r0);
rows[1] = VectorF32<4, 1>(r1);
m[1][0] = x1; rows[2] = VectorF32<4, 1>(r2);
m[1][1] = y1;
m[1][2] = z1;
m[1][3] = w1;
m[2][0] = x2;
m[2][1] = y2;
m[2][2] = z2;
m[2][3] = w2;
} }
template <std::uint32_t VAligment> // Flatten to RowSize*CollumSize contiguous floats (row-major). Replaces
Vector<T, 3, VAligment> operator*(Vector<T, 3, VAligment> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { // the old `m[i][j]` raw array access for callers that need a packed
return Vector<T, 3, VAligment>( // float buffer (e.g. GPU upload via memcpy).
b.x * m[0][0] + b.y * m[0][1] + b.z * m[0][2] + m[0][3], constexpr void Store(float* dst) const requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
b.x * m[1][0] + b.y * m[1][1] + b.z * m[1][2] + m[1][3], for (std::uint32_t i = 0; i < RowSize; ++i) {
b.x * m[2][0] + b.y * m[2][1] + b.z * m[2][2] + m[2][3] rows[i].Store(dst + i * 4);
); }
} }
MatrixRowMajor<T, CollumSize, RowSize, Repeats> operator*(MatrixRowMajor<T, CollumSize, RowSize, Repeats> b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { constexpr std::array<float, RowSize * 4> Store() const requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor<T, CollumSize, RowSize, Repeats> result; std::array<float, RowSize * 4> out{};
Store(out.data());
return out;
}
result.m[0][0] = b.m[0][0] * m[0][0] + b.m[0][1] * m[1][0] + b.m[0][2] * m[2][0] + b.m[0][3] * m[3][0]; // Affine transform: extend `b` with implicit w=1 (translation) and dot
result.m[1][0] = b.m[1][0] * m[0][0] + b.m[1][1] * m[1][0] + b.m[1][2] * m[2][0] + b.m[1][3] * m[3][0]; // each row against it. Three row-dots packed into one batched 4-pair
result.m[2][0] = b.m[2][0] * m[0][0] + b.m[2][1] * m[1][0] + b.m[2][2] * m[2][0] + b.m[2][3] * m[3][0]; // Dot call (lane 3 of the result is the duplicated row 0 and gets
result.m[3][0] = b.m[3][0] * m[0][0] + b.m[3][1] * m[1][0] + b.m[3][2] * m[2][0] + b.m[3][3] * m[3][0]; // discarded).
VectorF32<3, 1> operator*(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> bArr = b.template Store<float>();
alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 1.0f };
VectorF32<4, 1> bh(bhBuf);
result.m[0][1] = b.m[0][0] * m[0][1] + b.m[0][1] * m[1][1] + b.m[0][2] * m[2][1] + b.m[0][3] * m[3][1]; VectorF32<1, 4> dots = VectorF32<4, 1>::Dot(
result.m[1][1] = b.m[1][0] * m[0][1] + b.m[1][1] * m[1][1] + b.m[1][2] * m[2][1] + b.m[1][3] * m[3][1]; rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh);
result.m[2][1] = b.m[2][0] * m[0][1] + b.m[2][1] * m[1][1] + b.m[2][2] * m[2][1] + b.m[2][3] * m[3][1];
result.m[3][1] = b.m[3][0] * m[0][1] + b.m[3][1] * m[1][1] + b.m[3][2] * m[2][1] + b.m[3][3] * m[3][1];
result.m[0][2] = b.m[0][0] * m[0][2] + b.m[0][1] * m[1][2] + b.m[0][2] * m[2][2] + b.m[0][3] * m[3][2]; std::array<float, 4> dotsArr = dots.template Store<float>();
result.m[1][2] = b.m[1][0] * m[0][2] + b.m[1][1] * m[1][2] + b.m[1][2] * m[2][2] + b.m[1][3] * m[3][2]; alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f };
result.m[2][2] = b.m[2][0] * m[0][2] + b.m[2][1] * m[1][2] + b.m[2][2] * m[2][2] + b.m[2][3] * m[3][2]; return VectorF32<3, 1>(outBuf);
result.m[3][2] = b.m[3][0] * m[0][2] + b.m[3][1] * m[1][2] + b.m[3][2] * m[2][2] + b.m[3][3] * m[3][2]; }
result.m[0][3] = b.m[0][0] * m[0][3] + b.m[0][1] * m[1][3] + b.m[0][2] * m[2][3] + b.m[0][3] * m[3][3]; // Linear transform (no translation): same as the affine version but
result.m[1][3] = b.m[1][0] * m[0][3] + b.m[1][1] * m[1][3] + b.m[1][2] * m[2][3] + b.m[1][3] * m[3][3]; // with bh.w = 0 so the translation column does not contribute. Useful
result.m[2][3] = b.m[2][0] * m[0][3] + b.m[2][1] * m[1][3] + b.m[2][2] * m[2][3] + b.m[2][3] * m[3][3]; // for direction vectors and normals.
result.m[3][3] = b.m[3][0] * m[0][3] + b.m[3][1] * m[1][3] + b.m[3][2] * m[2][3] + b.m[3][3] * m[3][3]; VectorF32<3, 1> TransformNormal(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
std::array<float, 4> bArr = b.template Store<float>();
alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 0.0f };
VectorF32<4, 1> bh(bhBuf);
VectorF32<1, 4> dots = VectorF32<4, 1>::Dot(
rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh);
std::array<float, 4> dotsArr = dots.template Store<float>();
alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f };
return VectorF32<3, 1>(outBuf);
}
// 4×4 matrix product via broadcast + FMA. Each result row is
// b[i][0]·rows[0] + b[i][1]·rows[1] + b[i][2]·rows[2] + b[i][3]·rows[3]
// produced with four shuffle-broadcasts and three fused multiply-adds.
MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
MatrixRowMajor result;
for (std::uint32_t i = 0; i < 4; ++i) {
VectorF32<4, 1> bi = b.rows[i];
VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>();
VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>();
VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>();
VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>();
VectorF32<4, 1> row = bx * rows[0];
row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row);
row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row);
row = VectorF32<4, 1>::MulitplyAdd(bw, rows[3], row);
result.rows[i] = row;
}
return result; return result;
} }
MatrixRowMajor<T, CollumSize, RowSize, Repeats> operator*(MatrixRowMajor<T, CollumSize, RowSize, Repeats> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { // 4×3 affine product. Same broadcast + FMA pattern, but the implicit
MatrixRowMajor<T, CollumSize, RowSize, Repeats> result; // 4th row of both matrices is [0, 0, 0, 1] so the b.w · row3 term
// contributes only to the translation slot.
// Column 0 MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
result.m[0][0] = b.m[0][0]*m[0][0] + b.m[0][1]*m[1][0] + b.m[0][2]*m[2][0]; alignas(16) float wRowBuf[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
result.m[1][0] = b.m[1][0]*m[0][0] + b.m[1][1]*m[1][0] + b.m[1][2]*m[2][0]; VectorF32<4, 1> wRow(wRowBuf);
result.m[2][0] = b.m[2][0]*m[0][0] + b.m[2][1]*m[1][0] + b.m[2][2]*m[2][0];
// Column 1
result.m[0][1] = b.m[0][0]*m[0][1] + b.m[0][1]*m[1][1] + b.m[0][2]*m[2][1];
result.m[1][1] = b.m[1][0]*m[0][1] + b.m[1][1]*m[1][1] + b.m[1][2]*m[2][1];
result.m[2][1] = b.m[2][0]*m[0][1] + b.m[2][1]*m[1][1] + b.m[2][2]*m[2][1];
// Column 2
result.m[0][2] = b.m[0][0]*m[0][2] + b.m[0][1]*m[1][2] + b.m[0][2]*m[2][2];
result.m[1][2] = b.m[1][0]*m[0][2] + b.m[1][1]*m[1][2] + b.m[1][2]*m[2][2];
result.m[2][2] = b.m[2][0]*m[0][2] + b.m[2][1]*m[1][2] + b.m[2][2]*m[2][2];
// Translation column
result.m[0][3] = b.m[0][0]*m[0][3] + b.m[0][1]*m[1][3] + b.m[0][2]*m[2][3] + b.m[0][3];
result.m[1][3] = b.m[1][0]*m[0][3] + b.m[1][1]*m[1][3] + b.m[1][2]*m[2][3] + b.m[1][3];
result.m[2][3] = b.m[2][0]*m[0][3] + b.m[2][1]*m[1][3] + b.m[2][2]*m[2][3] + b.m[2][3];
MatrixRowMajor result;
for (std::uint32_t i = 0; i < 3; ++i) {
VectorF32<4, 1> bi = b.rows[i];
VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>();
VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>();
VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>();
VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>();
VectorF32<4, 1> row = bx * rows[0];
row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row);
row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row);
row = VectorF32<4, 1>::MulitplyAdd(bw, wRow, row);
result.rows[i] = row;
}
return result; return result;
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>( return MatrixRowMajor(
1, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0 0, 0, 1, 0
); );
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>( return MatrixRowMajor(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1
);
}
static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor(
x, 0, 0, 0, x, 0, 0, 0,
0, y, 0, 0, 0, y, 0, 0,
0, 0, z, 0, 0, 0, z, 0,
0, 0, 0, 1 0, 0, 0, 1
); );
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>( return MatrixRowMajor(
x, 0, 0, 0, x, 0, 0, 0,
0, y, 0, 0, 0, y, 0, 0,
0, 0, z, 0 0, 0, z, 0
); );
} }
template <std::uint32_t VAligment> static MatrixRowMajor Scaling(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Scaling(Vector<float, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { std::array<float, 4> a = vector.template Store<float>();
return Scaling(vector.x, vector.y, vector.z); return Scaling(a[0], a[1], a[2]);
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>( return MatrixRowMajor(
1, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0,
x, y, z, 1 x, y, z, 1
); );
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
return MatrixRowMajor<T, CollumSize, RowSize, Repeats>( return MatrixRowMajor(
1, 0, 0, x, 1, 0, 0, x,
0, 1, 0, y, 0, 1, 0, y,
0, 0, 1, z 0, 0, 1, z
); );
} }
static MatrixRowMajor Translation(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
template <std::uint32_t VAligment> std::array<float, 4> a = vector.template Store<float>();
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Translation(Vector<T, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { return Translation(a[0], a[1], a[2]);
return Translation(vector.x, vector.y, vector.z);
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { // Pitch/yaw/roll Euler rotation. Computes all three sin/cos pairs as a
float cp = std::cosf(Pitch); // single batched SinCos on a VectorF32<3, 1>, then assembles the rows.
float sp = std::sinf(Pitch); static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f };
VectorF32<3, 1> v(angles);
std::tuple<VectorF32<3, 1>, VectorF32<3, 1>> sc = v.SinCos();
std::array<float, 4> s = std::get<0>(sc).template Store<float>();
std::array<float, 4> c = std::get<1>(sc).template Store<float>();
const float sp = s[0], cp = c[0];
const float sy = s[1], cy = c[1];
const float sr = s[2], cr = c[2];
float cy = std::cosf(Yaw); return MatrixRowMajor(
float sy = std::sinf(Yaw); cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f,
cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f,
float cr = std::cosf(Roll); cp * sy, -sp, cp * cy, 0.0f,
float sr = std::sinf(Roll); 0.0f, 0.0f, 0.0f, 1.0f
);
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
} }
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) { static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as<T, float>) {
float cp = std::cosf(Pitch); alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f };
float sp = std::sinf(Pitch); VectorF32<3, 1> v(angles);
std::tuple<VectorF32<3, 1>, VectorF32<3, 1>> sc = v.SinCos();
std::array<float, 4> s = std::get<0>(sc).template Store<float>();
std::array<float, 4> c = std::get<1>(sc).template Store<float>();
const float sp = s[0], cp = c[0];
const float sy = s[1], cy = c[1];
const float sr = s[2], cr = c[2];
float cy = std::cosf(Yaw); return MatrixRowMajor(
float sy = std::sinf(Yaw); cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f,
cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f,
float cr = std::cosf(Roll); cp * sy, -sp, cp * cy, 0.0f
float sr = std::sinf(Roll); );
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
return M;
} }
template <std::uint32_t VAligment> static MatrixRowMajor Rotation(VectorF32<3, 1> v) requires(CollumSize == 4 && Repeats == 1 && std::same_as<T, float>) {
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> LookAt(Vector<T, 3, VAligment> eyePosition, Vector<T, 3, VAligment> focusPosition, Vector<T, 3, VAligment> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { std::array<float, 4> a = v.template Store<float>();
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M; return Rotation(a[0], a[1], a[2]);
Vector<T, 3, VAligment> negEyeDirection = eyePosition - focusPosition;
return LookTo(eyePosition, negEyeDirection, upDirection);
return M;
} }
template <std::uint32_t VAligment> // View matrix: builds the basis from a forward (negated) direction and
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> LookTo(Vector<T, 3, VAligment> eyePosition, Vector<T, 3, VAligment> eyeDirection, Vector<T, 3, VAligment> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { // an up reference, then dots each basis vector with -eye for the
Vector<T, 3, 3> R2 = eyeDirection.Normalize(); // translation column. The four dots needed are produced by a single
// batched 4-pair Dot.
static MatrixRowMajor LookTo(VectorF32<3, 1> eyePosition, VectorF32<3, 1> eyeDirection, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
// R0 = up × R2 is linear in R2, so its normalized direction does
// not depend on whether we hand R2 in before or after its own
// normalize. Computing R0_raw from the un-normalized R2 lets us
// satisfy the 4-input Normalize requirement with one batched call
// (duplicating R2 and R0 in the padding slots).
VectorF32<3, 1> R0Raw = VectorF32<3, 1>::Cross(upDirection, eyeDirection);
auto normalized = VectorF32<3, 1>::Normalize(eyeDirection, R0Raw, eyeDirection, R0Raw);
VectorF32<3, 1> R2 = std::get<0>(normalized);
VectorF32<3, 1> R0 = std::get<1>(normalized);
VectorF32<3, 1> R1 = VectorF32<3, 1>::Cross(R2, R0);
VectorF32<3, 1> negEye = -eyePosition;
Vector<T, 3, 3> R0 = upDirection.Cross(R2); VectorF32<1, 4> dots = VectorF32<3, 1>::Dot(
R0 = R0.Normalize(); R0, negEye, R1, negEye, R2, negEye, R0, negEye);
std::array<float, 4> d = dots.template Store<float>();
std::array<float, 4> r0a = R0.template Store<float>();
std::array<float, 4> r1a = R1.template Store<float>();
std::array<float, 4> r2a = R2.template Store<float>();
Vector<T, 3, 3> R1 = R2.Cross(R0); return MatrixRowMajor(
r0a[0], r1a[0], r2a[0], 0.0f,
Vector<T, 3, 3> NegEyePosition = -eyePosition; r0a[1], r1a[1], r2a[1], 0.0f,
r0a[2], r1a[2], r2a[2], 0.0f,
float D0 = R0.Dot(NegEyePosition); d[0], d[1], d[2], 1.0f
float D1 = R1.Dot(NegEyePosition); );
float D2 = R2.Dot(NegEyePosition);
MatrixRowMajor<T, CollumSize, RowSize, Repeats> M;
M.m[0][0] = R0.v[0];
M.m[1][0] = R0.v[1];
M.m[2][0] = R0.v[2];
M.m[3][0] = D0;
M.m[0][1] = R1.v[0];
M.m[1][1] = R1.v[1];
M.m[2][1] = R1.v[2];
M.m[3][1] = D1;
M.m[0][2] = R2.v[0];
M.m[1][2] = R2.v[1];
M.m[2][2] = R2.v[2];
M.m[3][2] = D2;
M.m[0][3] = 0;
M.m[1][3] = 0;
M.m[2][3] = 0;
M.m[3][3] = 1;
return M;
} }
template <std::uint32_t VAligment> static MatrixRowMajor LookAt(VectorF32<3, 1> eyePosition, VectorF32<3, 1> focusPosition, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
static MatrixRowMajor<T, CollumSize, RowSize, Repeats> Rotation(Vector<T, 3, VAligment> vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) { return LookTo(eyePosition, eyePosition - focusPosition, upDirection);
return Rotation(vector.x, vector.y, vector.z);
} }
template <std::uint32_t VAligment>
Vector<T, 3, VAligment> TransformNormal(Vector<T, 3, VAligment> V) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
Vector<T, 3, 3> Z = Vector<T, 3, 3>(V.z, V.z, V.z);
Vector<T, 3, 3> Y = Vector<T, 3, 3>(V.y, V.y, V.y);
Vector<T, 3, 3> X = Vector<T, 3, 3>(V.x, V.x, V.x);
Vector<T, 3, VAligment> Result = Z * Vector<T, 3, VAligment>(m[2][0], m[2][1], m[2][2]);
Result = Y * Vector<T, 3, VAligment>(m[1][0], m[1][1], m[1][2]) + Result;
Result = X * Vector<T, 3, VAligment>(m[0][0], m[0][1], m[0][2]) + Result;
return Result;
}
// MatrixRowMajor<T, CollumSize, RowSize, Repeats> Inverse() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as<T, float>) {
// Vector<float, 4> V0[4], V1[4];
// V0[0] = Vector<float, 4>(m[0][2], m[0][2], m[1][2], m[1][2]);
// V1[0] = Vector<float, 4>(m[2][3], m[3][3], m[2][3], m[3][3]);
// V0[1] = Vector<float, 4>(m[0][0], m[0][0], m[1][0], m[1][0]);
// V1[1] = Vector<float, 4>(m[2][1], m[3][1], m[2][1], m[3][1]);
// V0[2] = Vector<float, 4>(m[0][2], m[2][2], m[0][0], m[2][0]);
// V1[2] = Vector<float, 4>(m[1][3], m[3][3], m[1][1], m[3][1]);
// Vector<float, 4> D0 = V0[0] * V1[0];
// Vector<float, 4> D1 = V0[1] * V1[1];
// Vector<float, 4> D2 = V0[2] * V1[2];
// V0[0] = Vector<float, 4>(m[2][2], m[3][2], m[2][2], m[3][2]);
// V1[0] = Vector<float, 4>(m[0][3], m[0][3], m[1][3], m[1][3]);
// V0[1] = Vector<float, 4>(m[2][0], m[3][0], m[2][0], m[3][0]);
// V1[1] = Vector<float, 4>(m[0][1], m[0][1], m[1][1], m[1][1]);
// V0[2] = Vector<float, 4>(m[1][2], m[3][2], m[1][0], m[3][0]);
// V1[2] = Vector<float, 4>(m[0][3], m[2][3], m[0][1], m[2][1]);
// D0 = Vector<float, 4>::NegativeMultiplySubtract(V0[0], V1[0], D0);
// D1 = Vector<float, 4>::NegativeMultiplySubtract(V0[1], V1[1], D1);
// D2 = Vector<float, 4>::NegativeMultiplySubtract(V0[2], V1[2], D2);
// V0[0] = Vector<float, 4>(m[1][1], m[2][1], m[0][1], m[1][1]);
// V1[0] = Vector<float, 4>(D2.v[1], D0.v[1], D0.v[3], D0.v[0]);
// V0[1] = Vector<float, 4>(m[2][0], m[0][0], m[1][0], m[0][0]);
// V1[1] = Vector<float, 4>(D0.v[3], D2.v[1], D0.v[1], D0.v[2]);
// V0[2] = Vector<float, 4>(m[1][3], m[2][3], m[0][3], m[1][3]);
// V1[2] = Vector<float, 4>(D2.v[3], D1.v[1], D1.v[3], D1.v[0]);
// V0[3] = Vector<float, 4>(m[2][2], m[0][2], m[1][2], m[0][2]);
// V1[3] = Vector<float, 4>(D1.v[3], D2.v[3], D1.v[1], D1.v[2]);
// Vector<float, 4> C0 = V0[0] * V1[0];
// Vector<float, 4> C2 = V0[1] * V1[1];
// Vector<float, 4> C4 = V0[2] * V1[2];
// Vector<float, 4> C6 = V0[3] * V1[3];
// V0[0] = Vector<float, 4>(m[2][1], m[3][1], m[1][1], m[2][1]);
// V1[0] = Vector<float, 4>(D0.v[3], D0.v[0], D0.v[1], D2.v[0]);
// V0[1] = Vector<float, 4>(m[3][0], m[2][0], m[3][0], m[1][0]);
// V1[1] = Vector<float, 4>(D0.v[2], D0.v[1], D2.v[0], D0.v[0]);
// V0[2] = Vector<float, 4>(m[2][3], m[3][3], m[1][3], m[2][3]);
// V1[2] = Vector<float, 4>(D1.v[3], D1.v[0], D1.v[1], D2.v[2]);
// V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
// V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
// C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
// C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
// C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
// C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
// V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
// V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
// V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
// V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
// V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
// V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
// V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
// V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
// XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
// C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
// XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
// C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
// XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
// C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
// XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
// C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
// XMMATRIX R;
// R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
// R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
// R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
// R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
// XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
// XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
// XMMATRIX Result;
// Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
// Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
// Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
// Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
// return Result;
// }
}; };
} }
// Pretty printer using Store() so it does not depend on the legacy m[i][j]
// access pattern.
template <> template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 4, 1>> : std::formatter<std::string> { struct std::formatter<Crafter::MatrixRowMajor<float, 4, 4, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 4, 1>& obj, format_context& ctx) const { auto format(const Crafter::MatrixRowMajor<float, 4, 4, 1>& obj, format_context& ctx) const {
return std::formatter<std::string>::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", std::array<float, 16> v = obj.Store();
obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3], return std::formatter<std::string>::format(std::format(
obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3], "{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3], v[0], v[1], v[2], v[3],
obj.m[3][0], obj.m[3][1], obj.m[3][2], obj.m[3][3] v[4], v[5], v[6], v[7],
v[8], v[9], v[10], v[11],
v[12], v[13], v[14], v[15]
), ctx); ), ctx);
} }
}; };
@ -424,10 +319,12 @@ struct std::formatter<Crafter::MatrixRowMajor<float, 4, 4, 1>> : std::formatter<
template <> template <>
struct std::formatter<Crafter::MatrixRowMajor<float, 4, 3, 1>> : std::formatter<std::string> { struct std::formatter<Crafter::MatrixRowMajor<float, 4, 3, 1>> : std::formatter<std::string> {
auto format(const Crafter::MatrixRowMajor<float, 4, 3, 1>& obj, format_context& ctx) const { auto format(const Crafter::MatrixRowMajor<float, 4, 3, 1>& obj, format_context& ctx) const {
return std::formatter<std::string>::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", std::array<float, 12> v = obj.Store();
obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3], return std::formatter<std::string>::format(std::format(
obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3], "{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3] v[0], v[1], v[2], v[3],
v[4], v[5], v[6], v[7],
v[8], v[9], v[10], v[11]
), ctx); ), ctx);
} }
}; };

View file

@ -28,10 +28,10 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
cfg.GetInterfacesAndImplementations(ifaces, impls); cfg.GetInterfacesAndImplementations(ifaces, impls);
} }
auto addVectorTest = [&](std::string march, std::string mtune) { auto addTest = [&](std::string_view testName, std::string march, std::string mtune) {
Test t; Test t;
t.config.path = "./"; t.config.path = "./";
t.config.name = std::format("Vector-{}", march); t.config.name = std::format("{}-{}", testName, march);
t.config.outputName = t.config.name; t.config.outputName = t.config.name;
t.config.target = cfg.target; t.config.target = cfg.target;
t.config.type = ConfigurationType::Executable; t.config.type = ConfigurationType::Executable;
@ -40,13 +40,15 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
t.config.debug = cfg.debug; t.config.debug = cfg.debug;
std::array<fs::path, 8> ifaces; std::array<fs::path, 8> ifaces;
std::ranges::copy(mathInterfaces, ifaces.begin()); std::ranges::copy(mathInterfaces, ifaces.begin());
std::array<fs::path, 1> impls = { "tests/Vector" }; std::array<fs::path, 1> impls = { fs::path{std::format("tests/{}", testName)} };
t.config.GetInterfacesAndImplementations(ifaces, impls); t.config.GetInterfacesAndImplementations(ifaces, impls);
cfg.tests.push_back(std::move(t)); cfg.tests.push_back(std::move(t));
}; };
addVectorTest("sapphirerapids", "native"); for (std::string_view name : { "Vector", "Intersection", "Matrix" }) {
addVectorTest("x86-64-v4", "generic"); addTest(name, "sapphirerapids", "native");
addVectorTest("x86-64-v3", "generic"); addTest(name, "x86-64-v4", "generic");
addTest(name, "x86-64-v3", "generic");
}
return cfg; return cfg;
} }

288
tests/Intersection.cpp Normal file
View file

@ -0,0 +1,288 @@
/*
Crafter® Build
Copyright (C) 2026 Catcrafts®
Catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cmath>
import Crafter.Math;
import std;
using namespace Crafter;
namespace {
constexpr float kEps = 1e-3f;
constexpr float kMaxF = std::numeric_limits<float>::max();
constexpr bool FloatEquals(float a, float b, float epsilon = kEps) {
return std::abs(a - b) < epsilon;
}
VectorF32<3, 1> Vec3(float x, float y, float z) {
alignas(16) float buf[4] = { x, y, z, 0.0f };
return VectorF32<3, 1>(buf);
}
VectorF32<4, 1> Vec4(float x, float y, float z, float w) {
alignas(16) float buf[4] = { x, y, z, w };
return VectorF32<4, 1>(buf);
}
VectorF32<1, 4> Vec1x4(float a, float b, float c, float d) {
alignas(16) float buf[4] = { a, b, c, d };
return VectorF32<1, 4>(buf);
}
// Möller-Trumbore in this codebase rejects det <= eps, so triangles must be
// wound so their geometric normal opposes the ray direction. For rays going +Z
// that means clockwise from a +Z viewer.
std::string* TestRayTriangle() {
VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5);
VectorF32<3, 1> rayDir = Vec3(0, 0, 1);
// A: hits at z=0, t=5 (front-facing).
VectorF32<3, 1> a0 = Vec3(-1, -1, 0), a1 = Vec3(0, 1, 0), a2 = Vec3(1, -1, 0);
// B: hits at z=10, t=15.
VectorF32<3, 1> b0 = Vec3(-1, -1, 10), b1 = Vec3(0, 1, 10), b2 = Vec3(1, -1, 10);
// C: front-facing triangle far off to the side - u or v out of [0,1].
VectorF32<3, 1> c0 = Vec3(99, -1, 0), c1 = Vec3(100, 1, 0), c2 = Vec3(101, -1, 0);
// D: triangle parallel to the ray (all vertices share y=2; ray lives in y=0).
VectorF32<3, 1> d0 = Vec3(-1, 2, -1), d1 = Vec3(1, 2, 1), d2 = Vec3(0, 2, 2);
VectorF32<1, 4> t = IntersectionTestRayTriangle(rayOrigin, rayDir,
a0, a1, a2,
b0, b1, b2,
c0, c1, c2,
d0, d1, d2);
std::array<float, 4> s = t.template Store<float>();
if (!FloatEquals(s[0], 5.0f))
return new std::string(std::format("RayTriangle A: expected 5, got {}", s[0]));
if (!FloatEquals(s[1], 15.0f))
return new std::string(std::format("RayTriangle B: expected 15, got {}", s[1]));
if (s[2] != kMaxF)
return new std::string(std::format("RayTriangle C: expected max (miss), got {}", s[2]));
if (s[3] != kMaxF)
return new std::string(std::format("RayTriangle D: expected max (parallel miss), got {}", s[3]));
return nullptr;
}
std::string* TestRayTriangleBackFacing() {
// Same A vertices but CCW from +Z viewer -> back-facing for +Z ray -> miss.
VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5);
VectorF32<3, 1> rayDir = Vec3(0, 0, 1);
VectorF32<3, 1> v0 = Vec3(-1, -1, 0), v1 = Vec3(1, -1, 0), v2 = Vec3(0, 1, 0);
VectorF32<1, 4> t = IntersectionTestRayTriangle(rayOrigin, rayDir,
v0, v1, v2,
v0, v1, v2,
v0, v1, v2,
v0, v1, v2);
std::array<float, 4> s = t.template Store<float>();
for (std::uint8_t i = 0; i < 4; ++i) {
if (s[i] != kMaxF)
return new std::string(std::format("RayTriangle back-facing lane {}: expected max, got {}", i, s[i]));
}
return nullptr;
}
std::string* TestRaySphere() {
VectorF32<3, 1> rayOrigin = Vec3(0, 0, -10);
VectorF32<3, 1> rayDir = Vec3(0, 0, 1);
// A: sphere at origin radius 2 - first hit at z=-2, t=8.
VectorF32<3, 1> posA = Vec3(0, 0, 0);
// B: sphere at (0,0,20) radius 1 - first hit at z=19, t=29.
VectorF32<3, 1> posB = Vec3(0, 0, 20);
// C: sphere off to the side, ray misses.
VectorF32<3, 1> posC = Vec3(10, 10, 0);
// D: sphere behind the ray origin.
VectorF32<3, 1> posD = Vec3(0, 0, -50);
VectorF32<1, 4> radii = Vec1x4(2.0f, 1.0f, 0.5f, 1.0f);
VectorF32<1, 4> t = IntersectionTestRaySphere(rayOrigin, rayDir,
posA, posB, posC, posD, radii);
std::array<float, 4> s = t.template Store<float>();
if (!FloatEquals(s[0], 8.0f))
return new std::string(std::format("RaySphere A: expected 8, got {}", s[0]));
if (!FloatEquals(s[1], 29.0f))
return new std::string(std::format("RaySphere B: expected 29, got {}", s[1]));
if (s[2] != kMaxF)
return new std::string(std::format("RaySphere C: expected max (miss), got {}", s[2]));
if (s[3] != kMaxF)
return new std::string(std::format("RaySphere D: expected max (behind), got {}", s[3]));
return nullptr;
}
std::string* TestRayOrientedBox() {
VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5);
VectorF32<3, 1> rayDir = Vec3(0, 0, 1);
// Identity quaternion (axis-aligned).
VectorF32<4, 1> idQ = Vec4(0, 0, 0, 1);
// Note: RayOrientedBox treats `size` as the *full* extent (it computes
// halfExtents = size * 0.5 internally). So size=2 means the box spans
// [-1, 1] in each axis. (SphereOrientedBox uses the opposite convention.)
//
// A: box at origin size 2 (half 1) -> ray enters at z=-1, t=4.
VectorF32<3, 1> posA = Vec3(0, 0, 0), sizeA = Vec3(2, 2, 2);
// B: box at (0,0,10) size 2 (half 1) -> ray enters at z=9, t=14.
VectorF32<3, 1> posB = Vec3(0, 0, 10), sizeB = Vec3(2, 2, 2);
// C: box off to the side - miss.
VectorF32<3, 1> posC = Vec3(50, 0, 0), sizeC = Vec3(2, 2, 2);
// D: box behind ray - miss.
VectorF32<3, 1> posD = Vec3(0, 0, -50), sizeD = Vec3(2, 2, 2);
VectorF32<1, 4> t = IntersectionTestRayOrientedBox(rayOrigin, rayDir,
posA, sizeA, idQ,
posB, sizeB, idQ,
posC, sizeC, idQ,
posD, sizeD, idQ);
std::array<float, 4> s = t.template Store<float>();
if (!FloatEquals(s[0], 4.0f))
return new std::string(std::format("RayOrientedBox A: expected 4, got {}", s[0]));
if (!FloatEquals(s[1], 14.0f))
return new std::string(std::format("RayOrientedBox B: expected 14, got {}", s[1]));
if (s[2] != kMaxF)
return new std::string(std::format("RayOrientedBox C: expected max (miss), got {}", s[2]));
if (s[3] != kMaxF)
return new std::string(std::format("RayOrientedBox D: expected max (behind), got {}", s[3]));
return nullptr;
}
MatrixRowMajor<float, 4, 3, 1> MakeBoxMatrix(float tx, float ty, float tz) {
// Box matrix the OBB intersection code expects: rows[i][0..2] is the i-th
// axis (the existing semantics treat matrix rows as the OBB axes), and
// rows[i][3] is the translation component along that axis.
return MatrixRowMajor<float, 4, 3, 1>(
1, 0, 0, tx,
0, 1, 0, ty,
0, 0, 1, tz
);
}
std::string* TestSphereOrientedBox() {
// `size` is half-extents (the intersection code clamps to ±size).
VectorF32<3, 1> sphereCenter = Vec3(0, 0, 0);
VectorF32<1, 4> radii = Vec1x4(1.0f, 0.5f, 0.5f, 1.0f);
// A: box at origin half-extent 2 -> sphere center inside -> hit.
VectorF32<3, 1> sizeA = Vec3(2, 2, 2);
auto boxA = MakeBoxMatrix(0, 0, 0);
// B: box at (5,0,0) half-extent 1 -> box spans x in [4,6], sphere in [-0.5,0.5] -> miss.
VectorF32<3, 1> sizeB = Vec3(1, 1, 1);
auto boxB = MakeBoxMatrix(5, 0, 0);
// C: box at (3,0,0) half-extent 1 -> box spans [2,4], sphere [-0.5,0.5] -> miss.
VectorF32<3, 1> sizeC = Vec3(1, 1, 1);
auto boxC = MakeBoxMatrix(3, 0, 0);
// D: box at origin half-extent 0.5 -> sphere center inside the box -> hit.
VectorF32<3, 1> sizeD = Vec3(0.5f, 0.5f, 0.5f);
auto boxD = MakeBoxMatrix(0, 0, 0);
VectorF32<1, 4> r = IntersectionTestSphereOrientedBox(sphereCenter, radii,
sizeA, boxA,
sizeB, boxB,
sizeC, boxC,
sizeD, boxD);
std::array<float, 4> s = r.template Store<float>();
if (s[0] != 0.0f)
return new std::string(std::format("SphereOrientedBox A: expected hit (0), got {}", s[0]));
if (s[1] != kMaxF)
return new std::string(std::format("SphereOrientedBox B: expected max (miss), got {}", s[1]));
if (s[2] != kMaxF)
return new std::string(std::format("SphereOrientedBox C: expected max (miss), got {}", s[2]));
if (s[3] != 0.0f)
return new std::string(std::format("SphereOrientedBox D: expected hit (0), got {}", s[3]));
return nullptr;
}
std::string* TestGetOBBCorners() {
// Identity matrix - the 8 corners are exactly ±size on each axis.
VectorF32<3, 1> size = Vec3(2, 3, 4);
auto m = MakeBoxMatrix(0, 0, 0);
std::array<VectorF32<3, 1>, 8> corners = GetOBBCorners(size, m);
constexpr std::array<std::array<float, 3>, 8> expected = {{
{-2, -3, -4}, { 2, -3, -4}, {-2, 3, -4}, { 2, 3, -4},
{-2, -3, 4}, { 2, -3, 4}, {-2, 3, 4}, { 2, 3, 4},
}};
for (std::uint8_t i = 0; i < 8; ++i) {
std::array<float, 4> v = corners[i].template Store<float>();
for (std::uint8_t j = 0; j < 3; ++j) {
if (!FloatEquals(v[j], expected[i][j]))
return new std::string(std::format(
"GetOBBCorners corner {} lane {}: expected {}, got {}",
i, j, expected[i][j], v[j]));
}
}
// Translated matrix - corners shift by the translation column.
auto m2 = MakeBoxMatrix(10, 20, 30);
std::array<VectorF32<3, 1>, 8> corners2 = GetOBBCorners(size, m2);
for (std::uint8_t i = 0; i < 8; ++i) {
std::array<float, 4> v = corners2[i].template Store<float>();
std::array<float, 3> exp = {
expected[i][0] + 10.0f,
expected[i][1] + 20.0f,
expected[i][2] + 30.0f
};
for (std::uint8_t j = 0; j < 3; ++j) {
if (!FloatEquals(v[j], exp[j]))
return new std::string(std::format(
"GetOBBCorners translated corner {} lane {}: expected {}, got {}",
i, j, exp[j], v[j]));
}
}
return nullptr;
}
std::string* TestOBBOBBOverlapping() {
VectorF32<3, 1> size = Vec3(1, 1, 1);
auto boxA = MakeBoxMatrix(0, 0, 0);
auto boxB = MakeBoxMatrix(1, 0, 0); // overlap on x in [-1, 1] (B) and [-1, 1] (A) -> overlap
if (!IntersectionTestOrientedBoxOrientedBox(size, boxA, size, boxB))
return new std::string("OBB-OBB overlapping: expected true");
auto boxFar = MakeBoxMatrix(10, 0, 0);
if (IntersectionTestOrientedBoxOrientedBox(size, boxA, size, boxFar))
return new std::string("OBB-OBB far apart: expected false");
return nullptr;
}
} // namespace
int main() {
using Fn = std::string* (*)();
constexpr std::array<std::pair<const char*, Fn>, 7> tests = {{
{ "RayTriangle", TestRayTriangle },
{ "RayTriangleBackFacing", TestRayTriangleBackFacing },
{ "RaySphere", TestRaySphere },
{ "RayOrientedBox", TestRayOrientedBox },
{ "SphereOrientedBox", TestSphereOrientedBox },
{ "GetOBBCorners", TestGetOBBCorners },
{ "OBBOBB", TestOBBOBBOverlapping },
}};
for (auto const& [name, fn] : tests) {
if (auto err = std::unique_ptr<std::string>(fn())) {
std::println(std::cerr, "[{}] {}", name, *err);
return 1;
}
}
return 0;
}

343
tests/Matrix.cpp Normal file
View file

@ -0,0 +1,343 @@
/*
Crafter® Build
Copyright (C) 2026 Catcrafts®
Catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cmath>
import Crafter.Math;
import std;
using namespace Crafter;
namespace {
constexpr float kEps = 1e-4f;
constexpr bool FloatEquals(float a, float b, float epsilon = kEps) {
return std::abs(a - b) < epsilon;
}
VectorF32<3, 1> Vec3(float x, float y, float z) {
alignas(16) float buf[4] = { x, y, z, 0.0f };
return VectorF32<3, 1>(buf);
}
std::string* CheckVec3(VectorF32<3, 1> got, float x, float y, float z, std::string_view label) {
std::array<float, 4> v = got.template Store<float>();
if (!FloatEquals(v[0], x) || !FloatEquals(v[1], y) || !FloatEquals(v[2], z))
return new std::string(std::format(
"{}: expected ({},{},{}), got ({},{},{})",
label, x, y, z, v[0], v[1], v[2]));
return nullptr;
}
std::string* CheckMat43(MatrixRowMajor<float, 4, 3, 1> const& m, std::array<float, 12> expected, std::string_view label) {
std::array<float, 12> got = m.Store();
for (std::uint8_t i = 0; i < 12; ++i) {
if (!FloatEquals(got[i], expected[i]))
return new std::string(std::format(
"{} element {}: expected {}, got {}", label, i, expected[i], got[i]));
}
return nullptr;
}
std::string* CheckMat44(MatrixRowMajor<float, 4, 4, 1> const& m, std::array<float, 16> expected, std::string_view label) {
std::array<float, 16> got = m.Store();
for (std::uint8_t i = 0; i < 16; ++i) {
if (!FloatEquals(got[i], expected[i]))
return new std::string(std::format(
"{} element {}: expected {}, got {}", label, i, expected[i], got[i]));
}
return nullptr;
}
// ---------- Identity / Store roundtrip ----------
std::string* TestIdentity43() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Identity();
return CheckMat43(m, {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
}, "Identity 4x3");
}
std::string* TestIdentity44() {
auto m = MatrixRowMajor<float, 4, 4, 1>::Identity();
return CheckMat44(m, {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1,
}, "Identity 4x4");
}
std::string* TestStoreRoundtrip() {
MatrixRowMajor<float, 4, 4, 1> m(
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
);
return CheckMat44(m, {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
}, "Store roundtrip 4x4");
}
// ---------- Factory matrices ----------
std::string* TestTranslation() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Translation(10, 20, 30);
if (auto e = CheckMat43(m, {
1, 0, 0, 10,
0, 1, 0, 20,
0, 0, 1, 30,
}, "Translation 4x3")) return e;
auto m4 = MatrixRowMajor<float, 4, 4, 1>::Translation(10, 20, 30);
if (auto e = CheckMat44(m4, {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
10, 20, 30, 1,
}, "Translation 4x4")) return e;
return nullptr;
}
std::string* TestScaling() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Scaling(2, 3, 4);
if (auto e = CheckMat43(m, {
2, 0, 0, 0,
0, 3, 0, 0,
0, 0, 4, 0,
}, "Scaling 4x3")) return e;
auto m4 = MatrixRowMajor<float, 4, 4, 1>::Scaling(2, 3, 4);
return CheckMat44(m4, {
2, 0, 0, 0,
0, 3, 0, 0,
0, 0, 4, 0,
0, 0, 0, 1,
}, "Scaling 4x4");
}
// ---------- Matrix * Vector ----------
std::string* TestMatVecIdentity() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Identity();
VectorF32<3, 1> v = Vec3(7, -2, 3);
return CheckVec3(m * v, 7, -2, 3, "I * v");
}
std::string* TestMatVecTranslation() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Translation(1, 2, 3);
// Affine multiply with implicit w=1 -> result = v + (1,2,3).
VectorF32<3, 1> v = Vec3(10, 20, 30);
return CheckVec3(m * v, 11, 22, 33, "Translation * v");
}
std::string* TestMatVecScaling() {
auto m = MatrixRowMajor<float, 4, 3, 1>::Scaling(2, 3, 4);
VectorF32<3, 1> v = Vec3(1, 1, 1);
return CheckVec3(m * v, 2, 3, 4, "Scaling * v");
}
std::string* TestMatVecCombined() {
// Scale then translate: full matrix is [diag(s) | t].
MatrixRowMajor<float, 4, 3, 1> m(
2, 0, 0, 10,
0, 3, 0, 20,
0, 0, 4, 30
);
VectorF32<3, 1> v = Vec3(1, 1, 1);
return CheckVec3(m * v, 12, 23, 34, "Scale+Translate * v");
}
std::string* TestTransformNormal() {
// TransformNormal ignores the translation column.
MatrixRowMajor<float, 4, 3, 1> m(
2, 0, 0, 100,
0, 3, 0, 200,
0, 0, 4, 300
);
VectorF32<3, 1> v = Vec3(1, 1, 1);
return CheckVec3(m.TransformNormal(v), 2, 3, 4, "TransformNormal");
}
// ---------- Matrix * Matrix ----------
std::string* TestMatMulIdentityLeft44() {
MatrixRowMajor<float, 4, 4, 1> a = MatrixRowMajor<float, 4, 4, 1>::Identity();
MatrixRowMajor<float, 4, 4, 1> b(
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
);
auto r = a * b;
return CheckMat44(r, {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
}, "I * M");
}
std::string* TestMatMulIdentityRight44() {
MatrixRowMajor<float, 4, 4, 1> a(
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
);
MatrixRowMajor<float, 4, 4, 1> b = MatrixRowMajor<float, 4, 4, 1>::Identity();
auto r = a * b;
return CheckMat44(r, {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
}, "M * I");
}
std::string* TestMatMul44() {
// Hand-computed: standard row-major (B*A)[i][j] = sum_k B[i][k]*A[k][j].
MatrixRowMajor<float, 4, 4, 1> a(
1, 2, 0, 0,
0, 1, 3, 0,
4, 0, 1, 0,
0, 0, 0, 1
);
MatrixRowMajor<float, 4, 4, 1> b(
1, 0, 2, 0,
0, 1, 0, 3,
0, 0, 1, 0,
0, 0, 0, 1
);
auto r = a * b;
// result.m[i][j] = sum_k b.m[i][k] * a.m[k][j]
// row 0: [1*1+0*0+2*4+0*0, 1*2+0*1+2*0+0*0, 1*0+0*3+2*1+0*0, 1*0+0*0+2*0+0*1]
// = [9, 2, 2, 0]
// row 1: [0*1+1*0+0*4+3*0, 0*2+1*1+0*0+3*0, 0*0+1*3+0*1+3*0, 0*0+1*0+0*0+3*1]
// = [0, 1, 3, 3]
// row 2: [0*1+0*0+1*4+0*0, 0*2+0*1+1*0+0*0, 0*0+0*3+1*1+0*0, 0*0+0*0+1*0+0*1]
// = [4, 0, 1, 0]
// row 3: [0*1+0*0+0*4+1*0, 0*2+0*1+0*0+1*0, 0*0+0*3+0*1+1*0, 0*0+0*0+0*0+1*1]
// = [0, 0, 0, 1]
return CheckMat44(r, {
9, 2, 2, 0,
0, 1, 3, 3,
4, 0, 1, 0,
0, 0, 0, 1,
}, "Mat * Mat 4x4");
}
std::string* TestMatMulTranslationCompose() {
// Translation composition: T(a) * T(b) acting on a vector translates by a+b
// (and the resulting matrix has the summed translation in its last column).
auto t1 = MatrixRowMajor<float, 4, 3, 1>::Translation(1, 2, 3);
auto t2 = MatrixRowMajor<float, 4, 3, 1>::Translation(10, 20, 30);
auto r = t1 * t2;
if (auto e = CheckMat43(r, {
1, 0, 0, 11,
0, 1, 0, 22,
0, 0, 1, 33,
}, "T(1,2,3) * T(10,20,30)")) return e;
// Sanity check: applying the composed matrix to origin yields (11,22,33).
return CheckVec3(r * Vec3(0, 0, 0), 11, 22, 33, "Composed translation * origin");
}
std::string* TestMatMulScaleThenTranslate() {
// The matrix product follows the engine's row-vector / post-multiply
// convention: a.operator*(b) yields the matrix b composed with a so that
// applying the result is equivalent to "first apply this, then apply b"
// when read left-to-right with a row vector (v * (a*b) = (v*a) * b).
//
// For column-vector intuition that means s.operator*(t) builds T·S, i.e.
// "scale, then translate"; applying it to (1,1,1) yields s*v + t.
auto s = MatrixRowMajor<float, 4, 3, 1>::Scaling(2, 3, 4);
auto t = MatrixRowMajor<float, 4, 3, 1>::Translation(10, 20, 30);
auto r = s * t;
return CheckVec3(r * Vec3(1, 1, 1), 12, 23, 34, "scale-then-translate * (1,1,1)");
}
// ---------- LookTo / LookAt ----------
std::string* TestLookToBasis() {
// Camera at +Z looking back toward origin. eyeDirection should point from
// focus to eye, i.e. +Z, so the resulting basis has R2 = +Z.
VectorF32<3, 1> eye = Vec3(0, 0, 5);
VectorF32<3, 1> dir = Vec3(0, 0, 1); // already from origin toward eye
VectorF32<3, 1> up = Vec3(0, 1, 0);
auto m = MatrixRowMajor<float, 4, 4, 1>::LookTo(eye, dir, up);
// R2 normalized = (0,0,1). R0 = up x R2 = (0,1,0) x (0,0,1) = (1,0,0).
// R1 = R2 x R0 = (0,0,1) x (1,0,0) = (0,1,0).
// The view matrix stores basis columns in the rotation block, with the
// translation column = -basis dot eye. So m.row[3] = (D0, D1, D2, 1).
// D0 = R0 . -eye = -(0*0+0*0+5*0) = 0
// D1 = R1 . -eye = -(0*0+1*0+0*5) = 0
// D2 = R2 . -eye = -(0*0+0*0+1*5) = -5
return CheckMat44(m, {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, -5, 1,
}, "LookTo basis");
}
// ---------- Test harness ----------
} // namespace
int main() {
using Fn = std::string* (*)();
constexpr std::array<std::pair<const char*, Fn>, 15> tests = {{
{ "Identity43", TestIdentity43 },
{ "Identity44", TestIdentity44 },
{ "StoreRoundtrip", TestStoreRoundtrip },
{ "Translation", TestTranslation },
{ "Scaling", TestScaling },
{ "MatVecIdentity", TestMatVecIdentity },
{ "MatVecTranslation", TestMatVecTranslation },
{ "MatVecScaling", TestMatVecScaling },
{ "MatVecCombined", TestMatVecCombined },
{ "TransformNormal", TestTransformNormal },
{ "MatMulIdentityLeft", TestMatMulIdentityLeft44 },
{ "MatMulIdentityRight", TestMatMulIdentityRight44 },
{ "MatMul44", TestMatMul44 },
{ "MatMulTranslation", TestMatMulTranslationCompose },
{ "MatMulScaleTranslate",TestMatMulScaleThenTranslate },
}};
for (auto const& [name, fn] : tests) {
if (auto err = std::unique_ptr<std::string>(fn())) {
std::println(std::cerr, "[{}] {}", name, *err);
return 1;
}
}
// LookTo is tested separately - its expected result depends on the
// internal basis convention and a single failure here is informative
// enough to keep in the matrix suite.
if (auto err = std::unique_ptr<std::string>(TestLookToBasis())) {
std::println(std::cerr, "[LookTo] {}", *err);
return 1;
}
return 0;
}