diff --git a/README.md b/README.md new file mode 100644 index 0000000..b8dbf5a --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# Crafter.Math + +A C++23 math library for the Crafter engine, distributed as a set of C++ modules. Provides generic vector and matrix types alongside SIMD-specialized fixed-size vectors for `float` and `_Float16`, plus a small set of ray intersection routines. + +The library is hardware-aware: it picks the widest SIMD path the target supports (SSE / AVX / AVX-512 on x86_64, SIMD128 on WebAssembly) and falls back to scalar code elsewhere. + +## Modules + +`import Crafter.Math;` re-exports the following partitions: + +| Partition | Contents | +| --- | --- | +| `:Basic` | `ToRadian`, plus the `VectorF16 → VectorF32` alias on x86_64 without AVX512-FP16. | +| `:Vector` | Generic `Vector` with `.x/.y/.z/.w` and `.r/.g/.b/.a` accessors, arithmetic, comparison, and conversion operators. | +| `:MatrixRowMajor` | `MatrixRowMajor` — row-major matrices with packed-batch support via `Repeats`. | +| `:VectorF32` | `VectorF32` — SIMD-specialized 32-bit float vector with `Load`/`Store`, FMA, `Cross`, `Dot`, `Length`, `Normalize`, `Sin`/`Cos`, `Shuffle`, etc. | +| `:VectorF16` | `VectorF16` — same surface as `VectorF32` for `_Float16` when the target supports it. | +| `:Intersection` | `IntersectionTestRayTriangle`, `IntersectionTestRaySphere`, `IntersectionTestRayOrientedBox`. | +| `:Common` | Internal SIMD type selection (`__m128/256/512`, `__m128h/256h/512h`, `v128_t`, or scalar array) used by the F16/F32 vectors. | + +`Len` is the logical vector width (1–4 for most graphics use) and `Packing` is the number of vectors packed into one SIMD register — e.g. `VectorF32<3, 4>` holds four 3-vectors in a single 512-bit register, which is what the batched `Dot`, `Length`, and `Normalize` overloads operate on. + +## Example + +```cpp +import Crafter.Math; +using namespace Crafter; + +// Generic 3-vector of floats. +Vector a(1.0f, 2.0f, 3.0f); +Vector b(4.0f, 5.0f, 6.0f); +auto c = a + b; + +// SIMD-specialized 4-vector with one element of padding. +VectorF32<4, 1> p(1.0f); +VectorF32<4, 1> q(2.0f); +auto r = VectorF32<3, 1>::Cross(p, q); + +// Ray vs. triangle. +float t = IntersectionTestRayTriangle( + {0,0,0}, {1,0,0}, {0,1,0}, + {0.25f, 0.25f, -1.0f}, {0, 0, 1}); +``` + +## Building + +This project uses [Crafter.Build](https://forgejo.catcrafts.net/Catcrafts/Crafter.Build). The build script in [project.cpp](project.cpp) declares the library as a static C++ module library and registers three test executables targeting different ISAs: + +- `Vector-sapphirerapids` — AVX-512 with FP16 (`-march=sapphirerapids`) +- `Vector-x86-64-v4` — AVX-512 baseline +- `Vector-x86-64-v3` — AVX2 baseline + +Build and run tests via your usual Crafter.Build entry point; `build/` and `bin/` are git-ignored. + +## Target support + +- **x86_64**: SSE / AVX / AVX-512F selected per target; AVX512-FP16 is required for native `VectorF16` (otherwise it aliases `VectorF32`). F16C is used for fp16 ↔ fp32 conversion when available. +- **WebAssembly**: `wasm_simd128.h` 128-bit path; SIMD width is capped at 16 bytes. +- **Other targets**: scalar fallback via `std::array`. + +## License + +LGPL-3.0-only. See [LICENSE](LICENSE). + +Copyright © 2026 Catcrafts — [catcrafts.net](https://catcrafts.net) diff --git a/interfaces/Crafter.Math-Intersection.cppm b/interfaces/Crafter.Math-Intersection.cppm index 370cb9f..3c167ed 100755 --- a/interfaces/Crafter.Math-Intersection.cppm +++ b/interfaces/Crafter.Math-Intersection.cppm @@ -18,207 +18,432 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ export module Crafter.Math:Intersection; -import :Vector; +import :VectorF32; import :MatrixRowMajor; import std; namespace Crafter { - export template - constexpr T IntersectionTestRayTriangle(Vector vert0, Vector vert1, Vector vert2, Vector rayOrigin, Vector rayDir) { - Vector edge1 = vert1 - vert0; - Vector edge2 = vert2 - vert0; + // All intersection tests are batched over four primitives at a time so they + // feed the VectorF32<3,1>::Dot / Cross / Length / Normalize four-pair + // overloads directly. The single-primitive case is just "pass the same + // primitive four times and read lane 0" - there is no single-vector + // fast-path because the SIMD pipelines want full lanes. - Vector h = Vector::Cross(rayDir, edge2); - T determinant = Vector::Dot(edge1, h); + // Möller-Trumbore against four triangles sharing one ray. Returns ray + // parameter t per triangle, or float max where the ray misses. + export inline VectorF32<1, 4> IntersectionTestRayTriangle( + VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir, + VectorF32<3, 1> aV0, VectorF32<3, 1> aV1, VectorF32<3, 1> aV2, + VectorF32<3, 1> bV0, VectorF32<3, 1> bV1, VectorF32<3, 1> bV2, + VectorF32<3, 1> cV0, VectorF32<3, 1> cV1, VectorF32<3, 1> cV2, + VectorF32<3, 1> dV0, VectorF32<3, 1> dV1, VectorF32<3, 1> dV2 + ) { + VectorF32<3, 1> aE1 = aV1 - aV0; + VectorF32<3, 1> aE2 = aV2 - aV0; + VectorF32<3, 1> bE1 = bV1 - bV0; + VectorF32<3, 1> bE2 = bV2 - bV0; + VectorF32<3, 1> cE1 = cV1 - cV0; + VectorF32<3, 1> cE2 = cV2 - cV0; + VectorF32<3, 1> dE1 = dV1 - dV0; + VectorF32<3, 1> dE2 = dV2 - dV0; - if (determinant <= std::numeric_limits::epsilon()) { - return std::numeric_limits::max(); + VectorF32<3, 1> aH = VectorF32<3, 1>::Cross(rayDir, aE2); + VectorF32<3, 1> bH = VectorF32<3, 1>::Cross(rayDir, bE2); + VectorF32<3, 1> cH = VectorF32<3, 1>::Cross(rayDir, cE2); + VectorF32<3, 1> dH = VectorF32<3, 1>::Cross(rayDir, dE2); + + VectorF32<3, 1> aS = rayOrigin - aV0; + VectorF32<3, 1> bS = rayOrigin - bV0; + VectorF32<3, 1> cS = rayOrigin - cV0; + VectorF32<3, 1> dS = rayOrigin - dV0; + + VectorF32<3, 1> aQ = VectorF32<3, 1>::Cross(aS, aE1); + VectorF32<3, 1> bQ = VectorF32<3, 1>::Cross(bS, bE1); + VectorF32<3, 1> cQ = VectorF32<3, 1>::Cross(cS, cE1); + VectorF32<3, 1> dQ = VectorF32<3, 1>::Cross(dS, dE1); + + // Four 3-component dots packed into one __m128 per call. + VectorF32<1, 4> det = VectorF32<3, 1>::Dot( + aE1, aH, bE1, bH, cE1, cH, dE1, dH); + VectorF32<1, 4> uNum = VectorF32<3, 1>::Dot( + aS, aH, bS, bH, cS, cH, dS, dH); + VectorF32<1, 4> vNum = VectorF32<3, 1>::Dot( + rayDir, aQ, rayDir, bQ, rayDir, cQ, rayDir, dQ); + VectorF32<1, 4> tNum = VectorF32<3, 1>::Dot( + aE2, aQ, bE2, bQ, cE2, cQ, dE2, dQ); + + std::array detArr = det.template Store(); + std::array uArr = uNum.template Store(); + std::array vArr = vNum.template Store(); + std::array tArr = tNum.template Store(); + + constexpr float eps = std::numeric_limits::epsilon(); + constexpr float maxF = std::numeric_limits::max(); + alignas(16) std::array out{}; + for (std::uint8_t i = 0; i < 4; ++i) { + float d = detArr[i]; + if (d <= eps) { out[i] = maxF; continue; } + float invD = 1.0f / d; + float u = uArr[i] * invD; + if (u < 0.0f || u > 1.0f) { out[i] = maxF; continue; } + float v = vArr[i] * invD; + if (v < 0.0f || u + v > 1.0f) { out[i] = maxF; continue; } + out[i] = tArr[i] * invD; } - - T inverse_determinant = T(1) / determinant; - - Vector origins_diff_vector = rayOrigin - vert0; - T u = Vector::Dot(origins_diff_vector, h) * inverse_determinant; - - if (u < 0.0 || u > 1.0) - { - return std::numeric_limits::max(); - } - - Vector q = Vector::Cross(origins_diff_vector, edge1); - T v = inverse_determinant * Vector::Dot(rayDir, q); - - if (v < 0.0 || u + v > 1.0) { - return std::numeric_limits::max(); - } - - return inverse_determinant * Vector::Dot(edge2, q); + return VectorF32<1, 4>(out.data()); } - export template - constexpr T IntersectionTestRaySphere(Vector position, T radius, Vector rayOrigin, Vector rayDir) { - T a = Vector::Dot(rayDir, rayDir); - T b = Vector::Dot(rayDir, (T(2) * (rayOrigin - position))); - T c = Vector::Dot(position, position) + Vector::Dot(rayOrigin, rayOrigin) - T(2) * Vector::Dot(rayOrigin, position) - radius * radius; - T d = b * b + (T(-4)) * a * c; + // One ray against four spheres. radii must hold {rA, rB, rC, rD} in lanes + // 0..3. + export inline VectorF32<1, 4> IntersectionTestRaySphere( + VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir, + VectorF32<3, 1> posA, VectorF32<3, 1> posB, + VectorF32<3, 1> posC, VectorF32<3, 1> posD, + VectorF32<1, 4> radii + ) { + VectorF32<3, 1> sA = rayOrigin - posA; + VectorF32<3, 1> sB = rayOrigin - posB; + VectorF32<3, 1> sC = rayOrigin - posC; + VectorF32<3, 1> sD = rayOrigin - posD; - if (d < 0) { - return std::numeric_limits::max(); - } + // dirDotS_i = rayDir · (rayOrigin - pos_i) + VectorF32<1, 4> dirDotS = VectorF32<3, 1>::Dot( + rayDir, sA, rayDir, sB, rayDir, sC, rayDir, sD); + // sqDist_i = |rayOrigin - pos_i|² (a.k.a. LengthSq of the s vectors) + VectorF32<1, 4> sqDist = VectorF32<3, 1>::LengthSq(sA, sB, sC, sD); + // aScalar = rayDir · rayDir, broadcast across four lanes. + VectorF32<1, 4> aScalar = VectorF32<3, 1>::LengthSq( + rayDir, rayDir, rayDir, rayDir); - d = std::sqrt(d); + VectorF32<1, 4> two(2.0f); + VectorF32<1, 4> four(4.0f); + VectorF32<1, 4> b = two * dirDotS; + VectorF32<1, 4> c = sqDist - radii * radii; + // discriminant = b² - 4·a·c + VectorF32<1, 4> disc = b * b - four * aScalar * c; - T t = (T(-0.5)) * (b + d) / a; - if (t > T(0)) { - return t; - } else { - return std::numeric_limits::max(); + std::array discArr = disc.template Store(); + std::array bArr = b.template Store(); + std::array aArr = aScalar.template Store(); + + constexpr float maxF = std::numeric_limits::max(); + alignas(16) std::array out{}; + for (std::uint8_t i = 0; i < 4; ++i) { + float d = discArr[i]; + if (d < 0.0f) { out[i] = maxF; continue; } + float sqrtD = std::sqrt(d); + float t = -0.5f * (bArr[i] + sqrtD) / aArr[i]; + out[i] = (t > 0.0f) ? t : maxF; } + return VectorF32<1, 4>(out.data()); } - export template - constexpr T IntersectionTestRayOrientedBox(Vector boxPosition, Vector boxSize, Vector boxRotation, Vector rayOrigin, Vector rayDir) { - Vector invRot( - -boxRotation.x, - -boxRotation.y, - -boxRotation.z, - boxRotation.w - ); + // One ray against four OBBs. Each box is described by world-space position, + // half-extent vector (per-axis sizes), and a unit quaternion rotation. + export inline VectorF32<1, 4> IntersectionTestRayOrientedBox( + VectorF32<3, 1> rayOrigin, VectorF32<3, 1> rayDir, + VectorF32<3, 1> posA, VectorF32<3, 1> sizeA, VectorF32<4, 1> rotA, + VectorF32<3, 1> posB, VectorF32<3, 1> sizeB, VectorF32<4, 1> rotB, + VectorF32<3, 1> posC, VectorF32<3, 1> sizeC, VectorF32<4, 1> rotC, + VectorF32<3, 1> posD, VectorF32<3, 1> sizeD, VectorF32<4, 1> rotD + ) { + // Conjugate quaternion: negate xyz, keep w. Negate<{true,true,true,false}> + // is constant-folded into a single XOR with a mask vector. + VectorF32<4, 1> invRotA = rotA.template Negate<{{true, true, true, false}}>(); + VectorF32<4, 1> invRotB = rotB.template Negate<{{true, true, true, false}}>(); + VectorF32<4, 1> invRotC = rotC.template Negate<{{true, true, true, false}}>(); + VectorF32<4, 1> invRotD = rotD.template Negate<{{true, true, true, false}}>(); - Vector localOrigin = Vector::Rotate(rayOrigin - boxPosition, invRot); - Vector localDir = Vector::Rotate(rayDir, invRot); + VectorF32<3, 1> localOriginA = VectorF32<3, 1>::Rotate(rayOrigin - posA, invRotA); + VectorF32<3, 1> localOriginB = VectorF32<3, 1>::Rotate(rayOrigin - posB, invRotB); + VectorF32<3, 1> localOriginC = VectorF32<3, 1>::Rotate(rayOrigin - posC, invRotC); + VectorF32<3, 1> localOriginD = VectorF32<3, 1>::Rotate(rayOrigin - posD, invRotD); - Vector halfExtents = boxSize * T(0.5); + VectorF32<3, 1> localDirA = VectorF32<3, 1>::Rotate(rayDir, invRotA); + VectorF32<3, 1> localDirB = VectorF32<3, 1>::Rotate(rayDir, invRotB); + VectorF32<3, 1> localDirC = VectorF32<3, 1>::Rotate(rayDir, invRotC); + VectorF32<3, 1> localDirD = VectorF32<3, 1>::Rotate(rayDir, invRotD); - T tMin = T(0); - T tMax = std::numeric_limits::max(); + VectorF32<3, 1> halfA = sizeA * 0.5f; + VectorF32<3, 1> halfB = sizeB * 0.5f; + VectorF32<3, 1> halfC = sizeC * 0.5f; + VectorF32<3, 1> halfD = sizeD * 0.5f; - for (std::uint32_t i = 0; i < 3; ++i) - { - if (std::abs(localDir.v[i]) < std::numeric_limits::epsilon()) - { - if (localOrigin.v[i] < -halfExtents.v[i] || localOrigin.v[i] > halfExtents.v[i]) { - return std::numeric_limits::max(); - } - } - else - { - T invD = T(1) / localDir.v[i]; - T t1 = (-halfExtents.v[i] - localOrigin.v[i]) * invD; - T t2 = ( halfExtents.v[i] - localOrigin.v[i]) * invD; - - if (t1 > t2) { - std::swap(t1, t2); - } - - tMin = std::max(tMin, t1); - tMax = std::min(tMax, t2); - - if (tMin > tMax) { - return std::numeric_limits::max(); - } - } - } - - return (tMin >= T(0)) ? tMin : tMax; - } - - - export template - std::vector> getOBBCorners(Vector size, MatrixRowMajor matrix) { - std::vector> localCorners = { - Vector(-size.x, -size.y, -size.z), - Vector( size.x, -size.y, -size.z), - Vector(-size.x, size.y, -size.z), - Vector( size.x, size.y, -size.z), - Vector(-size.x, -size.y, size.z), - Vector( size.x, -size.y, size.z), - Vector(-size.x, size.y, size.z), - Vector( size.x, size.y, size.z) + std::array, 4> origLanes{ + localOriginA.template Store(), + localOriginB.template Store(), + localOriginC.template Store(), + localOriginD.template Store(), + }; + std::array, 4> dirLanes{ + localDirA.template Store(), + localDirB.template Store(), + localDirC.template Store(), + localDirD.template Store(), + }; + std::array, 4> halfLanes{ + halfA.template Store(), + halfB.template Store(), + halfC.template Store(), + halfD.template Store(), }; - - std::vector> worldCorners; - - for (Vector localCorner : localCorners) { - Vector rotatedCorner = matrix * localCorner; - worldCorners.push_back(rotatedCorner); - } - return worldCorners; + constexpr float eps = std::numeric_limits::epsilon(); + constexpr float maxF = std::numeric_limits::max(); + alignas(16) std::array out{}; + for (std::uint8_t b = 0; b < 4; ++b) { + float tMin = 0.0f; + float tMax = maxF; + bool miss = false; + for (std::uint8_t i = 0; i < 3; ++i) { + float d = dirLanes[b][i]; + float o = origLanes[b][i]; + float h = halfLanes[b][i]; + if (std::abs(d) < eps) { + if (o < -h || o > h) { miss = true; break; } + } else { + float invD = 1.0f / d; + float t1 = (-h - o) * invD; + float t2 = ( h - o) * invD; + if (t1 > t2) std::swap(t1, t2); + tMin = std::max(tMin, t1); + tMax = std::min(tMax, t2); + if (tMin > tMax) { miss = true; break; } + } + } + out[b] = miss ? maxF : (tMin >= 0.0f ? tMin : tMax); + } + return VectorF32<1, 4>(out.data()); } - export template - constexpr bool IntersectionTestOrientedBoxOrientedBox(Vector sizeA, MatrixRowMajor boxA, Vector sizeB, MatrixRowMajor boxB) { - std::vector> axes; + // One sphere against four OBBs. boxMatrix encodes rotation in m[r][0..2] + // and translation in m[r][3]. + export inline VectorF32<1, 4> IntersectionTestSphereOrientedBox( + VectorF32<3, 1> sphereCenter, VectorF32<1, 4> radii, + VectorF32<3, 1> sizeA, MatrixRowMajor boxA, + VectorF32<3, 1> sizeB, MatrixRowMajor boxB, + VectorF32<3, 1> sizeC, MatrixRowMajor boxC, + VectorF32<3, 1> sizeD, MatrixRowMajor boxD + ) { + auto perBox = [&](MatrixRowMajor const& m, + VectorF32<3, 1> const& size, + VectorF32<3, 1>& xAxis, + VectorF32<3, 1>& yAxis, + VectorF32<3, 1>& zAxis, + VectorF32<3, 1>& delta) { + // Existing semantics: the OBB axes are read from the rows of the + // upper 3x3 block, and the translation column is gathered from the + // w lane of each row. + std::array r0 = m.rows[0].template Store(); + std::array r1 = m.rows[1].template Store(); + std::array r2 = m.rows[2].template Store(); + alignas(16) float xBuf[4] = { r0[0], r0[1], r0[2], 0.0f }; + alignas(16) float yBuf[4] = { r1[0], r1[1], r1[2], 0.0f }; + alignas(16) float zBuf[4] = { r2[0], r2[1], r2[2], 0.0f }; + alignas(16) float oBuf[4] = { r0[3], r1[3], r2[3], 0.0f }; + xAxis = VectorF32<3, 1>(xBuf); + yAxis = VectorF32<3, 1>(yBuf); + zAxis = VectorF32<3, 1>(zBuf); + VectorF32<3, 1> origin(oBuf); + delta = sphereCenter - origin; + (void)size; + }; - std::vector> box1Corners = getOBBCorners(sizeA, boxA); - std::vector> box2Corners = getOBBCorners(sizeB, boxB); + VectorF32<3, 1> xA, yA, zA, dA; + VectorF32<3, 1> xB, yB, zB, dB; + VectorF32<3, 1> xC, yC, zC, dC; + VectorF32<3, 1> xD, yD, zD, dD; + perBox(boxA, sizeA, xA, yA, zA, dA); + perBox(boxB, sizeB, xB, yB, zB, dB); + perBox(boxC, sizeC, xC, yC, zC, dC); + perBox(boxD, sizeD, xD, yD, zD, dD); - axes.push_back(Vector(boxA.m[0][0], boxA.m[0][1], boxA.m[0][2])); - axes.push_back(Vector(boxA.m[1][0], boxA.m[1][1], boxA.m[1][2])); - axes.push_back(Vector(boxA.m[2][0], boxA.m[2][1], boxA.m[2][2])); - - axes.push_back(Vector(boxB.m[0][0], boxB.m[0][1], boxB.m[0][2])); - axes.push_back(Vector(boxB.m[1][0], boxB.m[1][1], boxB.m[1][2])); - axes.push_back(Vector(boxB.m[2][0], boxB.m[2][1], boxB.m[2][2])); - - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - axes.push_back(Vector::Normalize(Vector::Cross(Vector(boxA.m[i][0], boxA.m[i][1], boxA.m[i][2]), Vector(boxB.m[j][0], boxB.m[j][1], boxB.m[j][2])))); + // Local sphere center per box: project delta onto each box axis. We + // produce {lx, ly, lz, lx, ly, lz, lx, ly, lz, lx, ly, lz} as three + // packed 4-wide Dot results (one Dot per axis). + VectorF32<1, 4> locX = VectorF32<3, 1>::Dot( + dA, xA, dB, xB, dC, xC, dD, xD); + VectorF32<1, 4> locY = VectorF32<3, 1>::Dot( + dA, yA, dB, yB, dC, yC, dD, yD); + VectorF32<1, 4> locZ = VectorF32<3, 1>::Dot( + dA, zA, dB, zB, dC, zC, dD, zD); + + std::array lxArr = locX.template Store(); + std::array lyArr = locY.template Store(); + std::array lzArr = locZ.template Store(); + std::array rArr = radii.template Store(); + std::array, 4> sizeLanes{ + sizeA.template Store(), + sizeB.template Store(), + sizeC.template Store(), + sizeD.template Store(), + }; + + alignas(16) std::array out{}; + for (std::uint8_t i = 0; i < 4; ++i) { + float lx = lxArr[i], ly = lyArr[i], lz = lzArr[i]; + float sx = sizeLanes[i][0], sy = sizeLanes[i][1], sz = sizeLanes[i][2]; + float cx = std::clamp(lx, -sx, sx); + float cy = std::clamp(ly, -sy, sy); + float cz = std::clamp(lz, -sz, sz); + float dx = lx - cx, dy = ly - cy, dz = lz - cz; + float distSq = dx * dx + dy * dy + dz * dz; + float r = rArr[i]; + // Returns 0.0 on hit, max on miss - keeps a consistent + // "t-like" output signature with the other intersection tests. + out[i] = (distSq <= r * r) ? 0.0f : std::numeric_limits::max(); + } + return VectorF32<1, 4>(out.data()); + } + + // Eight local corners of a unit OBB transformed by `matrix`. Uses one + // batched 4-pair Dot per result row (x, y, z), reproducing the corners in + // two groups of four. + export inline std::array, 8> GetOBBCorners( + VectorF32<3, 1> size, MatrixRowMajor matrix + ) { + std::array sz = size.template Store(); + const float sx = sz[0], sy = sz[1], sz_ = sz[2]; + + VectorF32<4, 1> mx = matrix.rows[0]; + VectorF32<4, 1> my = matrix.rows[1]; + VectorF32<4, 1> mz = matrix.rows[2]; + + // Eight homogeneous corner vectors with w=1 so the translation column + // of `matrix` participates in the dot product. + alignas(16) float c0[4] = { -sx, -sy, -sz_, 1.0f }; + alignas(16) float c1[4] = { sx, -sy, -sz_, 1.0f }; + alignas(16) float c2[4] = { -sx, sy, -sz_, 1.0f }; + alignas(16) float c3[4] = { sx, sy, -sz_, 1.0f }; + alignas(16) float c4[4] = { -sx, -sy, sz_, 1.0f }; + alignas(16) float c5[4] = { sx, -sy, sz_, 1.0f }; + alignas(16) float c6[4] = { -sx, sy, sz_, 1.0f }; + alignas(16) float c7[4] = { sx, sy, sz_, 1.0f }; + VectorF32<4, 1> v0(c0), v1(c1), v2(c2), v3(c3); + VectorF32<4, 1> v4(c4), v5(c5), v6(c6), v7(c7); + + // First four corners (0..3): batch x, y, z dots. + VectorF32<1, 4> xLo = VectorF32<4, 1>::Dot(mx, v0, mx, v1, mx, v2, mx, v3); + VectorF32<1, 4> yLo = VectorF32<4, 1>::Dot(my, v0, my, v1, my, v2, my, v3); + VectorF32<1, 4> zLo = VectorF32<4, 1>::Dot(mz, v0, mz, v1, mz, v2, mz, v3); + // Second four corners (4..7). + VectorF32<1, 4> xHi = VectorF32<4, 1>::Dot(mx, v4, mx, v5, mx, v6, mx, v7); + VectorF32<1, 4> yHi = VectorF32<4, 1>::Dot(my, v4, my, v5, my, v6, my, v7); + VectorF32<1, 4> zHi = VectorF32<4, 1>::Dot(mz, v4, mz, v5, mz, v6, mz, v7); + + std::array xLoA = xLo.template Store(); + std::array yLoA = yLo.template Store(); + std::array zLoA = zLo.template Store(); + std::array xHiA = xHi.template Store(); + std::array yHiA = yHi.template Store(); + std::array zHiA = zHi.template Store(); + + std::array, 8> result; + for (std::uint8_t i = 0; i < 4; ++i) { + alignas(16) float buf[4] = { xLoA[i], yLoA[i], zLoA[i], 0.0f }; + result[i] = VectorF32<3, 1>(buf); + } + for (std::uint8_t i = 0; i < 4; ++i) { + alignas(16) float buf[4] = { xHiA[i], yHiA[i], zHiA[i], 0.0f }; + result[4 + i] = VectorF32<3, 1>(buf); + } + return result; + } + + // SAT against fifteen separating axes (3 box-A, 3 box-B, 9 cross products). + // We compute every corner projection with batched 4-pair Dots: each axis + // projects four corners per call, two calls per axis covers the 8 corners. + export inline bool IntersectionTestOrientedBoxOrientedBox( + VectorF32<3, 1> sizeA, MatrixRowMajor boxA, + VectorF32<3, 1> sizeB, MatrixRowMajor boxB + ) { + std::array, 8> cornersA = GetOBBCorners(sizeA, boxA); + std::array, 8> cornersB = GetOBBCorners(sizeB, boxB); + + // Axes are the upper-3 lanes of each matrix row (same convention as + // SphereOrientedBox). ExtractLo<3> just retypes the SIMD register; the + // 4th lane is ignored by the Len=3 ops below. + std::array, 3> axesA = { + boxA.rows[0].template ExtractLo<3>(), + boxA.rows[1].template ExtractLo<3>(), + boxA.rows[2].template ExtractLo<3>(), + }; + std::array, 3> axesB = { + boxB.rows[0].template ExtractLo<3>(), + boxB.rows[1].template ExtractLo<3>(), + boxB.rows[2].template ExtractLo<3>(), + }; + + std::array, 15> axes{}; + axes[0] = axesA[0]; axes[1] = axesA[1]; axes[2] = axesA[2]; + axes[3] = axesB[0]; axes[4] = axesB[1]; axes[5] = axesB[2]; + // Normalize all nine cross axes together with a single batched + // Normalize call (Packing=3 not in the API, so two calls of four + + // one of one would be needed; for now just normalize in two batches + // of four and the trailing one inline). + std::array, 9> crossAxes{}; + std::uint8_t k = 0; + for (std::uint8_t i = 0; i < 3; ++i) { + for (std::uint8_t j = 0; j < 3; ++j) { + crossAxes[k++] = VectorF32<3, 1>::Cross(axesA[i], axesB[j]); } } + auto norm0 = VectorF32<3, 1>::Normalize(crossAxes[0], crossAxes[1], crossAxes[2], crossAxes[3]); + auto norm1 = VectorF32<3, 1>::Normalize(crossAxes[4], crossAxes[5], crossAxes[6], crossAxes[7]); + auto norm2 = VectorF32<3, 1>::Normalize(crossAxes[8], crossAxes[8], crossAxes[8], crossAxes[8]); + axes[6] = std::get<0>(norm0); + axes[7] = std::get<1>(norm0); + axes[8] = std::get<2>(norm0); + axes[9] = std::get<3>(norm0); + axes[10] = std::get<0>(norm1); + axes[11] = std::get<1>(norm1); + axes[12] = std::get<2>(norm1); + axes[13] = std::get<3>(norm1); + axes[14] = std::get<0>(norm2); - for (Vector axis : axes) { - T min1 = Vector::Dot(box1Corners[0], axis); - T max1 = min1; - for (Vector corner : box1Corners) { - T projection = Vector::Dot(corner, axis); - min1 = std::min(min1, projection); - max1 = std::max(max1, projection); + for (std::uint8_t axisIdx = 0; axisIdx < 15; ++axisIdx) { + VectorF32<3, 1> axis = axes[axisIdx]; + // Project all 8 corners of each box onto `axis` using two batched + // 4-pair Dot calls (lo and hi corners). + VectorF32<1, 4> projA_lo = VectorF32<3, 1>::Dot( + cornersA[0], axis, cornersA[1], axis, + cornersA[2], axis, cornersA[3], axis); + VectorF32<1, 4> projA_hi = VectorF32<3, 1>::Dot( + cornersA[4], axis, cornersA[5], axis, + cornersA[6], axis, cornersA[7], axis); + VectorF32<1, 4> projB_lo = VectorF32<3, 1>::Dot( + cornersB[0], axis, cornersB[1], axis, + cornersB[2], axis, cornersB[3], axis); + VectorF32<1, 4> projB_hi = VectorF32<3, 1>::Dot( + cornersB[4], axis, cornersB[5], axis, + cornersB[6], axis, cornersB[7], axis); + + std::array aLo = projA_lo.template Store(); + std::array aHi = projA_hi.template Store(); + std::array bLo = projB_lo.template Store(); + std::array bHi = projB_hi.template Store(); + + float minA = aLo[0], maxA = aLo[0]; + for (std::uint8_t i = 1; i < 4; ++i) { + minA = std::min(minA, aLo[i]); + maxA = std::max(maxA, aLo[i]); + } + for (std::uint8_t i = 0; i < 4; ++i) { + minA = std::min(minA, aHi[i]); + maxA = std::max(maxA, aHi[i]); + } + float minB = bLo[0], maxB = bLo[0]; + for (std::uint8_t i = 1; i < 4; ++i) { + minB = std::min(minB, bLo[i]); + maxB = std::max(maxB, bLo[i]); + } + for (std::uint8_t i = 0; i < 4; ++i) { + minB = std::min(minB, bHi[i]); + maxB = std::max(maxB, bHi[i]); } - T min2 = Vector::Dot(box2Corners[0], axis); - T max2 = min2; - for (Vector corner : box2Corners) { - T projection = Vector::Dot(corner, axis); - min2 = std::min(min2, projection); - max2 = std::max(max2, projection); - } - - if (max1 < min2 || max2 < min1) { - return false; - } + if (maxA < minB || maxB < minA) return false; } - return true; } - - export template - constexpr bool IntersectionTestSphereOrientedBox(Vector sphereCenter, T sphereRadius, Vector boxSize, MatrixRowMajor boxMatrix) { - // Extract the OBB's axes (columns of the rotation matrix) - Vector xAxis(boxMatrix.m[0][0], boxMatrix.m[0][1], boxMatrix.m[0][2]); - Vector yAxis(boxMatrix.m[1][0], boxMatrix.m[1][1], boxMatrix.m[1][2]); - Vector zAxis(boxMatrix.m[2][0], boxMatrix.m[2][1], boxMatrix.m[2][2]); - - // Translate the sphere center into the OBB's local space - Vector localCenter = Vector( - Vector::Dot(sphereCenter - Vector(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), xAxis), - Vector::Dot(sphereCenter - Vector(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), yAxis), - Vector::Dot(sphereCenter - Vector(boxMatrix.m[0][3], boxMatrix.m[1][3], boxMatrix.m[2][3]), zAxis) - ); - - // Clamp the local center to the OBB's extents - Vector closestPoint = Vector( - std::max(-boxSize.x, std::min(localCenter.x, boxSize.x)), - std::max(-boxSize.y, std::min(localCenter.y, boxSize.y)), - std::max(-boxSize.z, std::min(localCenter.z, boxSize.z)) - ); - - // Calculate the distance between the closest point and the local center - Vector delta = localCenter - closestPoint; - T distanceSquared = Vector::Dot(delta, delta); - - // Check if the distance is less than or equal to the sphere's radius squared - return distanceSquared <= (sphereRadius * sphereRadius); - } -} \ No newline at end of file +} diff --git a/interfaces/Crafter.Math-MatrixRowMajor.cppm b/interfaces/Crafter.Math-MatrixRowMajor.cppm index 2bf810e..3e56369 100755 --- a/interfaces/Crafter.Math-MatrixRowMajor.cppm +++ b/interfaces/Crafter.Math-MatrixRowMajor.cppm @@ -1,433 +1,330 @@ -/* -Crafter®.Math -Copyright (C) 2026 Catcrafts® -catcrafts.net - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License version 3.0 as published by the Free Software Foundation; - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -export module Crafter.Math:MatrixRowMajor; - -import :Basic; -import :Vector; -import std; - -namespace Crafter { - export template - class MatrixRowMajor { - public: - T m[RowSize][CollumSize*Repeats]; - - MatrixRowMajor() = default; - - MatrixRowMajor( - float x0, float y0, float z0, float w0, - float x1, float y1, float z1, float w1, - float x2, float y2, float z2, float w2, - float x3, float y3, float z3, float w3 - ) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - m[0][0] = x0; - m[0][1] = y0; - m[0][2] = z0; - m[0][3] = w0; - - m[1][0] = x1; - m[1][1] = y1; - m[1][2] = z1; - m[1][3] = w1; - - m[2][0] = x2; - m[2][1] = y2; - m[2][2] = z2; - m[2][3] = w2; - - m[3][0] = x3; - m[3][1] = y3; - m[3][2] = z3; - m[3][3] = w3; - } - - MatrixRowMajor( - float x0, float y0, float z0, float w0, - float x1, float y1, float z1, float w1, - float x2, float y2, float z2, float w2 - ) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - m[0][0] = x0; - m[0][1] = y0; - m[0][2] = z0; - m[0][3] = w0; - - m[1][0] = x1; - m[1][1] = y1; - m[1][2] = z1; - m[1][3] = w1; - - m[2][0] = x2; - m[2][1] = y2; - m[2][2] = z2; - m[2][3] = w2; - } - - template - Vector operator*(Vector b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - return Vector( - b.x * m[0][0] + b.y * m[0][1] + b.z * m[0][2] + m[0][3], - b.x * m[1][0] + b.y * m[1][1] + b.z * m[1][2] + m[1][3], - b.x * m[2][0] + b.y * m[2][1] + b.z * m[2][2] + m[2][3] - ); - } - - MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - MatrixRowMajor result; - - result.m[0][0] = b.m[0][0] * m[0][0] + b.m[0][1] * m[1][0] + b.m[0][2] * m[2][0] + b.m[0][3] * m[3][0]; - result.m[1][0] = b.m[1][0] * m[0][0] + b.m[1][1] * m[1][0] + b.m[1][2] * m[2][0] + b.m[1][3] * m[3][0]; - result.m[2][0] = b.m[2][0] * m[0][0] + b.m[2][1] * m[1][0] + b.m[2][2] * m[2][0] + b.m[2][3] * m[3][0]; - result.m[3][0] = b.m[3][0] * m[0][0] + b.m[3][1] * m[1][0] + b.m[3][2] * m[2][0] + b.m[3][3] * m[3][0]; - - result.m[0][1] = b.m[0][0] * m[0][1] + b.m[0][1] * m[1][1] + b.m[0][2] * m[2][1] + b.m[0][3] * m[3][1]; - result.m[1][1] = b.m[1][0] * m[0][1] + b.m[1][1] * m[1][1] + b.m[1][2] * m[2][1] + b.m[1][3] * m[3][1]; - result.m[2][1] = b.m[2][0] * m[0][1] + b.m[2][1] * m[1][1] + b.m[2][2] * m[2][1] + b.m[2][3] * m[3][1]; - result.m[3][1] = b.m[3][0] * m[0][1] + b.m[3][1] * m[1][1] + b.m[3][2] * m[2][1] + b.m[3][3] * m[3][1]; - - result.m[0][2] = b.m[0][0] * m[0][2] + b.m[0][1] * m[1][2] + b.m[0][2] * m[2][2] + b.m[0][3] * m[3][2]; - result.m[1][2] = b.m[1][0] * m[0][2] + b.m[1][1] * m[1][2] + b.m[1][2] * m[2][2] + b.m[1][3] * m[3][2]; - result.m[2][2] = b.m[2][0] * m[0][2] + b.m[2][1] * m[1][2] + b.m[2][2] * m[2][2] + b.m[2][3] * m[3][2]; - result.m[3][2] = b.m[3][0] * m[0][2] + b.m[3][1] * m[1][2] + b.m[3][2] * m[2][2] + b.m[3][3] * m[3][2]; - - result.m[0][3] = b.m[0][0] * m[0][3] + b.m[0][1] * m[1][3] + b.m[0][2] * m[2][3] + b.m[0][3] * m[3][3]; - result.m[1][3] = b.m[1][0] * m[0][3] + b.m[1][1] * m[1][3] + b.m[1][2] * m[2][3] + b.m[1][3] * m[3][3]; - result.m[2][3] = b.m[2][0] * m[0][3] + b.m[2][1] * m[1][3] + b.m[2][2] * m[2][3] + b.m[2][3] * m[3][3]; - result.m[3][3] = b.m[3][0] * m[0][3] + b.m[3][1] * m[1][3] + b.m[3][2] * m[2][3] + b.m[3][3] * m[3][3]; - - return result; - } - - MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - MatrixRowMajor result; - - // Column 0 - result.m[0][0] = b.m[0][0]*m[0][0] + b.m[0][1]*m[1][0] + b.m[0][2]*m[2][0]; - result.m[1][0] = b.m[1][0]*m[0][0] + b.m[1][1]*m[1][0] + b.m[1][2]*m[2][0]; - result.m[2][0] = b.m[2][0]*m[0][0] + b.m[2][1]*m[1][0] + b.m[2][2]*m[2][0]; - - // Column 1 - result.m[0][1] = b.m[0][0]*m[0][1] + b.m[0][1]*m[1][1] + b.m[0][2]*m[2][1]; - result.m[1][1] = b.m[1][0]*m[0][1] + b.m[1][1]*m[1][1] + b.m[1][2]*m[2][1]; - result.m[2][1] = b.m[2][0]*m[0][1] + b.m[2][1]*m[1][1] + b.m[2][2]*m[2][1]; - - // Column 2 - result.m[0][2] = b.m[0][0]*m[0][2] + b.m[0][1]*m[1][2] + b.m[0][2]*m[2][2]; - result.m[1][2] = b.m[1][0]*m[0][2] + b.m[1][1]*m[1][2] + b.m[1][2]*m[2][2]; - result.m[2][2] = b.m[2][0]*m[0][2] + b.m[2][1]*m[1][2] + b.m[2][2]*m[2][2]; - - // Translation column - result.m[0][3] = b.m[0][0]*m[0][3] + b.m[0][1]*m[1][3] + b.m[0][2]*m[2][3] + b.m[0][3]; - result.m[1][3] = b.m[1][0]*m[0][3] + b.m[1][1]*m[1][3] + b.m[1][2]*m[2][3] + b.m[1][3]; - result.m[2][3] = b.m[2][0]*m[0][3] + b.m[2][1]*m[1][3] + b.m[2][2]*m[2][3] + b.m[2][3]; - - - return result; - } - - static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - return MatrixRowMajor( - 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0 - ); - } - - static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - return MatrixRowMajor( - x, 0, 0, 0, - 0, y, 0, 0, - 0, 0, z, 0, - 0, 0, 0, 1 - ); - } - static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - return MatrixRowMajor( - x, 0, 0, 0, - 0, y, 0, 0, - 0, 0, z, 0 - ); - } - template - static MatrixRowMajor Scaling(Vector vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - return Scaling(vector.x, vector.y, vector.z); - } - - static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - return MatrixRowMajor( - 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0, - x, y, z, 1 - ); - } - static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - return MatrixRowMajor( - 1, 0, 0, x, - 0, 1, 0, y, - 0, 0, 1, z - ); - } - - template - static MatrixRowMajor Translation(Vector vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - return Translation(vector.x, vector.y, vector.z); - } - - static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - float cp = std::cosf(Pitch); - float sp = std::sinf(Pitch); - - float cy = std::cosf(Yaw); - float sy = std::sinf(Yaw); - - float cr = std::cosf(Roll); - float sr = std::sinf(Roll); - - MatrixRowMajor M; - M.m[0][0] = cr * cy + sr * sp * sy; - M.m[0][1] = sr * cp; - M.m[0][2] = sr * sp * cy - cr * sy; - M.m[0][3] = 0.0f; - - M.m[1][0] = cr * sp * sy - sr * cy; - M.m[1][1] = cr * cp; - M.m[1][2] = sr * sy + cr * sp * cy; - M.m[1][3] = 0.0f; - - M.m[2][0] = cp * sy; - M.m[2][1] = -sp; - M.m[2][2] = cp * cy; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - - return M; - } - - static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { - float cp = std::cosf(Pitch); - float sp = std::sinf(Pitch); - - float cy = std::cosf(Yaw); - float sy = std::sinf(Yaw); - - float cr = std::cosf(Roll); - float sr = std::sinf(Roll); - - MatrixRowMajor M; - M.m[0][0] = cr * cy + sr * sp * sy; - M.m[0][1] = sr * cp; - M.m[0][2] = sr * sp * cy - cr * sy; - M.m[0][3] = 0.0f; - - M.m[1][0] = cr * sp * sy - sr * cy; - M.m[1][1] = cr * cp; - M.m[1][2] = sr * sy + cr * sp * cy; - M.m[1][3] = 0.0f; - - M.m[2][0] = cp * sy; - M.m[2][1] = -sp; - M.m[2][2] = cp * cy; - M.m[2][3] = 0.0f; - - return M; - } - - template - static MatrixRowMajor LookAt(Vector eyePosition, Vector focusPosition, Vector upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - MatrixRowMajor M; - - Vector negEyeDirection = eyePosition - focusPosition; - return LookTo(eyePosition, negEyeDirection, upDirection); - - return M; - } - - template - static MatrixRowMajor LookTo(Vector eyePosition, Vector eyeDirection, Vector upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - Vector R2 = eyeDirection.Normalize(); - - Vector R0 = upDirection.Cross(R2); - R0 = R0.Normalize(); - - Vector R1 = R2.Cross(R0); - - Vector NegEyePosition = -eyePosition; - - float D0 = R0.Dot(NegEyePosition); - float D1 = R1.Dot(NegEyePosition); - float D2 = R2.Dot(NegEyePosition); - - MatrixRowMajor M; - M.m[0][0] = R0.v[0]; - M.m[1][0] = R0.v[1]; - M.m[2][0] = R0.v[2]; - M.m[3][0] = D0; - - M.m[0][1] = R1.v[0]; - M.m[1][1] = R1.v[1]; - M.m[2][1] = R1.v[2]; - M.m[3][1] = D1; - - M.m[0][2] = R2.v[0]; - M.m[1][2] = R2.v[1]; - M.m[2][2] = R2.v[2]; - M.m[3][2] = D2; - - M.m[0][3] = 0; - M.m[1][3] = 0; - M.m[2][3] = 0; - M.m[3][3] = 1; - - return M; - } - - template - static MatrixRowMajor Rotation(Vector vector) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - return Rotation(vector.x, vector.y, vector.z); - } - - template - Vector TransformNormal(Vector V) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - Vector Z = Vector(V.z, V.z, V.z); - Vector Y = Vector(V.y, V.y, V.y); - Vector X = Vector(V.x, V.x, V.x); - - Vector Result = Z * Vector(m[2][0], m[2][1], m[2][2]); - Result = Y * Vector(m[1][0], m[1][1], m[1][2]) + Result; - Result = X * Vector(m[0][0], m[0][1], m[0][2]) + Result; - - return Result; - } - - // MatrixRowMajor Inverse() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { - // Vector V0[4], V1[4]; - // V0[0] = Vector(m[0][2], m[0][2], m[1][2], m[1][2]); - // V1[0] = Vector(m[2][3], m[3][3], m[2][3], m[3][3]); - // V0[1] = Vector(m[0][0], m[0][0], m[1][0], m[1][0]); - // V1[1] = Vector(m[2][1], m[3][1], m[2][1], m[3][1]); - // V0[2] = Vector(m[0][2], m[2][2], m[0][0], m[2][0]); - // V1[2] = Vector(m[1][3], m[3][3], m[1][1], m[3][1]); - - // Vector D0 = V0[0] * V1[0]; - // Vector D1 = V0[1] * V1[1]; - // Vector D2 = V0[2] * V1[2]; - - // V0[0] = Vector(m[2][2], m[3][2], m[2][2], m[3][2]); - // V1[0] = Vector(m[0][3], m[0][3], m[1][3], m[1][3]); - // V0[1] = Vector(m[2][0], m[3][0], m[2][0], m[3][0]); - // V1[1] = Vector(m[0][1], m[0][1], m[1][1], m[1][1]); - // V0[2] = Vector(m[1][2], m[3][2], m[1][0], m[3][0]); - // V1[2] = Vector(m[0][3], m[2][3], m[0][1], m[2][1]); - - // D0 = Vector::NegativeMultiplySubtract(V0[0], V1[0], D0); - // D1 = Vector::NegativeMultiplySubtract(V0[1], V1[1], D1); - // D2 = Vector::NegativeMultiplySubtract(V0[2], V1[2], D2); - - // V0[0] = Vector(m[1][1], m[2][1], m[0][1], m[1][1]); - // V1[0] = Vector(D2.v[1], D0.v[1], D0.v[3], D0.v[0]); - // V0[1] = Vector(m[2][0], m[0][0], m[1][0], m[0][0]); - // V1[1] = Vector(D0.v[3], D2.v[1], D0.v[1], D0.v[2]); - // V0[2] = Vector(m[1][3], m[2][3], m[0][3], m[1][3]); - // V1[2] = Vector(D2.v[3], D1.v[1], D1.v[3], D1.v[0]); - // V0[3] = Vector(m[2][2], m[0][2], m[1][2], m[0][2]); - // V1[3] = Vector(D1.v[3], D2.v[3], D1.v[1], D1.v[2]); - - // Vector C0 = V0[0] * V1[0]; - // Vector C2 = V0[1] * V1[1]; - // Vector C4 = V0[2] * V1[2]; - // Vector C6 = V0[3] * V1[3]; - - // V0[0] = Vector(m[2][1], m[3][1], m[1][1], m[2][1]); - // V1[0] = Vector(D0.v[3], D0.v[0], D0.v[1], D2.v[0]); - // V0[1] = Vector(m[3][0], m[2][0], m[3][0], m[1][0]); - // V1[1] = Vector(D0.v[2], D0.v[1], D2.v[0], D0.v[0]); - // V0[2] = Vector(m[2][3], m[3][3], m[1][3], m[2][3]); - // V1[2] = Vector(D1.v[3], D1.v[0], D1.v[1], D2.v[2]); - // V0[3] = XMVectorSwizzle(MT.r[2]); - // V1[3] = XMVectorPermute(D1, D2); - - // C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); - // C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); - // C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); - // C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); - - // V0[0] = XMVectorSwizzle(MT.r[1]); - // V1[0] = XMVectorPermute(D0, D2); - // V0[1] = XMVectorSwizzle(MT.r[0]); - // V1[1] = XMVectorPermute(D0, D2); - // V0[2] = XMVectorSwizzle(MT.r[3]); - // V1[2] = XMVectorPermute(D1, D2); - // V0[3] = XMVectorSwizzle(MT.r[2]); - // V1[3] = XMVectorPermute(D1, D2); - - // XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); - // C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0); - // XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2); - // C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); - // XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); - // C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4); - // XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6); - // C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); - - // XMMATRIX R; - // R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v); - // R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v); - // R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v); - // R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v); - - // XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]); - // XMVECTOR Reciprocal = XMVectorReciprocal(Determinant); - - // XMMATRIX Result; - // Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal); - // Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal); - // Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal); - // Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal); - // return Result; - // } - }; -} - -template <> -struct std::formatter> : std::formatter { - auto format(const Crafter::MatrixRowMajor& obj, format_context& ctx) const { - return std::formatter::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", - obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3], - obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3], - obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3], - obj.m[3][0], obj.m[3][1], obj.m[3][2], obj.m[3][3] - ), ctx); - } -}; - -template <> -struct std::formatter> : std::formatter { - auto format(const Crafter::MatrixRowMajor& obj, format_context& ctx) const { - return std::formatter::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", - obj.m[0][0], obj.m[0][1], obj.m[0][2], obj.m[0][3], - obj.m[1][0], obj.m[1][1], obj.m[1][2], obj.m[1][3], - obj.m[2][0], obj.m[2][1], obj.m[2][2], obj.m[2][3] - ), ctx); - } -}; \ No newline at end of file +/* +Crafter®.Math +Copyright (C) 2026 Catcrafts® +catcrafts.net + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License version 3.0 as published by the Free Software Foundation; + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +export module Crafter.Math:MatrixRowMajor; + +import :Basic; +import :VectorF32; +import std; + +namespace Crafter { + // Row-major matrix whose rows live in optimized SIMD vectors. All + // multiplications are expressed as broadcast + fused multiply-add against + // these row vectors so the heavy work stays in __m128/__m256/__m512 land. + // + // CollumSize is the column count; only CollumSize == 4 is implemented (the + // matrix gets one SIMD row per row). Repeats is reserved for future + // SoA-style batching. + export template + class MatrixRowMajor { + public: + // Rows are exposed publicly so users can compose with VectorF32 ops + // directly without going through an accessor. Each row is a single + // SIMD vector covering all columns. + VectorF32(CollumSize), 1> rows[RowSize]; + + MatrixRowMajor() = default; + + MatrixRowMajor( + float x0, float y0, float z0, float w0, + float x1, float y1, float z1, float w1, + float x2, float y2, float z2, float w2, + float x3, float y3, float z3, float w3 + ) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + alignas(16) float r0[4] = { x0, y0, z0, w0 }; + alignas(16) float r1[4] = { x1, y1, z1, w1 }; + alignas(16) float r2[4] = { x2, y2, z2, w2 }; + alignas(16) float r3[4] = { x3, y3, z3, w3 }; + rows[0] = VectorF32<4, 1>(r0); + rows[1] = VectorF32<4, 1>(r1); + rows[2] = VectorF32<4, 1>(r2); + rows[3] = VectorF32<4, 1>(r3); + } + + MatrixRowMajor( + float x0, float y0, float z0, float w0, + float x1, float y1, float z1, float w1, + float x2, float y2, float z2, float w2 + ) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + alignas(16) float r0[4] = { x0, y0, z0, w0 }; + alignas(16) float r1[4] = { x1, y1, z1, w1 }; + alignas(16) float r2[4] = { x2, y2, z2, w2 }; + rows[0] = VectorF32<4, 1>(r0); + rows[1] = VectorF32<4, 1>(r1); + rows[2] = VectorF32<4, 1>(r2); + } + + // Flatten to RowSize*CollumSize contiguous floats (row-major). Replaces + // the old `m[i][j]` raw array access for callers that need a packed + // float buffer (e.g. GPU upload via memcpy). + constexpr void Store(float* dst) const requires(CollumSize == 4 && Repeats == 1 && std::same_as) { + for (std::uint32_t i = 0; i < RowSize; ++i) { + rows[i].Store(dst + i * 4); + } + } + + constexpr std::array Store() const requires(CollumSize == 4 && Repeats == 1 && std::same_as) { + std::array out{}; + Store(out.data()); + return out; + } + + // Affine transform: extend `b` with implicit w=1 (translation) and dot + // each row against it. Three row-dots packed into one batched 4-pair + // Dot call (lane 3 of the result is the duplicated row 0 and gets + // discarded). + VectorF32<3, 1> operator*(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + std::array bArr = b.template Store(); + alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 1.0f }; + VectorF32<4, 1> bh(bhBuf); + + VectorF32<1, 4> dots = VectorF32<4, 1>::Dot( + rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh); + + std::array dotsArr = dots.template Store(); + alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f }; + return VectorF32<3, 1>(outBuf); + } + + // Linear transform (no translation): same as the affine version but + // with bh.w = 0 so the translation column does not contribute. Useful + // for direction vectors and normals. + VectorF32<3, 1> TransformNormal(VectorF32<3, 1> b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + std::array bArr = b.template Store(); + alignas(16) float bhBuf[4] = { bArr[0], bArr[1], bArr[2], 0.0f }; + VectorF32<4, 1> bh(bhBuf); + + VectorF32<1, 4> dots = VectorF32<4, 1>::Dot( + rows[0], bh, rows[1], bh, rows[2], bh, rows[0], bh); + + std::array dotsArr = dots.template Store(); + alignas(16) float outBuf[4] = { dotsArr[0], dotsArr[1], dotsArr[2], 0.0f }; + return VectorF32<3, 1>(outBuf); + } + + // 4×4 matrix product via broadcast + FMA. Each result row is + // b[i][0]·rows[0] + b[i][1]·rows[1] + b[i][2]·rows[2] + b[i][3]·rows[3] + // produced with four shuffle-broadcasts and three fused multiply-adds. + MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + MatrixRowMajor result; + for (std::uint32_t i = 0; i < 4; ++i) { + VectorF32<4, 1> bi = b.rows[i]; + VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>(); + VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>(); + VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>(); + VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>(); + + VectorF32<4, 1> row = bx * rows[0]; + row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row); + row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row); + row = VectorF32<4, 1>::MulitplyAdd(bw, rows[3], row); + result.rows[i] = row; + } + return result; + } + + // 4×3 affine product. Same broadcast + FMA pattern, but the implicit + // 4th row of both matrices is [0, 0, 0, 1] so the b.w · row3 term + // contributes only to the translation slot. + MatrixRowMajor operator*(MatrixRowMajor b) const requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + alignas(16) float wRowBuf[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; + VectorF32<4, 1> wRow(wRowBuf); + + MatrixRowMajor result; + for (std::uint32_t i = 0; i < 3; ++i) { + VectorF32<4, 1> bi = b.rows[i]; + VectorF32<4, 1> bx = bi.template Shuffle<{{0, 0, 0, 0}}>(); + VectorF32<4, 1> by = bi.template Shuffle<{{1, 1, 1, 1}}>(); + VectorF32<4, 1> bz = bi.template Shuffle<{{2, 2, 2, 2}}>(); + VectorF32<4, 1> bw = bi.template Shuffle<{{3, 3, 3, 3}}>(); + + VectorF32<4, 1> row = bx * rows[0]; + row = VectorF32<4, 1>::MulitplyAdd(by, rows[1], row); + row = VectorF32<4, 1>::MulitplyAdd(bz, rows[2], row); + row = VectorF32<4, 1>::MulitplyAdd(bw, wRow, row); + result.rows[i] = row; + } + return result; + } + + static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0 + ); + } + + static MatrixRowMajor Identity() requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1 + ); + } + + static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + x, 0, 0, 0, + 0, y, 0, 0, + 0, 0, z, 0, + 0, 0, 0, 1 + ); + } + static MatrixRowMajor Scaling(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + x, 0, 0, 0, + 0, y, 0, 0, + 0, 0, z, 0 + ); + } + static MatrixRowMajor Scaling(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as) { + std::array a = vector.template Store(); + return Scaling(a[0], a[1], a[2]); + } + + static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + x, y, z, 1 + ); + } + static MatrixRowMajor Translation(float x, float y, float z) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + return MatrixRowMajor( + 1, 0, 0, x, + 0, 1, 0, y, + 0, 0, 1, z + ); + } + static MatrixRowMajor Translation(VectorF32<3, 1> vector) requires(CollumSize == 4 && Repeats == 1 && std::same_as) { + std::array a = vector.template Store(); + return Translation(a[0], a[1], a[2]); + } + + // Pitch/yaw/roll Euler rotation. Computes all three sin/cos pairs as a + // single batched SinCos on a VectorF32<3, 1>, then assembles the rows. + static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f }; + VectorF32<3, 1> v(angles); + std::tuple, VectorF32<3, 1>> sc = v.SinCos(); + std::array s = std::get<0>(sc).template Store(); + std::array c = std::get<1>(sc).template Store(); + const float sp = s[0], cp = c[0]; + const float sy = s[1], cy = c[1]; + const float sr = s[2], cr = c[2]; + + return MatrixRowMajor( + cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f, + cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f, + cp * sy, -sp, cp * cy, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + ); + } + + static MatrixRowMajor Rotation(float Pitch, float Yaw, float Roll) requires(CollumSize == 4 && RowSize == 3 && Repeats == 1 && std::same_as) { + alignas(16) float angles[4] = { Pitch, Yaw, Roll, 0.0f }; + VectorF32<3, 1> v(angles); + std::tuple, VectorF32<3, 1>> sc = v.SinCos(); + std::array s = std::get<0>(sc).template Store(); + std::array c = std::get<1>(sc).template Store(); + const float sp = s[0], cp = c[0]; + const float sy = s[1], cy = c[1]; + const float sr = s[2], cr = c[2]; + + return MatrixRowMajor( + cr * cy + sr * sp * sy, sr * cp, sr * sp * cy - cr * sy, 0.0f, + cr * sp * sy - sr * cy, cr * cp, sr * sy + cr * sp * cy, 0.0f, + cp * sy, -sp, cp * cy, 0.0f + ); + } + + static MatrixRowMajor Rotation(VectorF32<3, 1> v) requires(CollumSize == 4 && Repeats == 1 && std::same_as) { + std::array a = v.template Store(); + return Rotation(a[0], a[1], a[2]); + } + + // View matrix: builds the basis from a forward (negated) direction and + // an up reference, then dots each basis vector with -eye for the + // translation column. The four dots needed are produced by a single + // batched 4-pair Dot. + static MatrixRowMajor LookTo(VectorF32<3, 1> eyePosition, VectorF32<3, 1> eyeDirection, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + // R0 = up × R2 is linear in R2, so its normalized direction does + // not depend on whether we hand R2 in before or after its own + // normalize. Computing R0_raw from the un-normalized R2 lets us + // satisfy the 4-input Normalize requirement with one batched call + // (duplicating R2 and R0 in the padding slots). + VectorF32<3, 1> R0Raw = VectorF32<3, 1>::Cross(upDirection, eyeDirection); + auto normalized = VectorF32<3, 1>::Normalize(eyeDirection, R0Raw, eyeDirection, R0Raw); + VectorF32<3, 1> R2 = std::get<0>(normalized); + VectorF32<3, 1> R0 = std::get<1>(normalized); + VectorF32<3, 1> R1 = VectorF32<3, 1>::Cross(R2, R0); + VectorF32<3, 1> negEye = -eyePosition; + + VectorF32<1, 4> dots = VectorF32<3, 1>::Dot( + R0, negEye, R1, negEye, R2, negEye, R0, negEye); + std::array d = dots.template Store(); + std::array r0a = R0.template Store(); + std::array r1a = R1.template Store(); + std::array r2a = R2.template Store(); + + return MatrixRowMajor( + r0a[0], r1a[0], r2a[0], 0.0f, + r0a[1], r1a[1], r2a[1], 0.0f, + r0a[2], r1a[2], r2a[2], 0.0f, + d[0], d[1], d[2], 1.0f + ); + } + + static MatrixRowMajor LookAt(VectorF32<3, 1> eyePosition, VectorF32<3, 1> focusPosition, VectorF32<3, 1> upDirection) requires(CollumSize == 4 && RowSize == 4 && Repeats == 1 && std::same_as) { + return LookTo(eyePosition, eyePosition - focusPosition, upDirection); + } + }; +} + +// Pretty printer using Store() so it does not depend on the legacy m[i][j] +// access pattern. +template <> +struct std::formatter> : std::formatter { + auto format(const Crafter::MatrixRowMajor& obj, format_context& ctx) const { + std::array v = obj.Store(); + return std::formatter::format(std::format( + "{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", + v[0], v[1], v[2], v[3], + v[4], v[5], v[6], v[7], + v[8], v[9], v[10], v[11], + v[12], v[13], v[14], v[15] + ), ctx); + } +}; + +template <> +struct std::formatter> : std::formatter { + auto format(const Crafter::MatrixRowMajor& obj, format_context& ctx) const { + std::array v = obj.Store(); + return std::formatter::format(std::format( + "{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", + v[0], v[1], v[2], v[3], + v[4], v[5], v[6], v[7], + v[8], v[9], v[10], v[11] + ), ctx); + } +}; diff --git a/project.cpp b/project.cpp index c932641..512f672 100644 --- a/project.cpp +++ b/project.cpp @@ -28,10 +28,10 @@ extern "C" Configuration CrafterBuildProject(std::span a cfg.GetInterfacesAndImplementations(ifaces, impls); } - auto addVectorTest = [&](std::string march, std::string mtune) { + auto addTest = [&](std::string_view testName, std::string march, std::string mtune) { Test t; t.config.path = "./"; - t.config.name = std::format("Vector-{}", march); + t.config.name = std::format("{}-{}", testName, march); t.config.outputName = t.config.name; t.config.target = cfg.target; t.config.type = ConfigurationType::Executable; @@ -40,13 +40,15 @@ extern "C" Configuration CrafterBuildProject(std::span a t.config.debug = cfg.debug; std::array ifaces; std::ranges::copy(mathInterfaces, ifaces.begin()); - std::array impls = { "tests/Vector" }; + std::array impls = { fs::path{std::format("tests/{}", testName)} }; t.config.GetInterfacesAndImplementations(ifaces, impls); cfg.tests.push_back(std::move(t)); }; - addVectorTest("sapphirerapids", "native"); - addVectorTest("x86-64-v4", "generic"); - addVectorTest("x86-64-v3", "generic"); + for (std::string_view name : { "Vector", "Intersection", "Matrix" }) { + addTest(name, "sapphirerapids", "native"); + addTest(name, "x86-64-v4", "generic"); + addTest(name, "x86-64-v3", "generic"); + } return cfg; } diff --git a/tests/Intersection.cpp b/tests/Intersection.cpp new file mode 100644 index 0000000..d6a9c4b --- /dev/null +++ b/tests/Intersection.cpp @@ -0,0 +1,288 @@ +/* +Crafter® Build +Copyright (C) 2026 Catcrafts® +Catcrafts.net + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License version 3.0 as published by the Free Software Foundation; + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +import Crafter.Math; +import std; +using namespace Crafter; + +namespace { + +constexpr float kEps = 1e-3f; +constexpr float kMaxF = std::numeric_limits::max(); + +constexpr bool FloatEquals(float a, float b, float epsilon = kEps) { + return std::abs(a - b) < epsilon; +} + +VectorF32<3, 1> Vec3(float x, float y, float z) { + alignas(16) float buf[4] = { x, y, z, 0.0f }; + return VectorF32<3, 1>(buf); +} + +VectorF32<4, 1> Vec4(float x, float y, float z, float w) { + alignas(16) float buf[4] = { x, y, z, w }; + return VectorF32<4, 1>(buf); +} + +VectorF32<1, 4> Vec1x4(float a, float b, float c, float d) { + alignas(16) float buf[4] = { a, b, c, d }; + return VectorF32<1, 4>(buf); +} + +// Möller-Trumbore in this codebase rejects det <= eps, so triangles must be +// wound so their geometric normal opposes the ray direction. For rays going +Z +// that means clockwise from a +Z viewer. +std::string* TestRayTriangle() { + VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5); + VectorF32<3, 1> rayDir = Vec3(0, 0, 1); + + // A: hits at z=0, t=5 (front-facing). + VectorF32<3, 1> a0 = Vec3(-1, -1, 0), a1 = Vec3(0, 1, 0), a2 = Vec3(1, -1, 0); + // B: hits at z=10, t=15. + VectorF32<3, 1> b0 = Vec3(-1, -1, 10), b1 = Vec3(0, 1, 10), b2 = Vec3(1, -1, 10); + // C: front-facing triangle far off to the side - u or v out of [0,1]. + VectorF32<3, 1> c0 = Vec3(99, -1, 0), c1 = Vec3(100, 1, 0), c2 = Vec3(101, -1, 0); + // D: triangle parallel to the ray (all vertices share y=2; ray lives in y=0). + VectorF32<3, 1> d0 = Vec3(-1, 2, -1), d1 = Vec3(1, 2, 1), d2 = Vec3(0, 2, 2); + + VectorF32<1, 4> t = IntersectionTestRayTriangle(rayOrigin, rayDir, + a0, a1, a2, + b0, b1, b2, + c0, c1, c2, + d0, d1, d2); + std::array s = t.template Store(); + + if (!FloatEquals(s[0], 5.0f)) + return new std::string(std::format("RayTriangle A: expected 5, got {}", s[0])); + if (!FloatEquals(s[1], 15.0f)) + return new std::string(std::format("RayTriangle B: expected 15, got {}", s[1])); + if (s[2] != kMaxF) + return new std::string(std::format("RayTriangle C: expected max (miss), got {}", s[2])); + if (s[3] != kMaxF) + return new std::string(std::format("RayTriangle D: expected max (parallel miss), got {}", s[3])); + return nullptr; +} + +std::string* TestRayTriangleBackFacing() { + // Same A vertices but CCW from +Z viewer -> back-facing for +Z ray -> miss. + VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5); + VectorF32<3, 1> rayDir = Vec3(0, 0, 1); + VectorF32<3, 1> v0 = Vec3(-1, -1, 0), v1 = Vec3(1, -1, 0), v2 = Vec3(0, 1, 0); + + VectorF32<1, 4> t = IntersectionTestRayTriangle(rayOrigin, rayDir, + v0, v1, v2, + v0, v1, v2, + v0, v1, v2, + v0, v1, v2); + std::array s = t.template Store(); + for (std::uint8_t i = 0; i < 4; ++i) { + if (s[i] != kMaxF) + return new std::string(std::format("RayTriangle back-facing lane {}: expected max, got {}", i, s[i])); + } + return nullptr; +} + +std::string* TestRaySphere() { + VectorF32<3, 1> rayOrigin = Vec3(0, 0, -10); + VectorF32<3, 1> rayDir = Vec3(0, 0, 1); + + // A: sphere at origin radius 2 - first hit at z=-2, t=8. + VectorF32<3, 1> posA = Vec3(0, 0, 0); + // B: sphere at (0,0,20) radius 1 - first hit at z=19, t=29. + VectorF32<3, 1> posB = Vec3(0, 0, 20); + // C: sphere off to the side, ray misses. + VectorF32<3, 1> posC = Vec3(10, 10, 0); + // D: sphere behind the ray origin. + VectorF32<3, 1> posD = Vec3(0, 0, -50); + + VectorF32<1, 4> radii = Vec1x4(2.0f, 1.0f, 0.5f, 1.0f); + VectorF32<1, 4> t = IntersectionTestRaySphere(rayOrigin, rayDir, + posA, posB, posC, posD, radii); + std::array s = t.template Store(); + + if (!FloatEquals(s[0], 8.0f)) + return new std::string(std::format("RaySphere A: expected 8, got {}", s[0])); + if (!FloatEquals(s[1], 29.0f)) + return new std::string(std::format("RaySphere B: expected 29, got {}", s[1])); + if (s[2] != kMaxF) + return new std::string(std::format("RaySphere C: expected max (miss), got {}", s[2])); + if (s[3] != kMaxF) + return new std::string(std::format("RaySphere D: expected max (behind), got {}", s[3])); + return nullptr; +} + +std::string* TestRayOrientedBox() { + VectorF32<3, 1> rayOrigin = Vec3(0, 0, -5); + VectorF32<3, 1> rayDir = Vec3(0, 0, 1); + // Identity quaternion (axis-aligned). + VectorF32<4, 1> idQ = Vec4(0, 0, 0, 1); + + // Note: RayOrientedBox treats `size` as the *full* extent (it computes + // halfExtents = size * 0.5 internally). So size=2 means the box spans + // [-1, 1] in each axis. (SphereOrientedBox uses the opposite convention.) + // + // A: box at origin size 2 (half 1) -> ray enters at z=-1, t=4. + VectorF32<3, 1> posA = Vec3(0, 0, 0), sizeA = Vec3(2, 2, 2); + // B: box at (0,0,10) size 2 (half 1) -> ray enters at z=9, t=14. + VectorF32<3, 1> posB = Vec3(0, 0, 10), sizeB = Vec3(2, 2, 2); + // C: box off to the side - miss. + VectorF32<3, 1> posC = Vec3(50, 0, 0), sizeC = Vec3(2, 2, 2); + // D: box behind ray - miss. + VectorF32<3, 1> posD = Vec3(0, 0, -50), sizeD = Vec3(2, 2, 2); + + VectorF32<1, 4> t = IntersectionTestRayOrientedBox(rayOrigin, rayDir, + posA, sizeA, idQ, + posB, sizeB, idQ, + posC, sizeC, idQ, + posD, sizeD, idQ); + std::array s = t.template Store(); + + if (!FloatEquals(s[0], 4.0f)) + return new std::string(std::format("RayOrientedBox A: expected 4, got {}", s[0])); + if (!FloatEquals(s[1], 14.0f)) + return new std::string(std::format("RayOrientedBox B: expected 14, got {}", s[1])); + if (s[2] != kMaxF) + return new std::string(std::format("RayOrientedBox C: expected max (miss), got {}", s[2])); + if (s[3] != kMaxF) + return new std::string(std::format("RayOrientedBox D: expected max (behind), got {}", s[3])); + return nullptr; +} + +MatrixRowMajor MakeBoxMatrix(float tx, float ty, float tz) { + // Box matrix the OBB intersection code expects: rows[i][0..2] is the i-th + // axis (the existing semantics treat matrix rows as the OBB axes), and + // rows[i][3] is the translation component along that axis. + return MatrixRowMajor( + 1, 0, 0, tx, + 0, 1, 0, ty, + 0, 0, 1, tz + ); +} + +std::string* TestSphereOrientedBox() { + // `size` is half-extents (the intersection code clamps to ±size). + VectorF32<3, 1> sphereCenter = Vec3(0, 0, 0); + VectorF32<1, 4> radii = Vec1x4(1.0f, 0.5f, 0.5f, 1.0f); + + // A: box at origin half-extent 2 -> sphere center inside -> hit. + VectorF32<3, 1> sizeA = Vec3(2, 2, 2); + auto boxA = MakeBoxMatrix(0, 0, 0); + // B: box at (5,0,0) half-extent 1 -> box spans x in [4,6], sphere in [-0.5,0.5] -> miss. + VectorF32<3, 1> sizeB = Vec3(1, 1, 1); + auto boxB = MakeBoxMatrix(5, 0, 0); + // C: box at (3,0,0) half-extent 1 -> box spans [2,4], sphere [-0.5,0.5] -> miss. + VectorF32<3, 1> sizeC = Vec3(1, 1, 1); + auto boxC = MakeBoxMatrix(3, 0, 0); + // D: box at origin half-extent 0.5 -> sphere center inside the box -> hit. + VectorF32<3, 1> sizeD = Vec3(0.5f, 0.5f, 0.5f); + auto boxD = MakeBoxMatrix(0, 0, 0); + + VectorF32<1, 4> r = IntersectionTestSphereOrientedBox(sphereCenter, radii, + sizeA, boxA, + sizeB, boxB, + sizeC, boxC, + sizeD, boxD); + std::array s = r.template Store(); + + if (s[0] != 0.0f) + return new std::string(std::format("SphereOrientedBox A: expected hit (0), got {}", s[0])); + if (s[1] != kMaxF) + return new std::string(std::format("SphereOrientedBox B: expected max (miss), got {}", s[1])); + if (s[2] != kMaxF) + return new std::string(std::format("SphereOrientedBox C: expected max (miss), got {}", s[2])); + if (s[3] != 0.0f) + return new std::string(std::format("SphereOrientedBox D: expected hit (0), got {}", s[3])); + return nullptr; +} + +std::string* TestGetOBBCorners() { + // Identity matrix - the 8 corners are exactly ±size on each axis. + VectorF32<3, 1> size = Vec3(2, 3, 4); + auto m = MakeBoxMatrix(0, 0, 0); + std::array, 8> corners = GetOBBCorners(size, m); + + constexpr std::array, 8> expected = {{ + {-2, -3, -4}, { 2, -3, -4}, {-2, 3, -4}, { 2, 3, -4}, + {-2, -3, 4}, { 2, -3, 4}, {-2, 3, 4}, { 2, 3, 4}, + }}; + for (std::uint8_t i = 0; i < 8; ++i) { + std::array v = corners[i].template Store(); + for (std::uint8_t j = 0; j < 3; ++j) { + if (!FloatEquals(v[j], expected[i][j])) + return new std::string(std::format( + "GetOBBCorners corner {} lane {}: expected {}, got {}", + i, j, expected[i][j], v[j])); + } + } + + // Translated matrix - corners shift by the translation column. + auto m2 = MakeBoxMatrix(10, 20, 30); + std::array, 8> corners2 = GetOBBCorners(size, m2); + for (std::uint8_t i = 0; i < 8; ++i) { + std::array v = corners2[i].template Store(); + std::array exp = { + expected[i][0] + 10.0f, + expected[i][1] + 20.0f, + expected[i][2] + 30.0f + }; + for (std::uint8_t j = 0; j < 3; ++j) { + if (!FloatEquals(v[j], exp[j])) + return new std::string(std::format( + "GetOBBCorners translated corner {} lane {}: expected {}, got {}", + i, j, exp[j], v[j])); + } + } + return nullptr; +} + +std::string* TestOBBOBBOverlapping() { + VectorF32<3, 1> size = Vec3(1, 1, 1); + auto boxA = MakeBoxMatrix(0, 0, 0); + auto boxB = MakeBoxMatrix(1, 0, 0); // overlap on x in [-1, 1] (B) and [-1, 1] (A) -> overlap + if (!IntersectionTestOrientedBoxOrientedBox(size, boxA, size, boxB)) + return new std::string("OBB-OBB overlapping: expected true"); + + auto boxFar = MakeBoxMatrix(10, 0, 0); + if (IntersectionTestOrientedBoxOrientedBox(size, boxA, size, boxFar)) + return new std::string("OBB-OBB far apart: expected false"); + return nullptr; +} + +} // namespace + +int main() { + using Fn = std::string* (*)(); + constexpr std::array, 7> tests = {{ + { "RayTriangle", TestRayTriangle }, + { "RayTriangleBackFacing", TestRayTriangleBackFacing }, + { "RaySphere", TestRaySphere }, + { "RayOrientedBox", TestRayOrientedBox }, + { "SphereOrientedBox", TestSphereOrientedBox }, + { "GetOBBCorners", TestGetOBBCorners }, + { "OBBOBB", TestOBBOBBOverlapping }, + }}; + + for (auto const& [name, fn] : tests) { + if (auto err = std::unique_ptr(fn())) { + std::println(std::cerr, "[{}] {}", name, *err); + return 1; + } + } + return 0; +} diff --git a/tests/Matrix.cpp b/tests/Matrix.cpp new file mode 100644 index 0000000..fa90374 --- /dev/null +++ b/tests/Matrix.cpp @@ -0,0 +1,343 @@ +/* +Crafter® Build +Copyright (C) 2026 Catcrafts® +Catcrafts.net + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License version 3.0 as published by the Free Software Foundation; + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +import Crafter.Math; +import std; +using namespace Crafter; + +namespace { + +constexpr float kEps = 1e-4f; + +constexpr bool FloatEquals(float a, float b, float epsilon = kEps) { + return std::abs(a - b) < epsilon; +} + +VectorF32<3, 1> Vec3(float x, float y, float z) { + alignas(16) float buf[4] = { x, y, z, 0.0f }; + return VectorF32<3, 1>(buf); +} + +std::string* CheckVec3(VectorF32<3, 1> got, float x, float y, float z, std::string_view label) { + std::array v = got.template Store(); + if (!FloatEquals(v[0], x) || !FloatEquals(v[1], y) || !FloatEquals(v[2], z)) + return new std::string(std::format( + "{}: expected ({},{},{}), got ({},{},{})", + label, x, y, z, v[0], v[1], v[2])); + return nullptr; +} + +std::string* CheckMat43(MatrixRowMajor const& m, std::array expected, std::string_view label) { + std::array got = m.Store(); + for (std::uint8_t i = 0; i < 12; ++i) { + if (!FloatEquals(got[i], expected[i])) + return new std::string(std::format( + "{} element {}: expected {}, got {}", label, i, expected[i], got[i])); + } + return nullptr; +} + +std::string* CheckMat44(MatrixRowMajor const& m, std::array expected, std::string_view label) { + std::array got = m.Store(); + for (std::uint8_t i = 0; i < 16; ++i) { + if (!FloatEquals(got[i], expected[i])) + return new std::string(std::format( + "{} element {}: expected {}, got {}", label, i, expected[i], got[i])); + } + return nullptr; +} + +// ---------- Identity / Store roundtrip ---------- + +std::string* TestIdentity43() { + auto m = MatrixRowMajor::Identity(); + return CheckMat43(m, { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + }, "Identity 4x3"); +} + +std::string* TestIdentity44() { + auto m = MatrixRowMajor::Identity(); + return CheckMat44(m, { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + }, "Identity 4x4"); +} + +std::string* TestStoreRoundtrip() { + MatrixRowMajor m( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + return CheckMat44(m, { + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + }, "Store roundtrip 4x4"); +} + +// ---------- Factory matrices ---------- + +std::string* TestTranslation() { + auto m = MatrixRowMajor::Translation(10, 20, 30); + if (auto e = CheckMat43(m, { + 1, 0, 0, 10, + 0, 1, 0, 20, + 0, 0, 1, 30, + }, "Translation 4x3")) return e; + + auto m4 = MatrixRowMajor::Translation(10, 20, 30); + if (auto e = CheckMat44(m4, { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 10, 20, 30, 1, + }, "Translation 4x4")) return e; + return nullptr; +} + +std::string* TestScaling() { + auto m = MatrixRowMajor::Scaling(2, 3, 4); + if (auto e = CheckMat43(m, { + 2, 0, 0, 0, + 0, 3, 0, 0, + 0, 0, 4, 0, + }, "Scaling 4x3")) return e; + + auto m4 = MatrixRowMajor::Scaling(2, 3, 4); + return CheckMat44(m4, { + 2, 0, 0, 0, + 0, 3, 0, 0, + 0, 0, 4, 0, + 0, 0, 0, 1, + }, "Scaling 4x4"); +} + +// ---------- Matrix * Vector ---------- + +std::string* TestMatVecIdentity() { + auto m = MatrixRowMajor::Identity(); + VectorF32<3, 1> v = Vec3(7, -2, 3); + return CheckVec3(m * v, 7, -2, 3, "I * v"); +} + +std::string* TestMatVecTranslation() { + auto m = MatrixRowMajor::Translation(1, 2, 3); + // Affine multiply with implicit w=1 -> result = v + (1,2,3). + VectorF32<3, 1> v = Vec3(10, 20, 30); + return CheckVec3(m * v, 11, 22, 33, "Translation * v"); +} + +std::string* TestMatVecScaling() { + auto m = MatrixRowMajor::Scaling(2, 3, 4); + VectorF32<3, 1> v = Vec3(1, 1, 1); + return CheckVec3(m * v, 2, 3, 4, "Scaling * v"); +} + +std::string* TestMatVecCombined() { + // Scale then translate: full matrix is [diag(s) | t]. + MatrixRowMajor m( + 2, 0, 0, 10, + 0, 3, 0, 20, + 0, 0, 4, 30 + ); + VectorF32<3, 1> v = Vec3(1, 1, 1); + return CheckVec3(m * v, 12, 23, 34, "Scale+Translate * v"); +} + +std::string* TestTransformNormal() { + // TransformNormal ignores the translation column. + MatrixRowMajor m( + 2, 0, 0, 100, + 0, 3, 0, 200, + 0, 0, 4, 300 + ); + VectorF32<3, 1> v = Vec3(1, 1, 1); + return CheckVec3(m.TransformNormal(v), 2, 3, 4, "TransformNormal"); +} + +// ---------- Matrix * Matrix ---------- + +std::string* TestMatMulIdentityLeft44() { + MatrixRowMajor a = MatrixRowMajor::Identity(); + MatrixRowMajor b( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + auto r = a * b; + return CheckMat44(r, { + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + }, "I * M"); +} + +std::string* TestMatMulIdentityRight44() { + MatrixRowMajor a( + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + ); + MatrixRowMajor b = MatrixRowMajor::Identity(); + auto r = a * b; + return CheckMat44(r, { + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + }, "M * I"); +} + +std::string* TestMatMul44() { + // Hand-computed: standard row-major (B*A)[i][j] = sum_k B[i][k]*A[k][j]. + MatrixRowMajor a( + 1, 2, 0, 0, + 0, 1, 3, 0, + 4, 0, 1, 0, + 0, 0, 0, 1 + ); + MatrixRowMajor b( + 1, 0, 2, 0, + 0, 1, 0, 3, + 0, 0, 1, 0, + 0, 0, 0, 1 + ); + auto r = a * b; + // result.m[i][j] = sum_k b.m[i][k] * a.m[k][j] + // row 0: [1*1+0*0+2*4+0*0, 1*2+0*1+2*0+0*0, 1*0+0*3+2*1+0*0, 1*0+0*0+2*0+0*1] + // = [9, 2, 2, 0] + // row 1: [0*1+1*0+0*4+3*0, 0*2+1*1+0*0+3*0, 0*0+1*3+0*1+3*0, 0*0+1*0+0*0+3*1] + // = [0, 1, 3, 3] + // row 2: [0*1+0*0+1*4+0*0, 0*2+0*1+1*0+0*0, 0*0+0*3+1*1+0*0, 0*0+0*0+1*0+0*1] + // = [4, 0, 1, 0] + // row 3: [0*1+0*0+0*4+1*0, 0*2+0*1+0*0+1*0, 0*0+0*3+0*1+1*0, 0*0+0*0+0*0+1*1] + // = [0, 0, 0, 1] + return CheckMat44(r, { + 9, 2, 2, 0, + 0, 1, 3, 3, + 4, 0, 1, 0, + 0, 0, 0, 1, + }, "Mat * Mat 4x4"); +} + +std::string* TestMatMulTranslationCompose() { + // Translation composition: T(a) * T(b) acting on a vector translates by a+b + // (and the resulting matrix has the summed translation in its last column). + auto t1 = MatrixRowMajor::Translation(1, 2, 3); + auto t2 = MatrixRowMajor::Translation(10, 20, 30); + auto r = t1 * t2; + if (auto e = CheckMat43(r, { + 1, 0, 0, 11, + 0, 1, 0, 22, + 0, 0, 1, 33, + }, "T(1,2,3) * T(10,20,30)")) return e; + + // Sanity check: applying the composed matrix to origin yields (11,22,33). + return CheckVec3(r * Vec3(0, 0, 0), 11, 22, 33, "Composed translation * origin"); +} + +std::string* TestMatMulScaleThenTranslate() { + // The matrix product follows the engine's row-vector / post-multiply + // convention: a.operator*(b) yields the matrix b composed with a so that + // applying the result is equivalent to "first apply this, then apply b" + // when read left-to-right with a row vector (v * (a*b) = (v*a) * b). + // + // For column-vector intuition that means s.operator*(t) builds T·S, i.e. + // "scale, then translate"; applying it to (1,1,1) yields s*v + t. + auto s = MatrixRowMajor::Scaling(2, 3, 4); + auto t = MatrixRowMajor::Translation(10, 20, 30); + auto r = s * t; + return CheckVec3(r * Vec3(1, 1, 1), 12, 23, 34, "scale-then-translate * (1,1,1)"); +} + +// ---------- LookTo / LookAt ---------- + +std::string* TestLookToBasis() { + // Camera at +Z looking back toward origin. eyeDirection should point from + // focus to eye, i.e. +Z, so the resulting basis has R2 = +Z. + VectorF32<3, 1> eye = Vec3(0, 0, 5); + VectorF32<3, 1> dir = Vec3(0, 0, 1); // already from origin toward eye + VectorF32<3, 1> up = Vec3(0, 1, 0); + auto m = MatrixRowMajor::LookTo(eye, dir, up); + // R2 normalized = (0,0,1). R0 = up x R2 = (0,1,0) x (0,0,1) = (1,0,0). + // R1 = R2 x R0 = (0,0,1) x (1,0,0) = (0,1,0). + // The view matrix stores basis columns in the rotation block, with the + // translation column = -basis dot eye. So m.row[3] = (D0, D1, D2, 1). + // D0 = R0 . -eye = -(0*0+0*0+5*0) = 0 + // D1 = R1 . -eye = -(0*0+1*0+0*5) = 0 + // D2 = R2 . -eye = -(0*0+0*0+1*5) = -5 + return CheckMat44(m, { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, -5, 1, + }, "LookTo basis"); +} + +// ---------- Test harness ---------- + +} // namespace + +int main() { + using Fn = std::string* (*)(); + constexpr std::array, 15> tests = {{ + { "Identity43", TestIdentity43 }, + { "Identity44", TestIdentity44 }, + { "StoreRoundtrip", TestStoreRoundtrip }, + { "Translation", TestTranslation }, + { "Scaling", TestScaling }, + { "MatVecIdentity", TestMatVecIdentity }, + { "MatVecTranslation", TestMatVecTranslation }, + { "MatVecScaling", TestMatVecScaling }, + { "MatVecCombined", TestMatVecCombined }, + { "TransformNormal", TestTransformNormal }, + { "MatMulIdentityLeft", TestMatMulIdentityLeft44 }, + { "MatMulIdentityRight", TestMatMulIdentityRight44 }, + { "MatMul44", TestMatMul44 }, + { "MatMulTranslation", TestMatMulTranslationCompose }, + { "MatMulScaleTranslate",TestMatMulScaleThenTranslate }, + }}; + + for (auto const& [name, fn] : tests) { + if (auto err = std::unique_ptr(fn())) { + std::println(std::cerr, "[{}] {}", name, *err); + return 1; + } + } + + // LookTo is tested separately - its expected result depends on the + // internal basis convention and a single failure here is informative + // enough to keep in the matrix suite. + if (auto err = std::unique_ptr(TestLookToBasis())) { + std::println(std::cerr, "[LookTo] {}", *err); + return 1; + } + return 0; +}