This commit is contained in:
Jorijn van der Graaf 2026-05-18 18:15:12 +02:00
commit 027947cae6

View file

@ -1578,6 +1578,32 @@ namespace Crafter {
return sq; return sq;
} }
// Computes the four dot products A0.A1, B0.B1, C0.C1 and D0.D1 and returns
// them packed into a single v128: lane 0 holds the A pair, lane 1 the B pair,
// lane 2 the C pair and lane 3 the D pair.
//
// Only lanes [0, Len) of each operand are multiplied and accumulated. This
// lets one routine serve both 3- and 4-component vectors: for Len == 3 the
// 4th lane may hold garbage (e.g. left over from Cross()) and therefore must
// never be included in the sum.
constexpr static VectorF32<1, 4> Dot(
    VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
    VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
    VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
    VectorF32<Len, Packing> D0, VectorF32<Len, Packing> D1
) requires((Len == 3 || Len == 4) && Packing == 1) {
    // Spill every operand to memory so individual lanes can be addressed.
    // Row 2*p is the left-hand operand of pair p, row 2*p + 1 its right-hand
    // operand.
    alignas(16) float lanes[8][4];
    wasm_v128_store(lanes[0], A0.v);
    wasm_v128_store(lanes[1], A1.v);
    wasm_v128_store(lanes[2], B0.v);
    wasm_v128_store(lanes[3], B1.v);
    wasm_v128_store(lanes[4], C0.v);
    wasm_v128_store(lanes[5], C1.v);
    wasm_v128_store(lanes[6], D0.v);
    wasm_v128_store(lanes[7], D1.v);

    // One accumulator per pair, each starting from zero and summed in lane
    // order 0..Len-1.
    alignas(16) float dots[4] = {0, 0, 0, 0};
    for (std::uint8_t pair = 0; pair < 4; ++pair) {
        const float* lhs = lanes[2 * pair];
        const float* rhs = lanes[2 * pair + 1];
        for (std::uint8_t lane = 0; lane < Len; ++lane) {
            dots[pair] += lhs[lane] * rhs[lane];
        }
    }

    // Repack the four scalar results into one vector.
    return VectorF32<1, 4>(wasm_v128_load(dots));
}
template<typename... Rest> template<typename... Rest>
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...)) requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) { constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) {