From 027947cae6ef1e881e541d68a50d01ad7a559a50 Mon Sep 17 00:00:00 2001
From: Jorijn van der Graaf
Date: Mon, 18 May 2026 18:15:12 +0200
Subject: [PATCH] wasm dot

---
 interfaces/Crafter.Math-VectorF32.cppm | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/interfaces/Crafter.Math-VectorF32.cppm b/interfaces/Crafter.Math-VectorF32.cppm
index cc86764..6582d94 100755
--- a/interfaces/Crafter.Math-VectorF32.cppm
+++ b/interfaces/Crafter.Math-VectorF32.cppm
@@ -1578,6 +1578,43 @@ namespace Crafter {
             return sq;
         }
 
+        // Returns dot(A0, A1), dot(B0, B1), dot(C0, C1) and dot(D0, D1) packed,
+        // in that order, into lanes 0..3 of a single v128.
+        //
+        // Each sum runs over lanes [0, Len) only, which lets one routine serve
+        // both 3- and 4-component inputs: for Len == 3 the fourth lane may hold
+        // garbage left behind by Cross() and must never be added in.
+        constexpr static VectorF32<1, 4> Dot(
+            VectorF32 A0, VectorF32 A1,
+            VectorF32 B0, VectorF32 B1,
+            VectorF32 C0, VectorF32 C1,
+            VectorF32 D0, VectorF32 D1
+        ) requires((Len == 3 || Len == 4) && Packing == 1) {
+            // Spill the operands to scratch memory so their lanes can be read
+            // as plain floats. Row 2*i is the left operand of pair i, row
+            // 2*i + 1 the right one.
+            alignas(16) float lanes[8][4];
+            wasm_v128_store(lanes[0], A0.v);
+            wasm_v128_store(lanes[1], A1.v);
+            wasm_v128_store(lanes[2], B0.v);
+            wasm_v128_store(lanes[3], B1.v);
+            wasm_v128_store(lanes[4], C0.v);
+            wasm_v128_store(lanes[5], C1.v);
+            wasm_v128_store(lanes[6], D0.v);
+            wasm_v128_store(lanes[7], D1.v);
+
+            // Accumulate from zero in ascending lane order, one pair at a time.
+            alignas(16) float dots[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+            for (std::uint8_t pair = 0; pair < 4; ++pair) {
+                const float* lhs = lanes[2 * pair];
+                const float* rhs = lanes[2 * pair + 1];
+                for (std::uint8_t lane = 0; lane < Len; ++lane) {
+                    dots[pair] += lhs[lane] * rhs[lane];
+                }
+            }
+            return VectorF32<1, 4>(wasm_v128_load(dots));
+        }
+
         template
         requires((std::is_same_v> && ...))
         constexpr static auto Normalize(VectorF32 first, Rest... rest) {