wasm dot
This commit is contained in:
parent
ad5ba21b4d
commit
027947cae6
1 changed files with 26 additions and 0 deletions
|
|
@ -1578,6 +1578,32 @@ namespace Crafter {
|
|||
return sq;
|
||||
}
|
||||
|
||||
// Four pairwise dot products packed into one v128. Only the first Len
|
||||
// lanes contribute, so the same routine handles 3- and 4-component
|
||||
// inputs — the 4th lane of Len==3 inputs may be garbage from Cross()
|
||||
// and must not be summed.
|
||||
constexpr static VectorF32<1, 4> Dot(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
|
||||
VectorF32<Len, Packing> D0, VectorF32<Len, Packing> D1
|
||||
) requires((Len == 3 || Len == 4) && Packing == 1) {
|
||||
alignas(16) float a0[4], a1[4], b0[4], b1[4], c0[4], c1[4], d0[4], d1[4];
|
||||
wasm_v128_store(a0, A0.v); wasm_v128_store(a1, A1.v);
|
||||
wasm_v128_store(b0, B0.v); wasm_v128_store(b1, B1.v);
|
||||
wasm_v128_store(c0, C0.v); wasm_v128_store(c1, C1.v);
|
||||
wasm_v128_store(d0, D0.v); wasm_v128_store(d1, D1.v);
|
||||
|
||||
alignas(16) float out[4] = {0,0,0,0};
|
||||
for (std::uint8_t k = 0; k < Len; ++k) {
|
||||
out[0] += a0[k] * a1[k];
|
||||
out[1] += b0[k] * b1[k];
|
||||
out[2] += c0[k] * c1[k];
|
||||
out[3] += d0[k] * d1[k];
|
||||
}
|
||||
return VectorF32<1, 4>(wasm_v128_load(out));
|
||||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue