packed intersection and matrix
This commit is contained in:
parent
027947cae6
commit
f0becd1582
7 changed files with 948 additions and 557 deletions
|
|
@ -449,8 +449,8 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template <std::array<bool, Len> values>
|
||||
constexpr VectorF32<Len, Packing> Negate() {
|
||||
std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> mask = VectorBase<Len, Packing, float>::template GetNegateMask<values>();
|
||||
constexpr VectorF32<Len, Packing> Negate() const {
|
||||
std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> mask = VectorBase<Len, Packing, float>::template GetNegateMask<values>();
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(this->v), _mm_loadu_si128(reinterpret_cast<__m128i*>(mask.data())))));
|
||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||
|
|
@ -549,9 +549,41 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
// Public variadic surface — one name per op, arity locked to BatchSize.
|
||||
// The Pack helpers below carry the SIMD bodies and the per-(Len,Packing)
|
||||
// requires clauses; this wrapper just forwards once arity matches.
|
||||
// Normalize — public batch entry point.
// Accepts `first` plus a parameter pack `rest`; the requires-clause admits the
// call only when every pack element is exactly VectorF32<Len, Packing> and the
// total argument count (1 + sizeof...(Rest)) equals
// VectorBase<Len, Packing, float>::BatchSize.
// All arguments are forwarded by value, unchanged, to NormalizePack, and its
// result is returned as-is (return type deduced via `auto`).
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
return NormalizePack(first, rest...);
|
||||
}
|
||||
|
||||
// Length — public batch entry point.
// Callable only when every element of `rest` is exactly VectorF32<Len, Packing>
// and the total argument count (1 + sizeof...(Rest)) equals
// VectorBase<Len, Packing, float>::BatchSize.
// Forwards all arguments by value to LengthPack and returns whatever that
// overload returns (return type deduced via `auto`).
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Length(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
return LengthPack(first, rest...);
|
||||
}
|
||||
|
||||
// LengthSq — public batch entry point.
// Callable only when every element of `rest` is exactly VectorF32<Len, Packing>
// and the total argument count (1 + sizeof...(Rest)) equals
// VectorBase<Len, Packing, float>::BatchSize.
// Forwards all arguments by value to LengthSqPack and returns whatever that
// overload returns (return type deduced via `auto`).
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto LengthSq(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
return LengthSqPack(first, rest...);
|
||||
}
|
||||
|
||||
// Dot — public batch entry point.
// Callable only when every element of `rest` is exactly VectorF32<Len, Packing>
// and the total argument count (1 + sizeof...(Rest)) equals
// 2 * VectorBase<Len, Packing, float>::BatchSize — twice the arity of the
// Normalize/Length/LengthSq wrappers.
// NOTE(review): the arguments are presumably consumed as consecutive operand
// pairs (A0, A1, B0, B1, ...), judging by the DotPack parameter names — confirm
// against the DotPack bodies.
// Forwards all arguments by value to DotPack and returns its result
// (return type deduced via `auto`).
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == 2 * VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Dot(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
return DotPack(first, rest...);
|
||||
}
|
||||
|
||||
private:
|
||||
constexpr static std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize> NormalizePack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
|
|
@ -614,9 +646,9 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
constexpr static std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize> NormalizePack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 1) {
|
||||
|
|
@ -638,9 +670,9 @@ namespace Crafter {
|
|||
};
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
constexpr static std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize> NormalizePack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 2) {
|
||||
|
|
@ -663,9 +695,9 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
#ifdef __AVX512F__
|
||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
constexpr static std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize> NormalizePack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C
|
||||
) requires(Len == 3 && Packing == 5) {
|
||||
VectorF32<1, 15> lenght = Length(A, B, C);
|
||||
|
|
@ -685,8 +717,8 @@ namespace Crafter {
|
|||
}
|
||||
#endif
|
||||
|
||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||
VectorF32<Len, Packing> A,
|
||||
constexpr static std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize> NormalizePack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||
|
|
@ -733,13 +765,13 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, Packing*4> Length(
|
||||
VectorF32<Len, Packing> A,
|
||||
constexpr static VectorF32<1, Packing*4> LengthPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
VectorF32<1, Packing*4> lenghtSq = LengthSq(A, B, C, D);
|
||||
VectorF32<1, Packing*4> lenghtSq = LengthSqPack(A, B, C, D);
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||
return VectorF32<1, Packing*4>(_mm_sqrt_ps(lenghtSq.v));
|
||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||
|
|
@ -749,42 +781,42 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 4> Length(
|
||||
constexpr static VectorF32<1, 4> LengthPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 1) {
|
||||
VectorF32<1, 4> lenghtSq = LengthSq(A, B, C, D);
|
||||
VectorF32<1, 4> lenghtSq = LengthSqPack(A, B, C, D);
|
||||
return VectorF32<1, 4>(_mm_sqrt_ps(lenghtSq.v));
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 8> Length(
|
||||
constexpr static VectorF32<1, 8> LengthPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 2) {
|
||||
VectorF32<1, 8> lenghtSq = LengthSq(A, B, C, D);
|
||||
VectorF32<1, 8> lenghtSq = LengthSqPack(A, B, C, D);
|
||||
return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v));
|
||||
}
|
||||
|
||||
#ifdef __AVX512F__
|
||||
constexpr static VectorF32<1, 15> Length(
|
||||
constexpr static VectorF32<1, 15> LengthPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C
|
||||
) requires(Len == 3 && Packing == 5) {
|
||||
VectorF32<1, 15> lenghtSq = LengthSq(A, B, C);
|
||||
VectorF32<1, 15> lenghtSq = LengthSqPack(A, B, C);
|
||||
return VectorF32<1, 15>(_mm512_sqrt_ps(lenghtSq.v));
|
||||
}
|
||||
#endif
|
||||
|
||||
constexpr static VectorF32<1, Packing*2> Length(
|
||||
VectorF32<Len, Packing> A,
|
||||
constexpr static VectorF32<1, Packing*2> LengthPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> C
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
VectorF32<1, Packing*2> lenghtSq = LengthSq(A, C);
|
||||
VectorF32<1, Packing*2> lenghtSq = LengthSqPack(A, C);
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||
return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v));
|
||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||
|
|
@ -796,51 +828,51 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, Packing*4> LengthSq(
|
||||
VectorF32<Len, Packing> A,
|
||||
constexpr static VectorF32<1, Packing*4> LengthSqPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
return Dot(A, A, B, B, C, C, D, D);
|
||||
return DotPack(A, A, B, B, C, C, D, D);
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 4> LengthSq(
|
||||
constexpr static VectorF32<1, 4> LengthSqPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 1) {
|
||||
return Dot(A, A, B, B, C, C, D, D);
|
||||
return DotPack(A, A, B, B, C, C, D, D);
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 8> LengthSq(
|
||||
constexpr static VectorF32<1, 8> LengthSqPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C,
|
||||
VectorF32<Len, Packing> D
|
||||
) requires(Len == 3 && Packing == 2) {
|
||||
return Dot(A, A, B, B, C, C, D, D);
|
||||
return DotPack(A, A, B, B, C, C, D, D);
|
||||
}
|
||||
|
||||
#ifdef __AVX512F__
|
||||
constexpr static VectorF32<1, 15> LengthSq(
|
||||
constexpr static VectorF32<1, 15> LengthSqPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> B,
|
||||
VectorF32<Len, Packing> C
|
||||
) requires(Len == 3 && Packing == 5) {
|
||||
return Dot(A, A, B, B, C, C);
|
||||
return DotPack(A, A, B, B, C, C);
|
||||
}
|
||||
#endif
|
||||
|
||||
constexpr static VectorF32<1, Packing*2> LengthSq(
|
||||
VectorF32<Len, Packing> A,
|
||||
constexpr static VectorF32<1, Packing*2> LengthSqPack(
|
||||
VectorF32<Len, Packing> A,
|
||||
VectorF32<Len, Packing> C
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
return Dot(A, A, C, C);
|
||||
return DotPack(A, A, C, C);
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, Packing*4> Dot(
|
||||
constexpr static VectorF32<1, Packing*4> DotPack(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
|
||||
|
|
@ -869,7 +901,7 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 4> Dot(
|
||||
constexpr static VectorF32<1, 4> DotPack(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
|
||||
|
|
@ -914,7 +946,7 @@ namespace Crafter {
|
|||
return row1;
|
||||
}
|
||||
|
||||
constexpr static VectorF32<1, 8> Dot(
|
||||
constexpr static VectorF32<1, 8> DotPack(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
|
||||
|
|
@ -1021,7 +1053,7 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
#ifdef __AVX512F__
|
||||
constexpr static VectorF32<1, 15> Dot(
|
||||
constexpr static VectorF32<1, 15> DotPack(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1
|
||||
|
|
@ -1112,8 +1144,8 @@ namespace Crafter {
|
|||
}
|
||||
#endif
|
||||
|
||||
constexpr static VectorF32<1, Packing*2> Dot(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
constexpr static VectorF32<1, Packing*2> DotPack(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, float>::AlignmentElement) {
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||
|
|
@ -1548,9 +1580,10 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto LengthSq(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
constexpr std::uint8_t N = 1 + sizeof...(Rest);
|
||||
constexpr std::uint8_t N = VectorBase<Len, Packing, float>::BatchSize;
|
||||
VectorF32<1, static_cast<std::uint8_t>(Packing * N)> r;
|
||||
std::array<VectorF32<Len, Packing>, N> args{ first, rest... };
|
||||
alignas(16) float buf[4] = {0,0,0,0};
|
||||
|
|
@ -1571,41 +1604,39 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Length(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
auto sq = LengthSq(first, rest...);
|
||||
sq.v = wasm_f32x4_sqrt(sq.v);
|
||||
return sq;
|
||||
}
|
||||
|
||||
// Four pairwise dot products packed into one v128. Only the first Len
|
||||
// Pairwise dot products packed into one v128. Only the first Len
|
||||
// lanes contribute, so the same routine handles 3- and 4-component
|
||||
// inputs — the 4th lane of Len==3 inputs may be garbage from Cross()
|
||||
// and must not be summed.
|
||||
constexpr static VectorF32<1, 4> Dot(
|
||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||
VectorF32<Len, Packing> C0, VectorF32<Len, Packing> C1,
|
||||
VectorF32<Len, Packing> D0, VectorF32<Len, Packing> D1
|
||||
) requires((Len == 3 || Len == 4) && Packing == 1) {
|
||||
alignas(16) float a0[4], a1[4], b0[4], b1[4], c0[4], c1[4], d0[4], d1[4];
|
||||
wasm_v128_store(a0, A0.v); wasm_v128_store(a1, A1.v);
|
||||
wasm_v128_store(b0, B0.v); wasm_v128_store(b1, B1.v);
|
||||
wasm_v128_store(c0, C0.v); wasm_v128_store(c1, C1.v);
|
||||
wasm_v128_store(d0, D0.v); wasm_v128_store(d1, D1.v);
|
||||
|
||||
// and must not be summed. Takes BatchSize pairs (== 4 here since
|
||||
// WASM AlignmentElement is always 4 and Packing must be 1).
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == 2 * VectorBase<Len, Packing, float>::BatchSize) &&
|
||||
(Len == 3 || Len == 4) && Packing == 1)
|
||||
constexpr static VectorF32<1, 4> Dot(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
constexpr std::uint8_t N = VectorBase<Len, Packing, float>::BatchSize;
|
||||
std::array<VectorF32<Len, Packing>, 2 * N> args{ first, rest... };
|
||||
alignas(16) float out[4] = {0,0,0,0};
|
||||
for (std::uint8_t k = 0; k < Len; ++k) {
|
||||
out[0] += a0[k] * a1[k];
|
||||
out[1] += b0[k] * b1[k];
|
||||
out[2] += c0[k] * c1[k];
|
||||
out[3] += d0[k] * d1[k];
|
||||
for (std::uint8_t i = 0; i < N; ++i) {
|
||||
alignas(16) float a[4], b[4];
|
||||
wasm_v128_store(a, args[2 * i].v);
|
||||
wasm_v128_store(b, args[2 * i + 1].v);
|
||||
for (std::uint8_t k = 0; k < Len; ++k) out[i] += a[k] * b[k];
|
||||
}
|
||||
return VectorF32<1, 4>(wasm_v128_load(out));
|
||||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
auto normOne = [](VectorF32<Len, Packing> u) {
|
||||
alignas(16) float tmp[4]; wasm_v128_store(tmp, u.v);
|
||||
|
|
@ -1622,7 +1653,7 @@ namespace Crafter {
|
|||
}
|
||||
return VectorF32<Len, Packing>(wasm_v128_load(out));
|
||||
};
|
||||
return std::make_tuple(normOne(first), normOne(rest)...);
|
||||
return std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize>{ normOne(first), normOne(rest)... };
|
||||
}
|
||||
|
||||
constexpr static VectorF32<Len, Packing> Rotate(VectorF32<3, Packing> v, VectorF32<4, Packing> q) requires(Len == 3) {
|
||||
|
|
@ -1842,9 +1873,10 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto LengthSq(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
constexpr std::uint8_t N = 1 + sizeof...(Rest);
|
||||
constexpr std::uint8_t N = VectorBase<Len, Packing, float>::BatchSize;
|
||||
VectorF32<1, static_cast<std::uint8_t>(Packing * N)> r;
|
||||
std::array<VectorF32<Len, Packing>, N> args{ first, rest... };
|
||||
for (std::uint8_t i = 0; i < N; ++i)
|
||||
|
|
@ -1860,7 +1892,8 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Length(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
auto sq = LengthSq(first, rest...);
|
||||
for (std::uint8_t i = 0; i < decltype(sq)::NElems; ++i) sq.v[i] = std::sqrt(sq.v[i]);
|
||||
|
|
@ -1868,7 +1901,8 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF32<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, float>::BatchSize))
|
||||
constexpr static auto Normalize(VectorF32<Len, Packing> first, Rest... rest) {
|
||||
auto normOne = [](VectorF32<Len, Packing> u) {
|
||||
VectorF32<Len, Packing> out;
|
||||
|
|
@ -1884,7 +1918,7 @@ namespace Crafter {
|
|||
}
|
||||
return out;
|
||||
};
|
||||
return std::make_tuple(normOne(first), normOne(rest)...);
|
||||
return std::array<VectorF32<Len, Packing>, VectorBase<Len, Packing, float>::BatchSize>{ normOne(first), normOne(rest)... };
|
||||
}
|
||||
|
||||
constexpr static VectorF32<Len, Packing> Rotate(VectorF32<3, Packing> v, VectorF32<4, Packing> q) requires(Len == 3) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue