packed intersection and matrix
This commit is contained in:
parent
027947cae6
commit
f0becd1582
7 changed files with 948 additions and 557 deletions
|
|
@ -554,9 +554,41 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF16<Len, Packing>, VectorF16<Len, Packing>, VectorF16<Len, Packing>, VectorF16<Len, Packing>> Normalize(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> C,
|
||||
// Public variadic surface — one name per op, arity locked to BatchSize
|
||||
// (or 2*BatchSize for Dot). Forwards to the *Pack helpers below which
|
||||
// carry the SIMD bodies and per-(Len,Packing) requires clauses.
|
||||
// Public batched Normalize entry point; forwards every argument unchanged to
// NormalizePack.
// Constraints: each argument after `first` must be exactly
// VectorF16<Len, Packing>, and the total argument count (1 + sizeof...(Rest))
// must equal VectorBase<Len, Packing, _Float16>::BatchSize.
// Returns whatever the selected NormalizePack overload returns; the
// NormalizePack declarations visible in this diff return
// std::array<VectorF16<Len, Packing>, BatchSize>.
// NOTE(review): the requires clause checks only argument type and count. The
// visible NormalizePack overloads carry their own Len/Packing requires
// clauses, so a configuration without a matching overload would presumably
// fail inside this body rather than at the constraint — confirm every
// supported (Len, Packing) has a NormalizePack overload.
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto Normalize(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
// Overload selection among the NormalizePack helpers happens here.
return NormalizePack(first, rest...);
|
||||
}
|
||||
|
||||
// Public batched Length entry point; forwards every argument unchanged to
// LengthPack.
// Constraints: each argument after `first` must be exactly
// VectorF16<Len, Packing>, and the total argument count (1 + sizeof...(Rest))
// must equal VectorBase<Len, Packing, _Float16>::BatchSize.
// Returns whatever the selected LengthPack overload returns; the LengthPack
// declarations visible in this diff return VectorF16<1, Packing*4> (Len == 4)
// or VectorF16<1, Packing*2> (Len == 2).
// NOTE(review): the requires clause does not check that a LengthPack overload
// exists for this (Len, Packing); a missing overload would presumably surface
// as an error inside this body — confirm coverage.
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto Length(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
// Overload selection among the LengthPack helpers happens here.
return LengthPack(first, rest...);
|
||||
}
|
||||
|
||||
// Public batched LengthSq entry point; forwards every argument unchanged to
// LengthSqPack.
// Constraints: each argument after `first` must be exactly
// VectorF16<Len, Packing>, and the total argument count (1 + sizeof...(Rest))
// must equal VectorBase<Len, Packing, _Float16>::BatchSize.
// Returns whatever the selected LengthSqPack overload returns; the
// LengthSqPack declarations visible in this diff return
// VectorF16<1, Packing*4> (Len == 4) or VectorF16<1, Packing*2> (Len == 2).
// NOTE(review): the requires clause does not check that a LengthSqPack
// overload exists for this (Len, Packing); a missing overload would
// presumably surface as an error inside this body — confirm coverage.
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto LengthSq(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
// Overload selection among the LengthSqPack helpers happens here.
return LengthSqPack(first, rest...);
|
||||
}
|
||||
|
||||
// Public batched Dot entry point; forwards every argument unchanged to
// DotPack.
// Constraints: each argument after `first` must be exactly
// VectorF16<Len, Packing>, and the total argument count (1 + sizeof...(Rest))
// must equal 2 * VectorBase<Len, Packing, _Float16>::BatchSize — twice the
// arity of the other batched wrappers.
// The DotPack declarations visible in this diff name their parameters in
// pairs (A0, A1, C0, C1, ...), and LengthSqPack calls DotPack(A, A, C, C, ...),
// so arguments are presumably consumed as consecutive operand pairs —
// TODO confirm against the DotPack bodies.
// Returns whatever the selected DotPack overload returns; the visible
// declarations return VectorF16<1, Packing*4> (Len == 4) or
// VectorF16<1, Packing*2> (Len == 2).
template <typename... Rest>
|
||||
requires ((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == 2 * VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto Dot(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
// Overload selection among the DotPack helpers happens here.
return DotPack(first, rest...);
|
||||
}
|
||||
|
||||
private:
|
||||
constexpr static std::array<VectorF16<Len, Packing>, VectorBase<Len, Packing, _Float16>::BatchSize> NormalizePack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> C,
|
||||
VectorF16<Len, Packing> E,
|
||||
VectorF16<Len, Packing> G
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
|
|
@ -616,8 +648,8 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF16<Len, Packing>, VectorF16<Len, Packing>> Normalize(
|
||||
VectorF16<Len, Packing> A,
|
||||
constexpr static std::array<VectorF16<Len, Packing>, VectorBase<Len, Packing, _Float16>::BatchSize> NormalizePack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> E
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, _Float16>::VectorType, __m128h>) {
|
||||
|
|
@ -662,13 +694,13 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*4> Length(
|
||||
VectorF16<Len, Packing> A,
|
||||
constexpr static VectorF16<1, Packing*4> LengthPack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> C,
|
||||
VectorF16<Len, Packing> E,
|
||||
VectorF16<Len, Packing> G
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
VectorF16<1, Packing*4> lenghtSq = LengthSq(A, C, E, G);
|
||||
VectorF16<1, Packing*4> lenghtSq = LengthSqPack(A, C, E, G);
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, _Float16>::VectorType, __m128h>) {
|
||||
return VectorF16<1, Packing*4>(_mm_sqrt_ph(lenghtSq.v));
|
||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, _Float16>::VectorType, __m256h>) {
|
||||
|
|
@ -678,11 +710,11 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*2> Length(
|
||||
VectorF16<Len, Packing> A,
|
||||
constexpr static VectorF16<1, Packing*2> LengthPack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> E
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
VectorF16<1, Packing*2> lenghtSq = LengthSq(A, E);
|
||||
VectorF16<1, Packing*2> lenghtSq = LengthSqPack(A, E);
|
||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, _Float16>::VectorType, __m128h>) {
|
||||
return VectorF16<1, Packing*2>(_mm_sqrt_ph(lenghtSq.v));
|
||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, _Float16>::VectorType, __m256h>) {
|
||||
|
|
@ -692,23 +724,23 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*4> LengthSq(
|
||||
VectorF16<Len, Packing> A,
|
||||
constexpr static VectorF16<1, Packing*4> LengthSqPack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> C,
|
||||
VectorF16<Len, Packing> E,
|
||||
VectorF16<Len, Packing> G
|
||||
) requires(Len == 4 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
return Dot(A, A, C, C, E, E, G, G);
|
||||
return DotPack(A, A, C, C, E, E, G, G);
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*2> LengthSq(
|
||||
VectorF16<Len, Packing> A,
|
||||
constexpr static VectorF16<1, Packing*2> LengthSqPack(
|
||||
VectorF16<Len, Packing> A,
|
||||
VectorF16<Len, Packing> E
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
return Dot(A, A, E, E);
|
||||
return DotPack(A, A, E, E);
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*4> Dot(
|
||||
constexpr static VectorF16<1, Packing*4> DotPack(
|
||||
VectorF16<Len, Packing> A0, VectorF16<Len, Packing> A1,
|
||||
VectorF16<Len, Packing> C0, VectorF16<Len, Packing> C1,
|
||||
VectorF16<Len, Packing> E0, VectorF16<Len, Packing> E1,
|
||||
|
|
@ -744,7 +776,7 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static VectorF16<1, Packing*2> Dot(
|
||||
constexpr static VectorF16<1, Packing*2> DotPack(
|
||||
VectorF16<Len, Packing> A0, VectorF16<Len, Packing> A1,
|
||||
VectorF16<Len, Packing> E0, VectorF16<Len, Packing> E1
|
||||
) requires(Len == 2 && Packing*Len == VectorBase<Len, Packing, _Float16>::AlignmentElement) {
|
||||
|
|
@ -1200,9 +1232,10 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto LengthSq(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
constexpr std::uint8_t N = 1 + sizeof...(Rest);
|
||||
constexpr std::uint8_t N = VectorBase<Len, Packing, _Float16>::BatchSize;
|
||||
VectorF16<1, static_cast<std::uint8_t>(Packing * N)> r;
|
||||
std::array<VectorF16<Len, Packing>, N> args{ first, rest... };
|
||||
for (std::uint8_t i = 0; i < N; ++i)
|
||||
|
|
@ -1218,7 +1251,8 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto Length(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
auto sq = LengthSq(first, rest...);
|
||||
for (std::uint8_t i = 0; i < decltype(sq)::NElems; ++i)
|
||||
|
|
@ -1227,7 +1261,8 @@ namespace Crafter {
|
|||
}
|
||||
|
||||
template<typename... Rest>
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...))
|
||||
requires((std::is_same_v<Rest, VectorF16<Len, Packing>> && ...) &&
|
||||
(1 + sizeof...(Rest) == VectorBase<Len, Packing, _Float16>::BatchSize))
|
||||
constexpr static auto Normalize(VectorF16<Len, Packing> first, Rest... rest) {
|
||||
auto normOne = [](VectorF16<Len, Packing> u) {
|
||||
VectorF16<Len, Packing> out;
|
||||
|
|
@ -1243,7 +1278,7 @@ namespace Crafter {
|
|||
}
|
||||
return out;
|
||||
};
|
||||
return std::make_tuple(normOne(first), normOne(rest)...);
|
||||
return std::array<VectorF16<Len, Packing>, VectorBase<Len, Packing, _Float16>::BatchSize>{ normOne(first), normOne(rest)... };
|
||||
}
|
||||
|
||||
constexpr static VectorF16<Len, Packing> Rotate(VectorF16<3, Packing> v, VectorF16<4, Packing> q) requires(Len == 3) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue