more F16 math
This commit is contained in:
parent
c54ff6228c
commit
f1fbbe0faf
3 changed files with 82 additions and 42 deletions
|
|
@ -580,7 +580,7 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>> Normalize(
|
||||
constexpr static std::tuple<VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>> Normalize(
|
||||
VectorF16<Len, Packing, Repeats> A,
|
||||
VectorF16<Len, Packing, Repeats> C,
|
||||
VectorF16<Len, Packing, Repeats> E,
|
||||
|
|
@ -705,15 +705,15 @@ namespace Crafter {
|
|||
__m512h fLenghtG = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecG));
|
||||
|
||||
return {
|
||||
_mm512_mul_ph(A.v, fLenghtA),
|
||||
_mm512_mul_ph(C.v, fLenghtC),
|
||||
_mm512_mul_ph(E.v, fLenghtE),
|
||||
_mm512_mul_ph(G.v, fLenghtG),
|
||||
VectorF16<Len, Packing, Repeats>(_mm512_mul_ph(A.v, fLenghtA)),
|
||||
VectorF16<Len, Packing, Repeats>(_mm512_mul_ph(C.v, fLenghtC)),
|
||||
VectorF16<Len, Packing, Repeats>(_mm512_mul_ph(E.v, fLenghtE)),
|
||||
VectorF16<Len, Packing, Repeats>(_mm512_mul_ph(G.v, fLenghtG)),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
constexpr static std::tuple<VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>> Normalize(
|
||||
constexpr static std::tuple<VectorF16<Len, Packing, Repeats>, VectorF16<Len, Packing, Repeats>> Normalize(
|
||||
VectorF16<Len, Packing, Repeats> A,
|
||||
VectorF16<Len, Packing, Repeats> E
|
||||
) requires(Packing == 4) {
|
||||
|
|
@ -932,6 +932,36 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the square root of LengthSq(A, C, E, G), wrapped back into a
// VectorF16. The half-precision sqrt intrinsic is picked at compile time
// from the width of the underlying register type (VectorType).
constexpr static VectorF16<Len, Packing, Repeats> Length(
    VectorF16<Len, Packing, Repeats> A,
    VectorF16<Len, Packing, Repeats> C,
    VectorF16<Len, Packing, Repeats> E,
    VectorF16<Len, Packing, Repeats> G
) requires(Packing == 2) {
    using Result = VectorF16<Len, Packing, Repeats>;

    const Result squared = LengthSq(A, C, E, G);

    if constexpr(std::is_same_v<VectorType, __m128h>) {
        // 128-bit register
        return Result(_mm_sqrt_ph(squared.v));
    } else if constexpr(std::is_same_v<VectorType, __m256h>) {
        // 256-bit register
        return Result(_mm256_sqrt_ph(squared.v));
    } else {
        // Any other VectorType is handled with the 512-bit intrinsic.
        return Result(_mm512_sqrt_ph(squared.v));
    }
}
|
||||
|
||||
// Returns the square root of LengthSq(A, E), wrapped back into a VectorF16.
// The half-precision sqrt intrinsic is picked at compile time from the width
// of the underlying register type (VectorType).
//
// Constrained to Packing == 4: the two-argument LengthSq(A, E) called here is
// only available when Packing == 4, so a Packing == 2 constraint on this
// overload could never produce a call that compiles.
// NOTE(review): assumes no other two-argument Length/LengthSq overload exists
// outside the visible part of the file - confirm.
constexpr static VectorF16<Len, Packing, Repeats> Length(
    VectorF16<Len, Packing, Repeats> A,
    VectorF16<Len, Packing, Repeats> E
) requires(Packing == 4) {
    const VectorF16<Len, Packing, Repeats> lengthSq = LengthSq(A, E);
    if constexpr(std::is_same_v<VectorType, __m128h>) {
        return VectorF16<Len, Packing, Repeats>(_mm_sqrt_ph(lengthSq.v));
    } else if constexpr(std::is_same_v<VectorType, __m256h>) {
        return VectorF16<Len, Packing, Repeats>(_mm256_sqrt_ph(lengthSq.v));
    } else {
        // Any other VectorType is handled with the 512-bit intrinsic.
        return VectorF16<Len, Packing, Repeats>(_mm512_sqrt_ph(lengthSq.v));
    }
}
|
||||
|
||||
constexpr static VectorF16<Len, Packing, Repeats> LengthSq(
|
||||
VectorF16<Len, Packing, Repeats> A,
|
||||
VectorF16<Len, Packing, Repeats> B,
|
||||
|
|
@ -945,6 +975,22 @@ namespace Crafter {
|
|||
return Dot(A, A, B, B, C, C, D, D, E, E, F, F, G, G, H, H);
|
||||
}
|
||||
|
||||
// Squared length for the four-register form (Packing == 2): forwards to Dot
// with each input paired against itself.
constexpr static VectorF16<Len, Packing, Repeats> LengthSq(
    VectorF16<Len, Packing, Repeats> A,
    VectorF16<Len, Packing, Repeats> C,
    VectorF16<Len, Packing, Repeats> E,
    VectorF16<Len, Packing, Repeats> G
) requires(Packing == 2) {
    return Dot(
        A, A,
        C, C,
        E, E,
        G, G
    );
}
|
||||
|
||||
// Squared length for the two-register form (Packing == 4): forwards to Dot
// with each input paired against itself.
constexpr static VectorF16<Len, Packing, Repeats> LengthSq(
    VectorF16<Len, Packing, Repeats> A,
    VectorF16<Len, Packing, Repeats> E
) requires(Packing == 4) {
    return Dot(
        A, A,
        E, E
    );
}
|
||||
|
||||
constexpr static VectorF16<Len, Packing, Repeats> Dot(
|
||||
VectorF16<Len, Packing, Repeats> A0, VectorF16<Len, Packing, Repeats> A1,
|
||||
VectorF16<Len, Packing, Repeats> B0, VectorF16<Len, Packing, Repeats> B1,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue