more tests
This commit is contained in:
parent
6b15089e24
commit
b582e168e3
6 changed files with 460 additions and 288 deletions
|
|
@ -29,11 +29,10 @@ namespace Crafter {
|
|||
return degrees * (std::numbers::pi / 180);
|
||||
}
|
||||
|
||||
#ifdef __AVX512FP16__
|
||||
#ifdef __x86_64
|
||||
#ifndef __AVX512FP16__
|
||||
export template <std::uint32_t Len, std::uint32_t Packing>
|
||||
using VectorF16L = VectorF16<Len, Packing>;
|
||||
#else
|
||||
export template <std::uint32_t Len, std::uint32_t Packing>
|
||||
using VectorF16L = VectorF32<Len, Packing>;
|
||||
using VectorF16 = VectorF32<Len, Packing>;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
@ -439,13 +439,13 @@ namespace Crafter {
|
|||
return q;
|
||||
}
|
||||
|
||||
constexpr static Vector<T, 4, Aligment> QuanternionFromEuler(T roll, T pitch, T yaw) {
|
||||
T cr = std::cos(roll * 0.5);
|
||||
T sr = std::sin(roll * 0.5);
|
||||
T cp = std::cos(pitch * 0.5);
|
||||
T sp = std::sin(pitch * 0.5);
|
||||
T cy = std::cos(yaw * 0.5);
|
||||
T sy = std::sin(yaw * 0.5);
|
||||
constexpr static Vector<T, 4, Aligment> QuanternionFromEuler(T rollHalf, T pitchHalf, T yawHalf) {
|
||||
T cr = std::cos(rollHalf);
|
||||
T sr = std::sin(rollHalf);
|
||||
T cp = std::cos(pitchHalf);
|
||||
T sp = std::sin(pitchHalf);
|
||||
T cy = std::cos(yawHalf);
|
||||
T sy = std::sin(yawHalf);
|
||||
|
||||
return Vector<T, 4, Aligment>(
|
||||
sr * cp * cy - cr * sp * sy,
|
||||
|
|
|
|||
|
|
@ -92,20 +92,24 @@ namespace Crafter {
|
|||
|
||||
template <std::uint32_t BLen, std::uint32_t BPacking>
|
||||
constexpr operator VectorF16<BLen, BPacking>() const {
|
||||
if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm256_castph256_ph128(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph512_ph128(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph512_ph256(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm256_castph128_ph256(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph128_ph512(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph256_ph512(v));
|
||||
if(constexpr Len == Blen) {
|
||||
if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm256_castph256_ph128(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph512_ph128(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph512_ph256(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm256_castph128_ph256(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph128_ph512(v));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
|
||||
return VectorF16<BLen, BPacking>(_mm512_castph256_ph512(v));
|
||||
} else {
|
||||
return VectorF16<BLen, BPacking>(v);
|
||||
}
|
||||
} else {
|
||||
return VectorF16<BLen, BPacking>(v);
|
||||
return ExtractLo<BLen>();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -364,13 +368,14 @@ namespace Crafter {
|
|||
|
||||
template <std::array<bool, Len> values>
|
||||
constexpr VectorF16<Len, Packing> Negate() {
|
||||
std::array<std::uint16_t, Len> mask = GetNegateMask<values>();
|
||||
std::array<std::uint16_t, Alignment> mask = GetNegateMask<values>();
|
||||
std::println("{}", mask);
|
||||
if constexpr(std::is_same_v<VectorType, __m128h>) {
|
||||
return VectorF16<Len, Packing>(_mm_castsi128_ph(_mm_xor_si128(_mm_castph_si128(v), _mm_loadu_epi16(mask.data()))));
|
||||
} else if constexpr(std::is_same_v<VectorType, __m256h>) {
|
||||
return VectorF16<Len, Packing>(_mm256_castsi2568_ph(_mm256_xor_si256(_mm256_castph_si256(v), _mm_loadu_epi16(mask.data()))));
|
||||
return VectorF16<Len, Packing>(_mm256_castsi256_ph(_mm256_xor_si256(_mm256_castph_si256(v), _mm256_loadu_epi16(mask.data()))));
|
||||
} else {
|
||||
return VectorF16<Len, Packing>(_mm512_castsi512_ph(_mm512_xor_si256(_mm512_castph_si512(v), _mm_loadu_epi16(mask.data()))));
|
||||
return VectorF16<Len, Packing>(_mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(v), _mm512_loadu_epi16(mask.data()))));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1170,14 +1175,14 @@ namespace Crafter {
|
|||
static_assert(false, "No __AVX512BW__ and __AVX512VL__ support");
|
||||
#endif
|
||||
#endif
|
||||
return _mm256_castsi256_ph(_mm256_mask_blend_epi16(_mm256_castph_si256(a.v), _mm256_castph_si256(b.v), GetBlendMaskEpi16<ShuffleValues>()));
|
||||
return _mm256_castsi256_ph(_mm256_mask_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm256_castph_si256(a.v), _mm256_castph_si256(b.v)));
|
||||
} else {
|
||||
return _mm512_castsi512_ph(_mm512_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm512_castph_si512(a.v), _mm512_castph_si512(b.v)));
|
||||
return _mm512_castsi512_ph(_mm512_mask_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm512_castph_si512(a.v), _mm512_castph_si512(b.v)));
|
||||
}
|
||||
}
|
||||
|
||||
// Rotates the 3-component vector `v` using the 4-component vector `q`.
// Computes t = Cross(qv, v) * 2 and returns v + t * q.Shuffle<{{3,3,3,3}}>() + Cross(qv, t),
// where qv is a 3-component vector built from q. Only available when Len == 3.
// NOTE(review): presumably q is a unit quaternion laid out as (x, y, z, w) and the
// Shuffle<{{3,3,3,3}}> broadcasts w to every lane - confirm against Shuffle's definition.
// NOTE(review): this span is a diff rendering; the two `qv` declarations below are the
// pre-change (q.v) and post-change (q) forms of the same line, not two separate statements.
constexpr static VectorF16<Len, Packing> Rotate(VectorF16<3, Packing> v, VectorF16<4, Packing> q) requires(Len == 3) {
|
||||
VectorF16<3, Packing> qv(q.v);
|
||||
VectorF16<3, Packing> qv(q);
|
||||
VectorF16<Len, Packing> t = Cross(qv, v) * _Float16(2);
|
||||
return v + t * q.template Shuffle<{{3,3,3,3}}>() + Cross(qv, t);
|
||||
}
|
||||
|
|
@ -1226,21 +1231,23 @@ namespace Crafter {
|
|||
}
|
||||
private:
|
||||
// Builds, at compile time, the XOR mask used to negate selected components.
// For every component i with values[i] == true the corresponding 16-bit word is set to
// 0b1000000000000000 (bit 15 only); all other words are 0.
// NOTE(review): this span is a diff rendering that contains two revisions of the function.
// The first header and loop (array of Len words, one word per component) is the
// pre-change form; the second (array of Alignment words, the Len-sized pattern repeated
// Packing times at index i2*Len+i, remaining words left at 0 by the `{0}` initializer)
// is the post-change form.
template <std::array<bool, Len> values>
|
||||
static consteval std::array<std::uint16_t, Len> GetNegateMask() {
|
||||
std::array<std::uint16_t, Len> mask;
|
||||
for(std::uint8_t i = 0; i < Len; i++) {
|
||||
if(values[i]) {
|
||||
mask[i] = 0b1000000000000000;
|
||||
} else {
|
||||
mask[i] = 0;
|
||||
static consteval std::array<std::uint16_t, Alignment> GetNegateMask() {
|
||||
std::array<std::uint16_t, Alignment> mask{0};
|
||||
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
|
||||
for(std::uint8_t i = 0; i < Len; i++) {
|
||||
if(values[i]) {
|
||||
mask[i2*Len+i] = 0b1000000000000000;
|
||||
} else {
|
||||
mask[i2*Len+i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
static consteval std::array<std::uint16_t, Len> GetNegateMaskAll() {
|
||||
std::array<std::uint16_t, Len> mask;
|
||||
for(std::uint8_t i = 0; i < Len; i++) {
|
||||
static consteval std::array<std::uint16_t, Alignment> GetNegateMaskAll() {
|
||||
std::array<std::uint16_t, Alignment> mask{0};
|
||||
for(std::uint8_t i = 0; i < Packing*Len; i++) {
|
||||
mask[i] = 0b1000000000000000;
|
||||
}
|
||||
return mask;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue