more tests

This commit is contained in:
Jorijn van der Graaf 2026-03-25 00:42:04 +01:00
commit b582e168e3
6 changed files with 460 additions and 288 deletions

View file

@ -92,20 +92,24 @@ namespace Crafter {
// Implicit conversion to a VectorF16 with a different length/packing
// (BLen, BPacking). Each `if constexpr` arm compares this type's register
// type with the destination's (__m128h / __m256h / __m512h) and applies the
// matching cast intrinsic; when no width-changing arm matches, the raw
// register `v` is handed to the destination constructor unchanged.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so pre- and post-commit lines are interleaved. The first
// if/else-if chain is repeated token-for-token inside the nested block
// further down; presumably only the nested copy is live. Confirm against the
// real file before editing.
// NOTE(review): per Intel's intrinsics documentation the widening casts
// (128->256, 128->512, 256->512) leave the upper elements undefined - confirm
// callers never read them.
template <std::uint32_t BLen, std::uint32_t BPacking>
constexpr operator VectorF16<BLen, BPacking>() const {
// NOTE(review): presumably the pre-commit chain (duplicated below).
if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
return VectorF16<BLen, BPacking>(_mm256_castph256_ph128(v));
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
return VectorF16<BLen, BPacking>(_mm512_castph512_ph128(v));
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
return VectorF16<BLen, BPacking>(_mm512_castph512_ph256(v));
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
return VectorF16<BLen, BPacking>(_mm256_castph128_ph256(v));
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
return VectorF16<BLen, BPacking>(_mm512_castph128_ph512(v));
} else if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
return VectorF16<BLen, BPacking>(_mm512_castph256_ph512(v));
// NOTE(review): not valid C++ as written - `constexpr` must directly follow
// `if`, and `Blen` does not match the template parameter `BLen`.
// Likely intended: `if constexpr(Len == BLen) {`.
if(constexpr Len == Blen) {
// Same element count: only the register width may differ, so a cast suffices.
if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
return VectorF16<BLen, BPacking>(_mm256_castph256_ph128(v));
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
return VectorF16<BLen, BPacking>(_mm512_castph512_ph128(v));
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
return VectorF16<BLen, BPacking>(_mm512_castph512_ph256(v));
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m256h>) {
return VectorF16<BLen, BPacking>(_mm256_castph128_ph256(v));
} else if constexpr(std::is_same_v<VectorType, __m128h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
return VectorF16<BLen, BPacking>(_mm512_castph128_ph512(v));
} else if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m512h>) {
return VectorF16<BLen, BPacking>(_mm512_castph256_ph512(v));
} else {
// None of the width-changing arms matched: pass `v` through as-is.
return VectorF16<BLen, BPacking>(v);
}
} else {
// NOTE(review): two returns in a row. The first looks like the pre-commit
// line; if both were live, the ExtractLo call would be unreachable.
return VectorF16<BLen, BPacking>(v);
// Element counts differ: delegate to ExtractLo<BLen>() (defined outside this view).
return ExtractLo<BLen>();
}
}
@ -364,13 +368,14 @@ namespace Crafter {
// Returns a copy of this vector with the sign flipped for every element whose
// entry in `values` is true. The flip is an integer XOR of the raw register
// bits with the mask from GetNegateMask<values>(), which holds 0x8000 (bit 15,
// the sign bit of a 16-bit float) in the selected lanes and 0 elsewhere.
// The mask is read with an unaligned load sized to the register width.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost; several statements appear twice (old form, then new form). Confirm
// against the real file which of each pair is live.
// NOTE(review): the function does not modify `*this` in the visible code, so
// it could presumably be declared `const`.
template <std::array<bool, Len> values>
constexpr VectorF16<Len, Packing> Negate() {
// NOTE(review): `mask` is declared twice. The Len-sized form looks like the
// pre-commit line; the Alignment-sized form is the one wide enough for the
// full-register loads below - TODO confirm Alignment is the lane count.
std::array<std::uint16_t, Len> mask = GetNegateMask<values>();
std::array<std::uint16_t, Alignment> mask = GetNegateMask<values>();
// NOTE(review): prints the mask on every call. This looks like leftover debug
// output; it adds I/O to a SIMD helper and presumably prevents the function
// from being used in a constant expression. Consider removing.
std::println("{}", mask);
if constexpr(std::is_same_v<VectorType, __m128h>) {
return VectorF16<Len, Packing>(_mm_castsi128_ph(_mm_xor_si128(_mm_castph_si128(v), _mm_loadu_epi16(mask.data()))));
} else if constexpr(std::is_same_v<VectorType, __m256h>) {
// NOTE(review): the first return uses `_mm256_castsi2568_ph` (apparent typo)
// and a 128-bit load as a 256-bit XOR operand; the second return is the
// corrected form of the same statement.
return VectorF16<Len, Packing>(_mm256_castsi2568_ph(_mm256_xor_si256(_mm256_castph_si256(v), _mm_loadu_epi16(mask.data()))));
return VectorF16<Len, Packing>(_mm256_castsi256_ph(_mm256_xor_si256(_mm256_castph_si256(v), _mm256_loadu_epi16(mask.data()))));
} else {
// NOTE(review): the first return uses `_mm512_xor_si256` and a 128-bit load;
// the second uses `_mm512_xor_si512` with a 512-bit load and is the
// corrected form of the same statement.
return VectorF16<Len, Packing>(_mm512_castsi512_ph(_mm512_xor_si256(_mm512_castph_si512(v), _mm_loadu_epi16(mask.data()))));
return VectorF16<Len, Packing>(_mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(v), _mm512_loadu_epi16(mask.data()))));
}
}
@ -1170,14 +1175,14 @@ namespace Crafter {
static_assert(false, "No __AVX512BW__ and __AVX512VL__ support");
#endif
#endif
return _mm256_castsi256_ph(_mm256_mask_blend_epi16(_mm256_castph_si256(a.v), _mm256_castph_si256(b.v), GetBlendMaskEpi16<ShuffleValues>()));
return _mm256_castsi256_ph(_mm256_mask_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm256_castph_si256(a.v), _mm256_castph_si256(b.v)));
} else {
return _mm512_castsi512_ph(_mm512_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm512_castph_si512(a.v), _mm512_castph_si512(b.v)));
return _mm512_castsi512_ph(_mm512_mask_blend_epi16(GetBlendMaskEpi16<ShuffleValues>(), _mm512_castph_si512(a.v), _mm512_castph_si512(b.v)));
}
}
// Rotates the 3-component vector `v` by the 4-component value `q`; only
// available when Len == 3. Computes t = 2 * Cross(qv, v) and returns
// v + t * q.Shuffle<{3,3,3,3}>() + Cross(qv, t), where `qv` is the
// 3-component view of `q`.
// NOTE(review): this matches the usual quaternion-rotation identity if `q` is
// stored as (x, y, z, w) and the shuffle broadcasts element 3 (w) - confirm
// against Shuffle's definition, which is outside this view.
// NOTE(review): `qv` is declared twice because this span comes from a rendered
// diff without +/- markers. The first form builds it from the raw register
// `q.v`, the second from `q` itself (going through a VectorF16<4> ->
// VectorF16<3> conversion). Presumably the second is the post-commit line.
constexpr static VectorF16<Len, Packing> Rotate(VectorF16<3, Packing> v, VectorF16<4, Packing> q) requires(Len == 3) {
VectorF16<3, Packing> qv(q.v);
VectorF16<3, Packing> qv(q);
// t = 2 * (qv x v)
VectorF16<Len, Packing> t = Cross(qv, v) * _Float16(2);
return v + t * q.template Shuffle<{{3,3,3,3}}>() + Cross(qv, t);
}
@ -1226,21 +1231,23 @@ namespace Crafter {
}
private:
// Compile-time builder for the XOR mask that flips selected sign bits.
// For each element i with values[i] == true the mask lane is 0x8000 (bit 15
// set); every other lane is 0.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so two versions are interleaved:
//   - a Len-sized version that fills lanes [0, Len) once, and
//   - an Alignment-sized version that zero-initialises the array and repeats
//     the Len-element pattern Packing times at offsets i2*Len.
// Presumably the Alignment-sized version is the post-commit one; confirm
// against the real file.
// NOTE(review): in the Alignment-sized version the `else` branch is redundant
// because `mask{0}` already zeroes every lane.
// NOTE(review): the loop counters are std::uint8_t; if Len or Packing could
// ever exceed 255 the comparison would never become false - confirm the
// valid ranges.
template <std::array<bool, Len> values>
static consteval std::array<std::uint16_t, Len> GetNegateMask() {
std::array<std::uint16_t, Len> mask;
for(std::uint8_t i = 0; i < Len; i++) {
if(values[i]) {
mask[i] = 0b1000000000000000;
} else {
mask[i] = 0;
static consteval std::array<std::uint16_t, Alignment> GetNegateMask() {
std::array<std::uint16_t, Alignment> mask{0};
// Outer loop: one pass per packed vector; inner loop: one pass per element.
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
for(std::uint8_t i = 0; i < Len; i++) {
if(values[i]) {
mask[i2*Len+i] = 0b1000000000000000;
} else {
mask[i2*Len+i] = 0;
}
}
}
return mask;
}
static consteval std::array<std::uint16_t, Len> GetNegateMaskAll() {
std::array<std::uint16_t, Len> mask;
for(std::uint8_t i = 0; i < Len; i++) {
static consteval std::array<std::uint16_t, Alignment> GetNegateMaskAll() {
std::array<std::uint16_t, Alignment> mask{0};
for(std::uint8_t i = 0; i < Packing*Len; i++) {
mask[i] = 0b1000000000000000;
}
return mask;