x86 gating

This commit is contained in:
Jorijn van der Graaf 2026-05-18 01:18:30 +02:00
commit 99972d8c81
3 changed files with 22 additions and 4 deletions

View file

@ -32,16 +32,22 @@ namespace Crafter {
}
}
// x86-64 only: pick the SIMD register type that stores this vector's lanes.
#ifdef __x86_64
// VectorType is chosen from the element type T and the total lane count
// (Len * Packing):
//   T == _Float16 (only when __AVX512FP16__ is defined):
//     > 16 lanes -> __m512h, > 8 lanes -> __m256h, otherwise __m128h
//   T == _Float16 without __AVX512FP16__: void
//   any other T:
//     > 8 lanes -> __m512, > 4 lanes -> __m256, otherwise __m128
using VectorType = std::conditional_t<std::is_same_v<T, _Float16>,
#ifdef __AVX512FP16__
std::conditional_t<(Len * Packing > 16), __m512h,
std::conditional_t<(Len * Packing > 8), __m256h, __m128h>>,
#else
// The half-precision register types are not referenced on this path.
void,
#endif
std::conditional_t<(Len * Packing > 8), __m512,
std::conditional_t<(Len * Packing > 4), __m256, __m128>>
>;
// Underlying SIMD register.
// NOTE(review): when VectorType resolves to void (T == _Float16 without
// __AVX512FP16__) this member declaration is ill-formed on instantiation --
// presumably an intentional hard error; confirm.
VectorType v;
#endif
public:
@ -100,7 +106,7 @@ namespace Crafter {
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval std::array<std::uint8_t, Alignment> GetShuffleMaskEpi8() {
std::array<std::uint8_t, Alignment> shuffleMask {{0}};
if constexpr(std::same_as<T, _Float16>) {
if constexpr(sizeof(T) == 2) {
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
for(std::uint8_t i = 0; i < Len; i++) {
shuffleMask[(i2*Len*sizeof(T))+(i*sizeof(T))] = ShuffleValues[i]*sizeof(T)+(i2*Len*sizeof(T));
@ -213,6 +219,8 @@ namespace Crafter {
return shuffleMask;
}
#ifdef __x86_64
#ifdef __AVX512FP16__
template <std::array<bool, Len> ShuffleValues>
static consteval std::uint8_t GetBlendMaskEpi16() requires (std::is_same_v<VectorType, __m128h>){
std::uint8_t mask = 0;
@ -251,6 +259,7 @@ namespace Crafter {
}
return mask;
}
#endif
template <std::array<bool, Len> ShuffleValues>
static consteval std::uint8_t GetBlendMaskEpi32() requires (std::is_same_v<VectorType, __m128>){
@ -290,6 +299,7 @@ namespace Crafter {
}
return mask;
}
#endif
// 2/pi. range_reduce_f32x4 multiplies its input by this constant and rounds
// the product to the nearest integer.
static constexpr float two_over_pi = 0.6366197723675814f;
// NOTE(review): presumably the high-order part of a split pi/2 constant
// (suffix "_hi"); confirm against the matching low-order part.
static constexpr float pi_over_2_hi = 1.5707963267341256f;
@ -310,6 +320,7 @@ namespace Crafter {
static constexpr float s7 = 0.0000027526372f;
static constexpr float s9 = -0.0000000239013f;
#ifdef __x86_64
// --- 128-bit (SSE) helpers ---
static constexpr void range_reduce_f32x4(__m128 ax, __m128& r, __m128& r2, __m128i& q) {
__m128 fq = _mm_round_ps(_mm_mul_ps(ax, _mm_set1_ps(two_over_pi)), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
@ -590,5 +601,6 @@ namespace Crafter {
out_sin = _mm512_xor_ps(out_sin, _mm512_castsi512_ps(_mm512_slli_epi32(sin_neg, 30)));
out_sin = _mm512_xor_ps(out_sin, x_sign);
}
#endif
};
}