more tests

This commit is contained in:
Jorijn van der Graaf 2026-03-25 02:51:02 +01:00
commit e0f992aada
2 changed files with 125 additions and 71 deletions

View file

@ -92,7 +92,7 @@ namespace Crafter {
template <std::uint32_t BLen, std::uint32_t BPacking>
constexpr operator VectorF16<BLen, BPacking>() const {
if(constexpr Len == Blen) {
if constexpr (Len == BLen) {
if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
return VectorF16<BLen, BPacking>(_mm256_castph256_ph128(v));
} else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<typename VectorF16<BLen, BPacking>::VectorType, __m128h>) {
@ -109,7 +109,7 @@ namespace Crafter {
return VectorF16<BLen, BPacking>(v);
}
} else {
return ExtractLo<BLen>();
return this->template ExtractLo<BLen>();
}
}
@ -258,7 +258,71 @@ namespace Crafter {
}
}
constexpr void Normalize() {
// Builds the byte-index table that ExtractLo passes to _mm_shuffle_epi8 (128-bit path).
// For every packed sub-vector `pack` and every kept element `elem`, the two bytes of
// destination element (pack*ExtractLen + elem) receive the byte offsets of source
// element (pack*Len + elem). Entries the loops never write stay 0.
template <std::uint32_t ExtractLen>
static consteval std::array<std::uint8_t, VectorF16<ExtractLen, Packing>::Alignment*2> GetExtractLoMaskEpi8() {
    std::array<std::uint8_t, VectorF16<ExtractLen, Packing>::Alignment*2> byteIndices{};
    for(std::uint8_t pack = 0; pack < Packing; ++pack) {
        // Byte offset of this sub-vector in the result (stride ExtractLen) and in the source (stride Len).
        const auto dstBase = pack*ExtractLen*2;
        const auto srcBase = pack*Len*2;
        for(std::uint8_t elem = 0; elem < ExtractLen; ++elem) {
            // Each 16-bit element occupies two consecutive bytes.
            const auto lowByte = elem*2;
            byteIndices[dstBase+lowByte] = lowByte+srcBase;
            byteIndices[dstBase+(lowByte+1)] = lowByte+1+srcBase;
        }
    }
    return byteIndices;
}
// Builds the 16-bit lane-index table that ExtractLo passes to the permutexvar_epi16
// intrinsics (256/512-bit paths). Destination lane (pack*ExtractLen + elem) is given the
// index of source lane (pack*Len + elem); lanes the loops never write stay 0.
// The table has VectorF16<ExtractLen, Packing>::Alignment entries, i.e. it is sized by
// the RESULT vector, not by the vector being permuted.
template <std::uint32_t ExtractLen>
static consteval std::array<std::uint16_t, VectorF16<ExtractLen, Packing>::Alignment> GetExtractLoMaskEpi16() {
    std::array<std::uint16_t, VectorF16<ExtractLen, Packing>::Alignment> laneIndices{};
    for (std::uint16_t pack = 0; pack < Packing; ++pack) {
        // First lane of this sub-vector in the result (stride ExtractLen) and in the source (stride Len).
        const auto dstBase = pack * ExtractLen;
        const auto srcBase = pack * Len;
        for (std::uint16_t elem = 0; elem < ExtractLen; ++elem) {
            laneIndices[dstBase + elem] = elem + srcBase;
        }
    }
    return laneIndices;
}
// Returns a VectorF16<ExtractLen, Packing> built from the first ExtractLen elements of
// each of the Packing packed sub-vectors, stored contiguously (source stride Len, result
// stride ExtractLen).
//
// Packing > 1: elements are moved with a compile-time index table.
//   - __m128h source: byte shuffle (_mm_shuffle_epi8) driven by GetExtractLoMaskEpi8.
//   - __m256h / __m512h source: 16-bit lane permute (permutexvar_epi16) driven by
//     GetExtractLoMaskEpi16, followed by a cast down to the result's register type.
// Packing == 1: no repacking is performed; the register is only cast down to the
//   result's register type, or passed through unchanged when no cast applies.
template<std::uint32_t ExtractLen>
constexpr VectorF16<ExtractLen, Packing> ExtractLo() const {
    using Result = VectorF16<ExtractLen, Packing>;
    using ResultVector = typename Result::VectorType;
    if constexpr(Packing > 1) {
        if constexpr(std::is_same_v<VectorType, __m128h>) {
            constexpr std::array<std::uint8_t, Result::Alignment*2> shuffleMask = GetExtractLoMaskEpi8<ExtractLen>();
            __m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data());
            return Result(_mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(v), shuffleVec)));
        } else {
            // GetExtractLoMaskEpi16 sizes its table by the RESULT's Alignment. The loads
            // below always read a full source-width register, so when the result uses a
            // narrower register the table would be read past its end. Copy it into a
            // buffer sized by this (source) vector's Alignment first. Padding indices are
            // 0; the lanes they produce are dropped by the narrowing cast.
            constexpr std::array<std::uint16_t, Alignment> permMask = [] {
                constexpr auto compact = GetExtractLoMaskEpi16<ExtractLen>();
                std::array<std::uint16_t, Alignment> padded{};
                for(std::size_t lane = 0; lane < compact.size() && lane < padded.size(); lane++) {
                    padded[lane] = compact[lane];
                }
                return padded;
            }();
            if constexpr(std::is_same_v<VectorType, __m256h>) {
                __m256i permIdx = _mm256_loadu_epi16(permMask.data());
                __m256i result = _mm256_permutexvar_epi16(permIdx, _mm256_castph_si256(v));
                if constexpr(std::is_same_v<ResultVector, __m128h>) {
                    return Result(_mm256_castph256_ph128(_mm256_castsi256_ph(result)));
                } else {
                    return Result(_mm256_castsi256_ph(result));
                }
            } else {
                __m512i permIdx = _mm512_loadu_epi16(permMask.data());
                __m512i result = _mm512_permutexvar_epi16(permIdx, _mm512_castph_si512(v));
                if constexpr(std::is_same_v<ResultVector, __m128h>) {
                    return Result(_mm512_castph512_ph128(_mm512_castsi512_ph(result)));
                } else if constexpr(std::is_same_v<ResultVector, __m256h>) {
                    return Result(_mm512_castph512_ph256(_mm512_castsi512_ph(result)));
                } else {
                    return Result(_mm512_castsi512_ph(result));
                }
            }
        }
    } else {
        // Single packed vector: only narrow the register type where source and result differ.
        if constexpr(std::is_same_v<VectorType, __m256h> && std::is_same_v<ResultVector, __m128h>) {
            return Result(_mm256_castph256_ph128(v));
        } else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<ResultVector, __m128h>) {
            return Result(_mm512_castph512_ph128(v));
        } else if constexpr(std::is_same_v<VectorType, __m512h> && std::is_same_v<ResultVector, __m256h>) {
            return Result(_mm512_castph512_ph256(v));
        } else {
            return Result(v);
        }
    }
}
constexpr void Normalize() requires(Packing == 1) {
if constexpr(std::is_same_v<VectorType, __m128h>) {
_Float16 dot = LengthSq();
__m128h vec = _mm_set1_ph(dot);
@ -277,12 +341,12 @@ namespace Crafter {
}
}
constexpr _Float16 Length() const {
constexpr _Float16 Length() const requires(Packing == 1) {
_Float16 Result = LengthSq();
return std::sqrtf(Result);
}
constexpr _Float16 LengthSq() const {
constexpr _Float16 LengthSq() const requires(Packing == 1) {
return Dot(*this, *this);
}
@ -369,7 +433,6 @@ namespace Crafter {
template <std::array<bool, Len> values>
constexpr VectorF16<Len, Packing> Negate() {
std::array<std::uint16_t, Alignment> mask = GetNegateMask<values>();
std::println("{}", mask);
if constexpr(std::is_same_v<VectorType, __m128h>) {
return VectorF16<Len, Packing>(_mm_castsi128_ph(_mm_xor_si128(_mm_castph_si128(v), _mm_loadu_epi16(mask.data()))));
} else if constexpr(std::is_same_v<VectorType, __m256h>) {
@ -391,15 +454,15 @@ namespace Crafter {
}
} else {
if constexpr(std::is_same_v<VectorType, __m128h>) {
constexpr std::array<std::uint8_t, 16> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
__m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data());
return VectorF16<Len, Packing>(_mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(v), shuffleVec)));
} else if constexpr(std::is_same_v<VectorType, __m256h>) {
constexpr std::array<std::uint8_t, 32> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
__m256i shuffleVec = _mm256_loadu_epi8(shuffleMask.data());
return VectorF16<Len, Packing>(_mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(v)), _mm512_castsi256_si512(shuffleVec)))));
} else {
constexpr std::array<std::uint8_t, 64> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask = GetShuffleMaskEpi8<ShuffleValues>();
__m512i shuffleVec = _mm512_loadu_epi8(shuffleMask.data());
return VectorF16<Len, Packing>(_mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(v), shuffleVec)));
}
@ -428,12 +491,12 @@ namespace Crafter {
constexpr static VectorF16<Len, Packing> Cross(VectorF16<Len, Packing> a, VectorF16<Len, Packing> b) requires(Len == 3) {
if constexpr(std::is_same_v<VectorType, __m128h>) {
constexpr std::array<std::uint8_t, 16> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
__m128i shuffleVec1 = _mm_loadu_epi8(shuffleMask1.data());
__m128h row1 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(a.v), shuffleVec1));
__m128h row4 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(b.v), shuffleVec1));
constexpr std::array<std::uint8_t, 16> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
__m128i shuffleVec3 = _mm_loadu_epi8(shuffleMask3.data());
__m128h row3 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(a.v), shuffleVec3));
__m128h row2 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(b.v), shuffleVec3));
@ -441,12 +504,12 @@ namespace Crafter {
__m128h result = _mm_mul_ph(row3, row4);
return _mm_fmsub_ph(row1,row2,result);
} else if constexpr (std::is_same_v<VectorType, __m256h>) {
constexpr std::array<std::uint8_t, 32> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
__m512i shuffleVec1 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask1.data()));
__m256h row1 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(a.v)), shuffleVec1)));
__m256h row4 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(b.v)), shuffleVec1)));
constexpr std::array<std::uint8_t, 32> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
__m512i shuffleVec3 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask3.data()));
__m256h row3 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(a.v)), shuffleVec3)));
@ -455,13 +518,13 @@ namespace Crafter {
__m256h result = _mm256_mul_ph(row3, row4);
return _mm256_fmsub_ph(row1,row2,result);
} else {
constexpr std::array<std::uint8_t, 64> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>();
__m512i shuffleVec1 = _mm512_loadu_epi8(shuffleMask1.data());
__m512h row1 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(a.v), shuffleVec1));
__m512h row4 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(b.v), shuffleVec1));
constexpr std::array<std::uint8_t, 64> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
constexpr std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>();
__m512i shuffleVec3 = _mm512_loadu_epi8(shuffleMask3.data());
__m512h row3 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(a.v), shuffleVec3));
@ -1263,32 +1326,8 @@ namespace Crafter {
}
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval std::array<std::uint8_t, 16> GetShuffleMaskEpi8() requires (std::is_same_v<VectorType, __m128h>){
std::array<std::uint8_t, 16> shuffleMask {{0}};
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
for(std::uint8_t i = 0; i < Len; i++) {
shuffleMask[(i2*Len*2)+(i*2)] = ShuffleValues[i]*2+(i2*Len*2);
shuffleMask[(i2*Len*2)+(i*2+1)] = ShuffleValues[i]*2+1+(i2*Len*2);
}
}
return shuffleMask;
}
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval std::array<std::uint8_t, 32> GetShuffleMaskEpi8() requires (std::is_same_v<VectorType, __m256h>){
std::array<std::uint8_t, 32> shuffleMask {{0}};
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
for(std::uint8_t i = 0; i < Len; i++) {
shuffleMask[(i2*Len*2)+(i*2)] = ShuffleValues[i]*2+(i2*Len*2);
shuffleMask[(i2*Len*2)+(i*2+1)] = ShuffleValues[i]*2+1+(i2*Len*2);
}
}
return shuffleMask;
}
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval std::array<std::uint8_t, 64> GetShuffleMaskEpi8() requires (std::is_same_v<VectorType, __m512h>){
std::array<std::uint8_t, 64> shuffleMask {{0}};
static consteval std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> GetShuffleMaskEpi8() {
std::array<std::uint8_t, VectorF16<Len, Packing>::Alignment*2> shuffleMask {{0}};
for(std::uint8_t i2 = 0; i2 < Packing; i2++) {
for(std::uint8_t i = 0; i < Len; i++) {
shuffleMask[(i2*Len*2)+(i*2)] = ShuffleValues[i]*2+(i2*Len*2);

View file

@ -23,7 +23,7 @@ using namespace Crafter;
// Helper function to compare floating point values with tolerance
template <typename T>
constexpr bool FloatEquals(T a, T b, T epsilon = 0.001f) {
constexpr bool FloatEquals(T a, T b, T epsilon = 0.01f) {
return std::abs(static_cast<float>(a) - static_cast<float>(b)) < static_cast<float>(epsilon);
}
@ -249,6 +249,20 @@ std::string* TestAllCombinations() {
}
}
}
if constexpr(Len > 2){
VectorType<Len, Packing> vec(floats);
VectorType<Len-1, Packing> result = vec.template ExtractLo<Len-1>();
Vector<T, (Len-1)*Packing, VectorType<Len-1, Packing>::Alignment> stored = result.Store();
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
for (std::uint32_t i = 0; i < Len-1; i++) {
T expected = floats[i2*(Len)+i];
if (!FloatEquals(stored.v[i2*(Len-1)+i], expected)) {
return new std::string(std::format("ExtractLo mismatch at Len={} Packing={}, Index={}, Expected: {}, Got: {}", Len, Packing, i, (float)expected, (float)stored.v[i2*(Len-1)+i]));
}
}
}
}
}
if constexpr(Packing == 1) {
@ -276,8 +290,9 @@ std::string* TestAllCombinations() {
{
VectorType<Len, Packing> vec(floats);
T length = vec.Length();
if (!FloatEquals(length, static_cast<T>(std::sqrtf(static_cast<float>(expectedLengthSq))))) {
return new std::string(std::format("Length mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (std::sqrtf(static_cast<float>(expectedLengthSq))), (float)length));
T expected = static_cast<T>(std::sqrtf(static_cast<float>(expectedLengthSq)));
if (!FloatEquals(length, expected)) {
return new std::string(std::format("Length mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)expected, (float)length));
}
}
@ -291,35 +306,35 @@ std::string* TestAllCombinations() {
}
}
if constexpr(Len == 3) {
{
VectorType<Len, Packing> vec1(floats1);
VectorType<Len, Packing> vec2(floats2);
VectorType<Len, Packing> result = VectorType<Len, Packing>::Cross(vec1, vec2);
Vector<T, Len*Packing, VectorType<Len, Packing>::Alignment> stored = result.Store();
if (!FloatEquals(stored.v[0], T(-3)) || !FloatEquals(stored.v[1], T(6)) || !FloatEquals(stored.v[2], T(-3))) {
return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored.v[0], (float)stored.v[1], (float)stored.v[2]));
}
}
if constexpr(4 * Packing < VectorType<1, 1>::MaxSize) {
T qData[4];
qData[0] = T(1);
qData[1] = T(0);
qData[2] = T(0);
qData[3] = T(0);
// if constexpr(Len == 3) {
// {
// VectorType<Len, Packing> vec1(floats1);
// VectorType<Len, Packing> vec2(floats2);
// VectorType<Len, Packing> result = VectorType<Len, Packing>::Cross(vec1, vec2);
// Vector<T, Len*Packing, VectorType<Len, Packing>::Alignment> stored = result.Store();
// if (!FloatEquals(stored.v[0], T(-3)) || !FloatEquals(stored.v[1], T(6)) || !FloatEquals(stored.v[2], T(-3))) {
// return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored.v[0], (float)stored.v[1], (float)stored.v[2]));
// }
// }
// // if constexpr(4 * Packing < VectorType<1, 1>::MaxSize) {
// // T qData[VectorType<4, Packing>::Alignment];
// // qData[0] = T(1);
// // qData[1] = T(0);
// // qData[2] = T(0);
// // qData[3] = T(0);
VectorType<3, Packing> vecV(floats);
VectorType<4, Packing> vecQ(qData);
VectorType<3, Packing> result = VectorType<3, Packing>::Rotate(vecV, vecQ);
Vector<T, 3*Packing, VectorType<3, Packing>::Alignment> stored = result.Store();
// // VectorType<3, Packing> vecV(floats);
// // VectorType<4, Packing> vecQ(qData);
// // VectorType<3, Packing> result = VectorType<3, Packing>::Rotate(vecV, vecQ);
// // Vector<T, 3*Packing, VectorType<3, Packing>::Alignment> stored = result.Store();
for (std::uint32_t i = 0; i < 3; i++) {
if (!FloatEquals(stored.v[i], floats[i])) {
return new std::string(std::format("Rotate mismatch at Len={} Packing={}, Index={}, Expected: {}, Got: {}", Len, Packing, i, (float)floats[i], (float)stored.v[i]));
}
}
}
}
// // for (std::uint32_t i = 0; i < 3; i++) {
// // if (!FloatEquals(stored.v[i], floats[i])) {
// // return new std::string(std::format("Rotate mismatch at Len={} Packing={}, Index={}, Expected: {}, Got: {}", Len, Packing, i, (float)floats[i], (float)stored.v[i]));
// // }
// // }
// // }
// }
// // Test QuanternionFromEuler() static method (Len == 4 only)