diff --git a/interfaces/Crafter.Math-Vector.cppm b/interfaces/Crafter.Math-Vector.cppm index d8618cf..87703b6 100755 --- a/interfaces/Crafter.Math-Vector.cppm +++ b/interfaces/Crafter.Math-Vector.cppm @@ -458,34 +458,24 @@ namespace Crafter { } -template -struct std::formatter> : std::formatter { - auto format(const Crafter::Vector& obj, format_context& ctx) const { - return std::formatter::format(std::format("{{{}, {}}}", - obj.x, obj.y - ), ctx); + +template +struct std::formatter> : std::formatter { + auto format(const Crafter::Vector& vec, format_context& ctx) const { + std::string out = "{"; + for(std::uint32_t i2 = 0; i2 < Len; i2++) { + if constexpr(std::same_as) { + out += std::format("{}", static_cast(vec.v[i2])); + } else { + out += std::format("{}", vec.v[i2]); + } + if (i2 + 1 < Len) out += ","; + } + out += "}"; + return std::formatter::format(out, ctx); } }; -template -struct std::formatter> : std::formatter { - auto format(const Crafter::Vector& obj, format_context& ctx) const { - return std::formatter::format(std::format("{{{}, {}, {}}}", - obj.x, obj.y, obj.z - ), ctx); - } -}; - -template -struct std::formatter> : std::formatter { - auto format(const Crafter::Vector& obj, format_context& ctx) const { - return std::formatter::format(std::format("{{{}, {}, {}, {}}}", - (float)obj.x, (float)obj.y, (float)obj.z, (float)obj.w - ), ctx); - } -}; - - template constexpr Crafter::Vector operator*(BT b, const Crafter::Vector& v) requires std::is_arithmetic_v { return v * b; diff --git a/interfaces/Crafter.Math-VectorF16.cppm b/interfaces/Crafter.Math-VectorF16.cppm index 20e17ab..8b5d9d7 100755 --- a/interfaces/Crafter.Math-VectorF16.cppm +++ b/interfaces/Crafter.Math-VectorF16.cppm @@ -45,6 +45,8 @@ namespace Crafter { VectorType v; public: + template + friend class VectorF16; static constexpr std::uint32_t MaxSize = 32; static constexpr std::uint8_t Alignment = GetAlingment(); static_assert(Len * Packing <= MaxSize, "Len * Packing exceeds MaxSize"); @@ -256,18 +258,18 @@ namespace Crafter { if constexpr(std::is_same_v) { _Float16 dot = LengthSq(); __m128h vec = _mm_set1_ph(dot); - __m128h sqrt = _mm_rsqrt_ph(vec); - v = _mm_div_ps(v, sqrt); + __m128h sqrt = _mm_sqrt_ph(vec); + v = _mm_div_ph(v, sqrt); } else if constexpr(std::is_same_v) { _Float16 dot = LengthSq(); __m256h vec = _mm256_set1_ph(dot); - __m256h sqrt = _mm256_rsqrt_ph(vec); - v = _mm256_div_ps(v, sqrt); + __m256h sqrt = _mm256_sqrt_ph(vec); + v = _mm256_div_ph(v, sqrt); } else { _Float16 dot = LengthSq(); __m512h vec = _mm512_set1_ph(dot); - __m512h sqrt = _mm512_rsqrt_ph(vec); - v = _mm512_div_ps(v, sqrt); + __m512h sqrt = _mm512_sqrt_ph(vec); + v = _mm512_div_ph(v, sqrt); } } @@ -323,42 +325,42 @@ namespace Crafter { } } - std::tuple, VectorF16> SinCos() { - if constexpr (std::is_same_v) { - __m256 wide = _mm256_cvtph_ps(_mm_castph_si128(v)); - __m256 s, c; - sincos_f32x8(wide, s, c); - return { - VectorF16(_mm_castsi128_ph(_mm256_cvtps_ph(s, _MM_FROUND_TO_NEAREST_INT))), - VectorF16(_mm_castsi128_ph(_mm256_cvtps_ph(c, _MM_FROUND_TO_NEAREST_INT))) - }; + std::tuple, VectorF16> SinCos() { + if constexpr (std::is_same_v) { + __m256 wide = _mm256_cvtph_ps(_mm_castph_si128(v)); + __m256 s, c; + sincos_f32x8(wide, s, c); + return { + VectorF16(_mm_castsi128_ph(_mm256_cvtps_ph(s, _MM_FROUND_TO_NEAREST_INT))), + VectorF16(_mm_castsi128_ph(_mm256_cvtps_ph(c, _MM_FROUND_TO_NEAREST_INT))) + }; - } else if constexpr (std::is_same_v) { - __m512 wide = _mm512_cvtph_ps(_mm256_castph_si256(v)); - __m512 s, c; - sincos_f32x16(wide, s, c); - return { - VectorF16(_mm256_castsi256_ph(_mm512_cvtps_ph(s, _MM_FROUND_TO_NEAREST_INT))), - VectorF16(_mm256_castsi256_ph(_mm512_cvtps_ph(c, _MM_FROUND_TO_NEAREST_INT))) - }; + } else if constexpr (std::is_same_v) { + __m512 wide = _mm512_cvtph_ps(_mm256_castph_si256(v)); + __m512 s, c; + sincos_f32x16(wide, s, c); + return { + VectorF16(_mm256_castsi256_ph(_mm512_cvtps_ph(s, _MM_FROUND_TO_NEAREST_INT))), + VectorF16(_mm256_castsi256_ph(_mm512_cvtps_ph(c, _MM_FROUND_TO_NEAREST_INT))) + }; - } else { - __m256i lo = _mm512_castsi512_si256(_mm512_castph_si512(v)); - __m256i hi = _mm512_extracti64x4_epi64(_mm512_castph_si512(v), 1); + } else { + __m256i lo = _mm512_castsi512_si256(_mm512_castph_si512(v)); + __m256i hi = _mm512_extracti64x4_epi64(_mm512_castph_si512(v), 1); - __m512 s_lo, c_lo, s_hi, c_hi; - sincos_f32x16(_mm512_cvtph_ps(lo), s_lo, c_lo); - sincos_f32x16(_mm512_cvtph_ps(hi), s_hi, c_hi); + __m512 s_lo, c_lo, s_hi, c_hi; + sincos_f32x16(_mm512_cvtph_ps(lo), s_lo, c_lo); + sincos_f32x16(_mm512_cvtph_ps(hi), s_hi, c_hi); - auto pack = [](__m256i lo_ph, __m256i hi_ph) { - return _mm512_castsi512_ph(_mm512_inserti64x4(_mm512_castsi256_si512(lo_ph), hi_ph, 1)); - }; - return { - VectorF16(pack(_mm512_cvtps_ph(s_lo, _MM_FROUND_TO_NEAREST_INT), _mm512_cvtps_ph(s_hi, _MM_FROUND_TO_NEAREST_INT))), - VectorF16(pack( _mm512_cvtps_ph(c_lo, _MM_FROUND_TO_NEAREST_INT), _mm512_cvtps_ph(c_hi, _MM_FROUND_TO_NEAREST_INT))) - }; + auto pack = [](__m256i lo_ph, __m256i hi_ph) { + return _mm512_castsi512_ph(_mm512_inserti64x4(_mm512_castsi256_si512(lo_ph), hi_ph, 1)); + }; + return { + VectorF16(pack(_mm512_cvtps_ph(s_lo, _MM_FROUND_TO_NEAREST_INT), _mm512_cvtps_ph(s_hi, _MM_FROUND_TO_NEAREST_INT))), + VectorF16(pack( _mm512_cvtps_ph(c_lo, _MM_FROUND_TO_NEAREST_INT), _mm512_cvtps_ph(c_hi, _MM_FROUND_TO_NEAREST_INT))) + }; + } } - } template values> constexpr VectorF16 Negate() { @@ -421,12 +423,12 @@ namespace Crafter { constexpr static VectorF16 Cross(VectorF16 a, VectorF16 b) requires(Len == 3) { if constexpr(std::is_same_v) { - constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{1,2,0}>(); + constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>(); __m128i shuffleVec1 = _mm_loadu_epi8(shuffleMask1.data()); __m128h row1 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(a.v), shuffleVec1)); __m128h row4 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(b.v), shuffleVec1)); - constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{2,0,1}>(); + constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>(); __m128i shuffleVec3 = _mm_loadu_epi8(shuffleMask3.data()); __m128h row3 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(a.v), shuffleVec3)); __m128h row2 = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(b.v), shuffleVec3)); @@ -434,12 +436,12 @@ namespace Crafter { __m128h result = _mm_mul_ph(row3, row4); return _mm_fmsub_ph(row1,row2,result); } else if constexpr (std::is_same_v) { - constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{1,2,0}>(); + constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>(); __m512i shuffleVec1 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask1.data())); __m256h row1 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(a.v)), shuffleVec1))); __m256h row4 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(b.v)), shuffleVec1))); - constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{2,0,1}>(); + constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>(); __m512i shuffleVec3 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask3.data())); __m256h row3 = _mm256_castsi256_ph(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castph_si256(a.v)), shuffleVec3))); @@ -448,13 +450,13 @@ namespace Crafter { __m256h result = _mm256_mul_ph(row3, row4); return _mm256_fmsub_ph(row1,row2,result); } else { - constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{1,2,0}>(); + constexpr std::array shuffleMask1 = GetShuffleMaskEpi8<{{1,2,0}}>(); __m512i shuffleVec1 = _mm512_loadu_epi8(shuffleMask1.data()); __m512h row1 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(a.v), shuffleVec1)); __m512h row4 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(b.v), shuffleVec1)); - constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{2,0,1}>(); + constexpr std::array shuffleMask3 = GetShuffleMaskEpi8<{{2,0,1}}>(); __m512i shuffleVec3 = _mm512_loadu_epi8(shuffleMask3.data()); __m512h row3 = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(a.v), shuffleVec3)); @@ -488,14 +490,14 @@ namespace Crafter { VectorF16 G, VectorF16 H ) requires(Len == 8) { - constexpr std::uint8_t shuffleMaskA[] = GetShuffleMaskEpi8<{0,0,0,0,0,0,0,0}>(); - constexpr std::uint8_t shuffleMaskB[] = GetShuffleMaskEpi8<{1,1,1,1,1,1,1,1}>(); - constexpr std::uint8_t shuffleMaskC[] = GetShuffleMaskEpi8<{2,2,2,2,2,2,2,2}>(); - constexpr std::uint8_t shuffleMaskD[] = GetShuffleMaskEpi8<{3,3,3,3,3,3,3,3}>(); - constexpr std::uint8_t shuffleMaskE[] = GetShuffleMaskEpi8<{4,4,4,4,4,4,4,4}>(); - constexpr std::uint8_t shuffleMaskF[] = GetShuffleMaskEpi8<{5,5,5,5,5,5,5,5}>(); - constexpr std::uint8_t shuffleMaskG[] = GetShuffleMaskEpi8<{6,6,6,6,6,6,6,6}>(); - constexpr std::uint8_t shuffleMaskH[] = GetShuffleMaskEpi8<{7,7,7,7,7,7,7,7}>(); + constexpr std::array shuffleMaskA = GetShuffleMaskEpi8<{{0,0,0,0,0,0,0,0}}>(); + constexpr std::array shuffleMaskB = GetShuffleMaskEpi8<{{1,1,1,1,1,1,1,1}}>(); + constexpr std::array shuffleMaskC = GetShuffleMaskEpi8<{{2,2,2,2,2,2,2,2}}>(); + constexpr std::array shuffleMaskD = GetShuffleMaskEpi8<{{3,3,3,3,3,3,3,3}}>(); + constexpr std::array shuffleMaskE = GetShuffleMaskEpi8<{{4,4,4,4,4,4,4,4}}>(); + constexpr std::array shuffleMaskF = GetShuffleMaskEpi8<{{5,5,5,5,5,5,5,5}}>(); + constexpr std::array shuffleMaskG = GetShuffleMaskEpi8<{{6,6,6,6,6,6,6,6}}>(); + constexpr std::array shuffleMaskH = GetShuffleMaskEpi8<{{7,7,7,7,7,7,7,7}}>(); if constexpr(std::is_same_v) { VectorF16 lenght = Length(A, B, C, D, E, F, G, H); @@ -503,29 +505,28 @@ namespace Crafter { __m128h one = _mm_loadu_ph(oneArr); __m128h fLenght = _mm_div_ph(one, lenght.v); - - __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA); + __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA.data()); __m128h fLenghtA = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecA)); - __m128i shuffleVecB = _mm_loadu_epi8(shuffleMaskB); + __m128i shuffleVecB = _mm_loadu_epi8(shuffleMaskB.data()); __m128h fLenghtB = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecB)); - __m128i shuffleVecC = _mm_loadu_epi8(shuffleMaskC); + __m128i shuffleVecC = _mm_loadu_epi8(shuffleMaskC.data()); __m128h fLenghtC = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecC)); - __m128i shuffleVecD = _mm_loadu_epi8(shuffleMaskD); + __m128i shuffleVecD = _mm_loadu_epi8(shuffleMaskD.data()); __m128h fLenghtD = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecD)); - __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE); + __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE.data()); __m128h fLenghtE = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecE)); - __m128i shuffleVecF = _mm_loadu_epi8(shuffleMaskF); + __m128i shuffleVecF = _mm_loadu_epi8(shuffleMaskF.data()); __m128h fLenghtF = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecF)); - __m128i shuffleVecG = _mm_loadu_epi8(shuffleMaskG); + __m128i shuffleVecG = _mm_loadu_epi8(shuffleMaskG.data()); __m128h fLenghtG = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecG)); - __m128i shuffleVecH = _mm_loadu_epi8(shuffleMaskH); + __m128i shuffleVecH = _mm_loadu_epi8(shuffleMaskH.data()); __m128h fLenghtH = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecH)); return { @@ -544,28 +545,28 @@ namespace Crafter { __m256h one = _mm256_loadu_ph(oneArr); __m256h fLenght = _mm256_div_ph(one, lenght.v); - __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA); + __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA.data()); __m256h fLenghtA = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecA)); - __m256i shuffleVecB = _mm256_loadu_epi8(shuffleMaskB); + __m256i shuffleVecB = _mm256_loadu_epi8(shuffleMaskB.data()); __m256h fLenghtB = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecB)); - __m256i shuffleVecC = _mm256_loadu_epi8(shuffleMaskC); + __m256i shuffleVecC = _mm256_loadu_epi8(shuffleMaskC.data()); __m256h fLenghtC = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecC)); - __m256i shuffleVecD = _mm256_loadu_epi8(shuffleMaskD); + __m256i shuffleVecD = _mm256_loadu_epi8(shuffleMaskD.data()); __m256h fLenghtD = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecD)); - __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE); + __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE.data()); __m256h fLenghtE = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecE)); - __m256i shuffleVecF = _mm256_loadu_epi8(shuffleMaskF); + __m256i shuffleVecF = _mm256_loadu_epi8(shuffleMaskF.data()); __m256h fLenghtF = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecF)); - __m256i shuffleVecG = _mm256_loadu_epi8(shuffleMaskG); + __m256i shuffleVecG = _mm256_loadu_epi8(shuffleMaskG.data()); __m256h fLenghtG = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecG)); - __m256i shuffleVecH = _mm256_loadu_epi8(shuffleMaskH); + __m256i shuffleVecH = _mm256_loadu_epi8(shuffleMaskH.data()); __m256h fLenghtH = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecH)); return { @@ -584,28 +585,28 @@ namespace Crafter { __m512h one = _mm512_loadu_ph(oneArr); __m512h fLenght = _mm512_div_ph(one, lenght.v); - __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA); + __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA.data()); __m512h fLenghtA = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecA)); - __m512i shuffleVecB = _mm512_loadu_epi8(shuffleMaskB); + __m512i shuffleVecB = _mm512_loadu_epi8(shuffleMaskB.data()); __m512h fLenghtB = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecB)); - __m512i shuffleVecC = _mm512_loadu_epi8(shuffleMaskC); + __m512i shuffleVecC = _mm512_loadu_epi8(shuffleMaskC.data()); __m512h fLenghtC = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecC)); - __m512i shuffleVecD = _mm512_loadu_epi8(shuffleMaskD); + __m512i shuffleVecD = _mm512_loadu_epi8(shuffleMaskD.data()); __m512h fLenghtD = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecD)); - __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE); + __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE.data()); __m512h fLenghtE = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecE)); - __m512i shuffleVecF = _mm512_loadu_epi8(shuffleMaskF); + __m512i shuffleVecF = _mm512_loadu_epi8(shuffleMaskF.data()); __m512h fLenghtF = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecF)); - __m512i shuffleVecG = _mm512_loadu_epi8(shuffleMaskG); + __m512i shuffleVecG = _mm512_loadu_epi8(shuffleMaskG.data()); __m512h fLenghtG = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecG)); - __m512i shuffleVecH = _mm512_loadu_epi8(shuffleMaskH); + __m512i shuffleVecH = _mm512_loadu_epi8(shuffleMaskH.data()); __m512h fLenghtH = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecH)); return { @@ -627,10 +628,10 @@ namespace Crafter { VectorF16 E, VectorF16 G ) requires(Len == 4) { - constexpr std::uint8_t shuffleMaskA[] = GetShuffleMaskEpi8<{0,0,0,0}>(); - constexpr std::uint8_t shuffleMaskC[] = GetShuffleMaskEpi8<{1,1,1,1}>(); - constexpr std::uint8_t shuffleMaskE[] = GetShuffleMaskEpi8<{2,2,2,2}>(); - constexpr std::uint8_t shuffleMaskG[] = GetShuffleMaskEpi8<{3,3,3,3}>(); + constexpr std::array shuffleMaskA = GetShuffleMaskEpi8<{{0,0,0,0}}>(); + constexpr std::array shuffleMaskC = GetShuffleMaskEpi8<{{1,1,1,1}}>(); + constexpr std::array shuffleMaskE = GetShuffleMaskEpi8<{{2,2,2,2}}>(); + constexpr std::array shuffleMaskG = GetShuffleMaskEpi8<{{3,3,3,3}}>(); if constexpr(std::is_same_v) { VectorF16 lenght = Length(A, C, E, G); @@ -638,16 +639,16 @@ namespace Crafter { __m128h one = _mm_loadu_ph(oneArr); __m128h fLenght = _mm_div_ph(one, lenght.v); - __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA); + __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA.data()); __m128h fLenghtA = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecA)); - __m128i shuffleVecC = _mm_loadu_epi8(shuffleMaskC); + __m128i shuffleVecC = _mm_loadu_epi8(shuffleMaskC.data()); __m128h fLenghtC = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecC)); - __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE); + __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE.data()); __m128h fLenghtE = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecE)); - __m128i shuffleVecG = _mm_loadu_epi8(shuffleMaskG); + __m128i shuffleVecG = _mm_loadu_epi8(shuffleMaskG.data()); __m128h fLenghtG = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecG)); return { @@ -662,16 +663,16 @@ namespace Crafter { __m256h one = _mm256_loadu_ph(oneArr); __m256h fLenght = _mm256_div_ph(one, lenght.v); - __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA); + __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA.data()); __m256h fLenghtA = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecA)); - __m256i shuffleVecC = _mm256_loadu_epi8(shuffleMaskC); + __m256i shuffleVecC = _mm256_loadu_epi8(shuffleMaskC.data()); __m256h fLenghtC = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecC)); - __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE); + __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE.data()); __m256h fLenghtE = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecE)); - __m256i shuffleVecG = _mm256_loadu_epi8(shuffleMaskG); + __m256i shuffleVecG = _mm256_loadu_epi8(shuffleMaskG.data()); __m256h fLenghtG = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecG)); return { @@ -686,16 +687,16 @@ namespace Crafter { __m512h one = _mm512_loadu_ph(oneArr); __m512h fLenght = _mm512_div_ph(one, lenght.v); - __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA); + __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA.data()); __m512h fLenghtA = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecA)); - __m512i shuffleVecC = _mm512_loadu_epi8(shuffleMaskC); + __m512i shuffleVecC = _mm512_loadu_epi8(shuffleMaskC.data()); __m512h fLenghtC = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecC)); - __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE); + __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE.data()); __m512h fLenghtE = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecE)); - __m512i shuffleVecG = _mm512_loadu_epi8(shuffleMaskG); + __m512i shuffleVecG = _mm512_loadu_epi8(shuffleMaskG.data()); __m512h fLenghtG = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecG)); return { @@ -711,8 +712,8 @@ namespace Crafter { VectorF16 A, VectorF16 E ) requires(Len == 2) { - constexpr std::uint8_t shuffleMaskA[] = GetShuffleMaskEpi8<{0,0}>(); - constexpr std::uint8_t shuffleMaskE[] = GetShuffleMaskEpi8<{1,1}>(); + constexpr std::array shuffleMaskA = GetShuffleMaskEpi8<{{0,0}}>(); + constexpr std::array shuffleMaskE = GetShuffleMaskEpi8<{{1,1}}>(); if constexpr(std::is_same_v) { VectorF16 lenght = Length(A, E); @@ -720,10 +721,10 @@ namespace Crafter { __m128h one = _mm_loadu_ph(oneArr); __m128h fLenght = _mm_div_ph(one, lenght.v); - __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA); + __m128i shuffleVecA = _mm_loadu_epi8(shuffleMaskA.data()); __m128h fLenghtA = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecA)); - __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE); + __m128i shuffleVecE = _mm_loadu_epi8(shuffleMaskE.data()); __m128h fLenghtE = _mm_castsi128_ph(_mm_shuffle_epi8(_mm_castph_si128(fLenght), shuffleVecE)); return { @@ -736,10 +737,10 @@ namespace Crafter { __m256h one = _mm256_loadu_ph(oneArr); __m256h fLenght = _mm256_div_ph(one, lenght.v); - __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA); + __m256i shuffleVecA = _mm256_loadu_epi8(shuffleMaskA.data()); __m256h fLenghtA = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecA)); - __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE); + __m256i shuffleVecE = _mm256_loadu_epi8(shuffleMaskE.data()); __m256h fLenghtE = _mm256_castsi256_ph(_mm256_shuffle_epi8(_mm256_castph_si256(fLenght), shuffleVecE)); return { @@ -752,10 +753,10 @@ namespace Crafter { __m512h one = _mm512_loadu_ph(oneArr); __m512h fLenght = _mm512_div_ph(one, lenght.v); - __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA); + __m512i shuffleVecA = _mm512_loadu_epi8(shuffleMaskA.data()); __m512h fLenghtA = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecA)); - __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE); + __m512i shuffleVecE = _mm512_loadu_epi8(shuffleMaskE.data()); __m512h fLenghtE = _mm512_castsi512_ph(_mm512_shuffle_epi8(_mm512_castph_si512(fLenght), shuffleVecE)); return { @@ -1158,19 +1159,19 @@ namespace Crafter { } } - template ShuffleValues> + template ShuffleValues> constexpr static VectorF16 Blend(VectorF16 a, VectorF16 b) { if constexpr(std::is_same_v) { - return _mm_castsi128_ph(_mm_blend_epi16(GetBlendMaskEpi16(), _mm_castph_si128(a.v), _mm_castph_si128(b))); + return _mm_castsi128_ph(_mm_blend_epi16(_mm_castph_si128(a.v), _mm_castph_si128(b.v), GetBlendMaskEpi16())); } else if constexpr(std::is_same_v) { #ifndef __AVX512BW__ #ifndef __AVX512VL__ static_assert(false, "No __AVX512BW__ and __AVX512VL__ support"); #endif #endif - return _mm256_castsi256_ph(_mm256_mask_blend_epi16(_mm256_castph_si256(a.v), _mm256_castph_si256(b), GetBlendMaskEpi16())); + return _mm256_castsi256_ph(_mm256_mask_blend_epi16(_mm256_castph_si256(a.v), _mm256_castph_si256(b.v), GetBlendMaskEpi16())); } else { - return _mm512_castsi512_ph(_mm512_blend_epi16(GetBlendMaskEpi16(), _mm512_castph_si512(a.v), _mm512_castph_si512(b))); + return _mm512_castsi512_ph(_mm512_blend_epi16(GetBlendMaskEpi16(), _mm512_castph_si512(a.v), _mm512_castph_si512(b.v))); } } @@ -1189,34 +1190,36 @@ namespace Crafter { } constexpr static VectorF16<4, Packing> QuanternionFromEuler(VectorF16<3, Packing> EulerHalf) requires(Len == 4) { - VectorF16<4, Packing> sin = EulerHalf.Sin(); - VectorF16<4, Packing> cos = EulerHalf.Cos(); + std::tuple, VectorF16<3, Packing>> sinCos = EulerHalf.SinCos(); + VectorF16<4, Packing> sin = std::get<0>(sinCos); + VectorF16<4, Packing> cos = std::get<1>(sinCos); - VectorF16<4, Packing> row1 = cos.template Shuffle<{{0,0,0,0}}>(); + VectorF16<4, Packing> row1 = cos.template Shuffle<{{0,0,0,0}}>(); row1 = Blend<{{0,1,1,1}}>(sin, row1); VectorF16<4, Packing> row2 = cos.template Shuffle<{{1,1,1,1}}>(); row2 = Blend<{{1,0,1,1}}>(sin, row2); - row1 = row2; + row1 *= row2; VectorF16<4, Packing> row3 = cos.template Shuffle<{{2,2,2,2}}>(); row3 = Blend<{{1,1,0,1}}>(sin, row3); row1 *= row3; - VectorF16<4, Packing> row4 = sin.template Shuffle<{{0,0,0,0}}>(); - row4 = Blend<{{1,0,0,0}}>(sin, row4); - row1 *= row4; - row1 = row1.template Negate<{{true,false,true}}>(); - + VectorF16<4, Packing> row4 = sin.template Shuffle<{{0,0,0,0}}>(); + row4 = Blend<{{0,1,1,1}}>(cos, row4); + VectorF16<4, Packing> row5 = sin.template Shuffle<{{1,1,1,1}}>(); - row5 = Blend<{{0,1,0,0}}>(sin, row5); + row5 = Blend<{{1,0,1,1}}>(cos, row5); + + row4 *= row5; VectorF16<4, Packing> row6 = sin.template Shuffle<{{2,2,2,2}}>(); - row6 = Blend<{{0,0,1,0}}>(sin, row6); + row6 = Blend<{{1,1,0,1}}>(cos, row6); + row6 = row6.template Negate<{{true,false,true,false}}>(); - row1 = MulitplyAdd(row5, row6, row1); + row1 = MulitplyAdd(row4, row6, row1); return row1; } @@ -1323,7 +1326,7 @@ namespace Crafter { return mask; } - template ShuffleValues> + template ShuffleValues> static consteval std::uint16_t GetBlendMaskEpi16() requires (std::is_same_v){ std::uint16_t mask = 0; for (std::uint8_t i2 = 0; i2 < Packing; i2++) { @@ -1336,7 +1339,7 @@ namespace Crafter { return mask; } - template ShuffleValues> + template ShuffleValues> static consteval std::uint32_t GetBlendMaskEpi16() requires (std::is_same_v){ std::uint32_t mask = 0; for (std::uint8_t i2 = 0; i2 < Packing; i2++) { diff --git a/tests/VectorF16.cpp b/tests/VectorF16.cpp index 1934a09..2345e8a 100644 --- a/tests/VectorF16.cpp +++ b/tests/VectorF16.cpp @@ -16,379 +16,69 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include import Crafter.Math; import std; using namespace Crafter; +template +std::string* TestLoadStore() { + _Float16 floats[Len * Packing]; + for (std::uint32_t i = 0; i < Len * Packing; i++) { + floats[i] = static_cast<_Float16>(i); + } + + VectorF16 vec(floats); + auto stored = vec.Store(); + for (std::uint32_t i = 0; i < Len * Packing; i++) { + if (stored.v[i] != floats[i]) { + return new std::string(std::format("Load/Store mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i]), (float)stored.v[i])); + } + } + return nullptr; +} + +template +std::string* TestAdd() { + _Float16 floats[Len * Packing]; + for (std::uint32_t i = 0; i < Len * Packing; i++) { + floats[i] = static_cast<_Float16>(i); + } + + VectorF16 vec(floats); + vec = vec + vec; + auto stored = vec.Store(); + for (std::uint32_t i = 0; i < Len * Packing; i++) { + if (stored.v[i] != floats[i] + floats[i]) { + return new std::string(std::format("Add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + floats[i]), (float)stored.v[i])); + } + } + return nullptr; +} + + +template +std::string* TestAllCombinations() { + if constexpr (Len > MaxSize) { + return nullptr; + } else if constexpr (Len * Packing > MaxSize) { + return TestAllCombinations(); + } else { + std::string* result = TestLoadStore(); + if (result) return result; + result = TestAdd(); + if (result) return result; + return TestAllCombinations(); + } +} + + extern "C" { std::string* RunTest() { - // Test 1: Load/Store functionality - { - _Float16 floats[] {0,1,2,3,4,5,6,7}; - VectorF16<8, 1> vec1(floats); - - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i]) { - return new std::string("Load Store does not match"); - } - } + std::string* err = TestAllCombinations::MaxSize>(); + if (err) { + return err; } - - // Test 2: Addition operator - { - _Float16 floats[] {0,1,2,3,4,5,6,7}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 + vec1; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] + floats[i]) { - return new std::string("Add does not match"); - } - } - } - - // Test 3: Subtraction operator - { - _Float16 floats[] {0,1,2,3,4,5,6,7}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 - vec1; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] - floats[i]) { - return new std::string("Subtract does not match"); - } - } - } - - // Test 4: Multiplication operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 * vec1; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] * floats[i]) { - return new std::string("Multiply does not match"); - } - } - } - - // Test 5: Division operator - { - _Float16 floats[] {2,4,6,8,10,12,14,16}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 / vec1; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] / floats[i]) { - return new std::string("Divide does not match"); - } - } - } - - // Test 6: Compound addition operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> vec2(floats); - vec1 += vec2; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] + floats[i]) { - return new std::string("Compound Add does not match"); - } - } - } - - // Test 7: Compound subtraction operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> vec2(floats); - vec1 -= vec2; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] - floats[i]) { - return new std::string("Compound Subtract does not match"); - } - } - } - - // Test 8: Compound multiplication operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> vec2(floats); - vec1 *= vec2; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] * floats[i]) { - return new std::string("Compound Multiply does not match"); - } - } - } - - // Test 9: Compound division operator - { - _Float16 floats[] {2,4,6,8,10,12,14,16}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> vec2(floats); - vec1 /= vec2; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] / floats[i]) { - return new std::string("Compound Divide does not match"); - } - } - } - - // Test 10: Scalar addition - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 + _Float16(1.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] + 1.0) { - return new std::string("Scalar Add does not match"); - } - } - } - - // Test 11: Scalar subtraction - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 - _Float16(1.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] - 1.0) { - return new std::string("Scalar Subtract does not match"); - } - } - } - - // Test 12: Scalar multiplication - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 * _Float16(2.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] * 2.0) { - return new std::string("Scalar Multiply does not match"); - } - } - } - - // Test 13: Scalar division - { - _Float16 floats[] {2,4,6,8,10,12,14,16}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = vec1 / _Float16(2.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] / 2.0) { - return new std::string("Scalar Divide does not match"); - } - } - } - - // Test 14: Compound scalar addition - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - vec1 += _Float16(1.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] + 1.0) { - return new std::string("Compound Scalar Add does not match"); - } - } - } - - // Test 15: Compound scalar subtraction - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - vec1 -= _Float16(1.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] - 1.0) { - return new std::string("Compound Scalar Subtract does not match"); - } - } - } - - // Test 16: Compound scalar multiplication - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - vec1 *= _Float16(2.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] * 2.0) { - return new std::string("Compound Scalar Multiply does not match"); - } - } - } - - // Test 17: Compound scalar division - { - _Float16 floats[] {2,4,6,8,10,12,14,16}; - VectorF16<8, 1> vec1(floats); - vec1 /= _Float16(2.0); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != floats[i] / 2.0) { - return new std::string("Compound Scalar Divide does not match"); - } - } - } - - // Test 18: Equality operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> vec2(floats); - if (!(vec1 == vec2)) { - return new std::string("Equality operator does not match"); - } - } - - // Test 19: Inequality operator - { - _Float16 floats1[] {1,2,3,4,5,6,7,8}; - _Float16 floats2[] {2,3,4,5,6,7,8,9}; - VectorF16<8, 1> vec1(floats1); - VectorF16<8, 1> vec2(floats2); - if (!(vec1 != vec2)) { - return new std::string("Inequality operator does not match"); - } - } - - // Test 20: Negation operator - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - VectorF16<8, 1> result = -vec1; - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != -floats[i]) { - return new std::string("Negation operator does not match"); - } - } - } - - // Test 21: Length calculation - { - _Float16 floats[] {3,4,0,0,0,0,0,0}; - VectorF16<8, 1> vec1(floats); - _Float16 length = vec1.Length(); - _Float16 expectedLength = 5.0; // sqrt(3^2 + 4^2) - if (std::abs((float)length - (float)expectedLength) > 0.001) { - return new std::string("Length calculation does not match"); - } - } - - // Test 22: Length squared calculation - { - _Float16 floats[] {3,4,0,0,0,0,0,0}; - VectorF16<8, 1> vec1(floats); - _Float16 lengthSq = vec1.LengthSq(); - _Float16 expectedLengthSq = 25.0; // 3^2 + 4^2 - if (std::abs((float)lengthSq - (float)expectedLengthSq) > 0.001) { - return new std::string("Length squared calculation does not match"); - } - } - - // Test 25: Shuffle operation - { - _Float16 floats[] {1,2,3,4,5,6,7,8}; - VectorF16<8, 1> vec1(floats); - // Shuffle indices 0,1,2,3 -> 3,2,1,0 (reverse first 4 elements) - VectorF16<8, 1> result = vec1.template Shuffle<{{3,2,1,0,7,6,5,4}}>(); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - if (stored.v[0] != 4 || stored.v[1] != 3 || stored.v[2] != 2 || stored.v[3] != 1) { - return new std::string("Shuffle operation does not match"); - } - } - - // Test 26: Dot product - { - _Float16 floats1[] {1,2,3,4,0,0,0,0}; - _Float16 floats2[] {2,3,4,5,0,0,0,0}; - VectorF16<8, 1> vec1(floats1); - VectorF16<8, 1> vec2(floats2); - _Float16 dot = VectorF16<8, 1>::Dot(vec1, vec2); - _Float16 expectedDot = 1*2 + 2*3 + 3*4 + 4*5; // 2 + 6 + 12 + 20 = 40 - if (std::abs((float)dot - (float)expectedDot) > 0.001) { - return new std::string("Dot product does not match"); - } - } - - // Test 27: Cross product (for 3D vectors) - { - _Float16 floats1[] {1,2,3,0,0,0,0,0}; - _Float16 floats2[] {4,5,6,0,0,0,0,0}; - VectorF16<3, 1> vec1(floats1); - VectorF16<3, 1> vec2(floats2); - VectorF16<3, 1> result = VectorF16<3, 1>::Cross(vec1, vec2); - Vector<_Float16, 3, VectorF16<3, 1>::Alignment> stored = result.Store(); - // Cross product: (1,2,3) x (4,5,6) = (2*6-3*5, 3*4-1*6, 1*5-2*4) = (-3, 6, -3) - if (stored.v[0] != -3 || stored.v[1] != 6 || stored.v[2] != -3) { - return new std::string("Cross product does not match"); - } - } - - // Test 28: Multiply-Add operation - { - _Float16 floats1[] {1,2,3,4,0,0,0,0}; - _Float16 floats2[] {2,3,4,5,0,0,0,0}; - _Float16 floats3[] {1,1,1,1,0,0,0,0}; - VectorF16<8, 1> vec1(floats1); - VectorF16<8, 1> vec2(floats2); - VectorF16<8, 1> vec3(floats3); - VectorF16<8, 1> result = VectorF16<8, 1>::MulitplyAdd(vec1, vec2, vec3); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - // Should compute (1*2 + 1, 2*3 + 1, 3*4 + 1, 4*5 + 1, ...) = (3, 7, 13, 21, ...) - for(std::uint8_t i = 0; i < 4; i++) { - if(stored.v[i] != floats1[i]*floats2[i] + floats3[i]) { - return new std::string("Multiply-Add operation does not match"); - } - } - } - - // Test 29: Multiply-Subtract operation - { - _Float16 floats1[] {1,2,3,4,0,0,0,0}; - _Float16 floats2[] {2,3,4,5,0,0,0,0}; - _Float16 floats3[] {1,1,1,1,0,0,0,0}; - VectorF16<8, 1> vec1(floats1); - VectorF16<8, 1> vec2(floats2); - VectorF16<8, 1> vec3(floats3); - VectorF16<8, 1> result = VectorF16<8, 1>::MulitplySub(vec1, vec2, vec3); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = result.Store(); - // Should compute (1*2 - 1, 2*3 - 1, 3*4 - 1, 4*5 - 1, ...) = (1, 5, 11, 19, ...) - for(std::uint8_t i = 0; i < 4; i++) { - if(stored.v[i] != floats1[i]*floats2[i] - floats3[i]) { - return new std::string("Multiply-Subtract operation does not match"); - } - } - } - - // Test 30: Constructor with single value - { - VectorF16<8, 1> vec1(_Float16(5.0)); - Vector<_Float16, 8, VectorF16<8, 1>::Alignment> stored = vec1.Store(); - for(std::uint8_t i = 0; i < 8; i++) { - if(stored.v[i] != 5.0) { - return new std::string("Single value constructor does not match"); - } - } - } - return nullptr; } } \ No newline at end of file