diff --git a/interfaces/Crafter.Math-Basic.cppm b/interfaces/Crafter.Math-Basic.cppm index 5f392a5..83095c1 100755 --- a/interfaces/Crafter.Math-Basic.cppm +++ b/interfaces/Crafter.Math-Basic.cppm @@ -20,7 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA export module Crafter.Math:Basic; import std; -import :VectorF16; +import :VectorF32; namespace Crafter { template diff --git a/interfaces/Crafter.Math-Common.cppm b/interfaces/Crafter.Math-Common.cppm index 7531896..de17abc 100644 --- a/interfaces/Crafter.Math-Common.cppm +++ b/interfaces/Crafter.Math-Common.cppm @@ -6,15 +6,19 @@ export module Crafter.Math:Common; import std; namespace Crafter { + #ifdef __AVX512FP16__ export template struct VectorF16; + #endif export template struct VectorF32; template struct VectorBase { + #ifdef __AVX512FP16__ template friend struct VectorF16; + #endif template friend struct VectorF32; protected: diff --git a/interfaces/Crafter.Math-Intersection.cppm b/interfaces/Crafter.Math-Intersection.cppm index 370cb9f..7bbc90f 100755 --- a/interfaces/Crafter.Math-Intersection.cppm +++ b/interfaces/Crafter.Math-Intersection.cppm @@ -23,13 +23,36 @@ import :MatrixRowMajor; import std; namespace Crafter { - export template - constexpr T IntersectionTestRayTriangle(Vector vert0, Vector vert1, Vector vert2, Vector rayOrigin, Vector rayDir) { - Vector edge1 = vert1 - vert0; - Vector edge2 = vert2 - vert0; + export + constexpr std::array IntersectionTestRayTriangle( + VectorF32<3, 5> vertA0, + VectorF32<3, 5> vertA1, + VectorF32<3, 5> vertA2, + + VectorF32<3, 5> vertB0, + VectorF32<3, 5> vertB1, + VectorF32<3, 5> vertB2, + + VectorF32<3, 5> vertC0, + VectorF32<3, 5> vertC1, + VectorF32<3, 5> vertC2, + + VectorF32<3, 5> rayOrigin, + VectorF32<3, 5> rayDir + ) { + + VectorF32<3, Packing> edgeA1 = vertA1 - vertA0; + VectorF32<3, Packing> edgeA2 = vertA2 - vertA0; + VectorF32<3, Packing> crossA = VectorF32<3, Packing> ::Cross(rayDir, edgeA2); + + VectorF32<3, Packing> edgeB1 = vertB1 - vertB0; + VectorF32<3, Packing> edgeB2 = vertB2 - vertB0; + VectorF32<3, Packing> crossB = VectorF32<3, Packing> ::Cross(rayDir, edgeB2); + + VectorF32<3, Packing> edgeC1 = vertC1 - vertC0; + VectorF32<3, Packing> edgeC2 = vertC2 - vertC0; + VectorF32<3, Packing> crossC = VectorF32<3, Packing> ::Cross(rayDir, edgeC2); - Vector h = Vector::Cross(rayDir, edge2); - T determinant = Vector::Dot(edge1, h); if (determinant <= std::numeric_limits::epsilon()) { return std::numeric_limits::max(); diff --git a/interfaces/Crafter.Math-VectorF16.cppm b/interfaces/Crafter.Math-VectorF16.cppm index 1962787..371e5a0 100755 --- a/interfaces/Crafter.Math-VectorF16.cppm +++ b/interfaces/Crafter.Math-VectorF16.cppm @@ -66,6 +66,7 @@ namespace Crafter { } } + template constexpr std::array<_Float16, VectorBase::AlignmentElement> Store() const { std::array<_Float16, VectorBase::AlignmentElement> returnArray; Store(returnArray.data()); @@ -1029,7 +1030,7 @@ namespace Crafter { export template struct std::formatter> : std::formatter { constexpr auto format(const Crafter::VectorF16& obj, format_context& ctx) const { - std::array<_Float16, Crafter::VectorF16::AlignmentElement> vec = obj.Store(); + std::array<_Float16, Crafter::VectorF16::AlignmentElement> vec = obj.template Store(); std::string out = "{"; for(std::uint32_t i = 0; i < Packing; i++) { out += "{"; diff --git a/interfaces/Crafter.Math-VectorF32.cppm b/interfaces/Crafter.Math-VectorF32.cppm index 268f5da..5258fe0 100755 --- a/interfaces/Crafter.Math-VectorF32.cppm +++ b/interfaces/Crafter.Math-VectorF32.cppm @@ -24,7 +24,6 @@ export module Crafter.Math:VectorF32; import std; import :Common; -#ifdef __AVX512FP16__ namespace Crafter { export template struct VectorF32 : public VectorBase { @@ -38,6 +37,9 @@ namespace Crafter { constexpr VectorF32(const float* vB) { Load(vB); }; + constexpr VectorF32(const _Float16* vB) { + Load(vB); + }; constexpr VectorF32(float val) { if constexpr(std::is_same_v::VectorType, __m128>) { this->v = _mm_set1_ps(val); @@ -66,8 +68,55 @@ namespace Crafter { } } - constexpr std::array::AlignmentElement> Store() const { - std::array::AlignmentElement> returnArray; + constexpr void Load(const _Float16* vB) { + #ifdef __F16C__ + if constexpr (std::is_same_v::VectorType, __m128>) { + this->v = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast(vB))); + } else if constexpr (std::is_same_v::VectorType, __m256>) { + this->v = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(vB))); + } else { + this->v = _mm512_cvtph_ps(_mm256_loadu_si256(reinterpret_cast(vB))); + } + #else + alignas(64) float tmp[Len]; + for (int i = 0; i < Len; ++i) + tmp[i] = static_cast(vB[i]); + if constexpr (std::is_same_v::VectorType, __m128>) { + this->v = _mm_load_ps(tmp); + } else if constexpr (std::is_same_v::VectorType, __m256>) { + this->v = _mm256_load_ps(tmp); + } else { + this->v = _mm512_load_ps(tmp); + } + #endif + } + + constexpr void Store(_Float16* vB) const { + #ifdef __F16C__ + if constexpr (std::is_same_v::VectorType, __m128>) { + _mm_storel_epi64(reinterpret_cast<__m128i*>(vB), _mm_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT)); + } else if constexpr (std::is_same_v::VectorType, __m256>) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(vB), _mm256_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT)); + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i*>(vB), _mm512_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT)); + } + #else + alignas(64) float tmp[Len]; + if constexpr (std::is_same_v::VectorType, __m128>) { + _mm_store_ps(tmp, this->v); + } else if constexpr (std::is_same_v::VectorType, __m256>) { + _mm256_store_ps(tmp, this->v); + } else { + _mm512_store_ps(tmp, this->v); + } + for (int i = 0; i < Len; ++i) + vB[i] = static_cast<_Float16>(tmp[i]); + #endif + } + + template + constexpr std::array::AlignmentElement> Store() const { + std::array::AlignmentElement> returnArray; Store(returnArray.data()); return returnArray; } @@ -96,36 +145,41 @@ namespace Crafter { if constexpr(std::is_same_v::VectorType, __m128>) { if constexpr(std::is_same_v::VectorType, __m128>) { constexpr std::array::Alignment> shuffleMask = VectorBase::template GetExtractLoMaskEpi8(); - __m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data()); + __m128i shuffleVec = _mm_loadu_si128(reinterpret_cast(shuffleMask.data())); return VectorF32(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec))); } else if constexpr(std::is_same_v::VectorType, __m256>) { constexpr std::array::AlignmentElement> permMask =VectorBase::template GetExtractLoMaskepi32(); - __m256i permIdx = _mm256_loadu_epi32(permMask.data()); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); __m256i result = _mm256_permutexvar_epi32(permIdx, _mm_castps_si256(this->v)); return VectorF32(_mm_castsi128_ps(_mm256_castsi256_si128(result))); + #ifdef __AVX512F__ } else { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); __m512i permIdx = _mm512_loadu_epi32(permMask.data()); __m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v)); return VectorF32(_mm_castsi128_ps(_mm512_castsi512_si128(result))); + #endif } } else if constexpr(std::is_same_v::VectorType, __m256>) { if constexpr(std::is_same_v::VectorType, __m128>) { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); - __m256i permIdx = _mm256_loadu_epi32(permMask.data()); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); __m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castsi128_si256(_mm_castps_si128(this->v))); return VectorF32(_mm256_castsi256_ps(result)); } else if constexpr(std::is_same_v::VectorType, __m256>) { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); - __m256i permIdx = _mm256_loadu_epi32(permMask.data()); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); __m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v)); return VectorF32(_mm256_castsi256_ps(result)); + #ifdef __AVX512F__ } else { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); __m256i permIdx = _mm512_loadu_epi32(permMask.data()); __m256i result = _mm512_permutexvar_epi32(permIdx, _mm512_castsi512_si256(_mm512_castps_si512(this->v))); return VectorF32(_mm256_castsi256_ps(result)); + #endif } + #ifdef __AVX512F__ } else { if constexpr(std::is_same_v::VectorType, __m128>) { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); @@ -143,6 +197,7 @@ namespace Crafter { __m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v)); return VectorF32(_mm512_castsi512_ps(result)); } + #endif } } } @@ -272,25 +327,27 @@ namespace Crafter { return Negate::GetAllTrue()>(); } - constexpr bool operator==(VectorF32 b) const { + constexpr bool operator==(VectorF32 b) const { if constexpr(std::is_same_v::VectorType, __m128>) { - return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 15; + #ifdef __AVX512VL__ + return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xF; + #else + return _mm_movemask_ps(_mm_cmpeq_ps(this->v, b.v)) == 0xF; + #endif } else if constexpr(std::is_same_v::VectorType, __m256>) { - return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 255; + #ifdef __AVX512VL__ + return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xFF; + #else + return _mm256_movemask_ps(_mm256_cmp_ps(this->v, b.v, _CMP_EQ_OQ)) == 0xFF; + #endif } else { - return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 65535; + return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xFFFF; } - } + } - constexpr bool operator!=(VectorF32 b) const { - if constexpr(std::is_same_v::VectorType, __m128>) { - return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 15; - } else if constexpr(std::is_same_v::VectorType, __m256>) { - return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 255; - } else { - return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 65535; - } - } + constexpr bool operator!=(VectorF32 b) const { + return !(*this == b); + } template constexpr VectorF32 ExtractLo() const { @@ -301,7 +358,7 @@ namespace Crafter { return VectorF32(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec))); } else if constexpr(std::is_same_v::VectorType, __m256>) { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetExtractLoMaskEpi32(); - __m256i permIdx = _mm256_loadu_epi32(permMask.data()); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); __m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v)); if constexpr(std::is_same_v::VectorType, __m128>) { return VectorF32(_mm256_castps256_ps128(_mm256_castsi256_ps(result))); @@ -323,10 +380,12 @@ namespace Crafter { } else { if constexpr(std::is_same_v::VectorType, __m256> && std::is_same_v::VectorType, __m128>) { return VectorF32(_mm256_castps256_ps128(this->v)); + #ifdef __AVX512F__ } else if constexpr(std::is_same_v::VectorType, __m512> && std::is_same_v::VectorType, __m128>) { return VectorF32(_mm512_castps512_ps128(this->v)); } else if constexpr(std::is_same_v::VectorType, __m512> && std::is_same_v::VectorType, __m256>) { return VectorF32(_mm512_castps512_ps256(this->v)); + #endif } else { return VectorF32(this->v); } @@ -338,8 +397,10 @@ namespace Crafter { return VectorF32(VectorBase::cos_f32x4(this->v)); } else if constexpr (std::is_same_v::VectorType, __m256>) { return VectorF32(VectorBase::cos_f32x8(this->v)); + #ifdef __AVX512F__ } else { return VectorF32(VectorBase::cos_f32x16(this->v)); + #endif } } @@ -348,8 +409,10 @@ namespace Crafter { return VectorF32(VectorBase::sin_f32x4(this->v)); } else if constexpr (std::is_same_v::VectorType, __m256>) { return VectorF32(VectorBase::sin_f32x8(this->v)); + #ifdef __AVX512F__ } else { return VectorF32(VectorBase::sin_f32x16(this->v)); + #endif } } @@ -369,7 +432,7 @@ namespace Crafter { VectorF32(s), VectorF32(c) }; - + #ifdef __AVX512F__ } else { __m512 s, c; VectorBase::sincos_f32x16(this->v, s, c); @@ -377,6 +440,7 @@ namespace Crafter { VectorF32(s), VectorF32(c) }; + #endif } } @@ -384,11 +448,13 @@ namespace Crafter { constexpr VectorF32 Negate() { std::array::AlignmentElement> mask = VectorBase::template GetNegateMask(); if constexpr(std::is_same_v::VectorType, __m128>) { - return VectorF32(_mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(this->v), _mm_loadu_epi32(mask.data())))); + return VectorF32(_mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(this->v), _mm_loadu_si128(reinterpret_cast<__m128i*>(mask.data()))))); } else if constexpr(std::is_same_v::VectorType, __m256>) { - return VectorF32(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(this->v), _mm256_loadu_epi32(mask.data())))); + return VectorF32(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(this->v), _mm256_loadu_si256(reinterpret_cast<__m256i*>(mask.data()))))); + #ifdef __AVX512F__ } else { return VectorF32(_mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(this->v), _mm512_loadu_epi32(mask.data())))); + #endif } } @@ -397,8 +463,10 @@ namespace Crafter { return VectorF32(_mm_fmadd_ps(a.v, b.v, add.v)); } else if constexpr(std::is_same_v::VectorType, __m256>) { return VectorF32(_mm256_fmadd_ps(a.v, b.v, add.v)); + #ifdef __AVX512F__ } else { return VectorF32(_mm512_fmadd_ps(a.v, b.v, add.v)); + #endif } } @@ -407,55 +475,22 @@ namespace Crafter { return VectorF32(_mm_fmsub_ps(a.v, b.v, sub.v)); } else if constexpr(std::is_same_v::VectorType, __m256>) { return VectorF32(_mm256_fmsub_ps(a.v, b.v, sub.v)); + #ifdef __AVX512F__ } else { return VectorF32(_mm512_fmsub_ps(a.v, b.v, sub.v)); + #endif } } constexpr static VectorF32 Cross(VectorF32 a, VectorF32 b) requires(Len == 3) { - if constexpr(std::is_same_v::VectorType, __m128>) { - constexpr std::array::Alignment> shuffleMask1 = VectorBase::template GetShuffleMaskEpi8<{{1,2,0}}>(); - __m128i shuffleVec1 = _mm_loadu_epi8(shuffleMask1.data()); - __m128 row1 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a.v), shuffleVec1)); - __m128 row4 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(b.v), shuffleVec1)); + VectorF32 row1 = a.template Shuffle<{{1,2,0}}>(); + VectorF32 row4 = b.template Shuffle<{{1,2,0}}>(); - constexpr std::array::Alignment> shuffleMask3 = VectorBase::template GetShuffleMaskEpi8<{{2,0,1}}>(); - __m128i shuffleVec3 = _mm_loadu_epi8(shuffleMask3.data()); - __m128 row3 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a.v), shuffleVec3)); - __m128 row2 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(b.v), shuffleVec3)); + VectorF32 row3 = a.template Shuffle<{{2,0,1}}>(); + VectorF32 row2 = b.template Shuffle<{{2,0,1}}>(); - __m128 result = _mm_mul_ps(row3, row4); - return _mm_fmsub_ps(row1,row2,result); - } else if constexpr (std::is_same_v::VectorType, __m256>) { - constexpr std::array::Alignment> shuffleMask1 = VectorBase::template GetShuffleMaskEpi8<{{1,2,0}}>(); - __m512i shuffleVec1 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask1.data())); - __m256 row1 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(a.v)), shuffleVec1))); - __m256 row4 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(b.v)), shuffleVec1))); - - constexpr std::array::Alignment> shuffleMask3 = VectorBase::template GetShuffleMaskEpi8<{{2,0,1}}>(); - - __m512i shuffleVec3 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask3.data())); - __m256 row3 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(a.v)), shuffleVec3))); - __m256 row2 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(b.v)), shuffleVec3))); - - __m256 result = _mm256_mul_ps(row3, row4); - return _mm256_fmsub_ps(row1,row2,result); - } else { - constexpr std::array::Alignment> shuffleMask1 = VectorBase::template GetShuffleMaskEpi8<{{1,2,0}}>(); - - __m512i shuffleVec1 = _mm512_loadu_epi8(shuffleMask1.data()); - __m512 row1 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(a.v), shuffleVec1)); - __m512 row4 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(b.v), shuffleVec1)); - - constexpr std::array::Alignment> shuffleMask3 = VectorBase::template GetShuffleMaskEpi8<{{2,0,1}}>(); - - __m512i shuffleVec3 = _mm512_loadu_epi8(shuffleMask3.data()); - __m512 row3 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(a.v), shuffleVec3)); - __m512 row2 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(b.v), shuffleVec3)); - - __m512 result = _mm512_mul_ps(row3, row4); - return _mm512_fmsub_ps(row1,row2,result); - } + VectorF32 result = row3 * row4; + return VectorF32::MulitplySub(row1, row2, result); } template ShuffleValues> @@ -465,21 +500,31 @@ namespace Crafter { if constexpr(std::is_same_v::VectorType, __m128>) { return VectorF32(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(this->v), imm))); } else if constexpr(std::is_same_v::VectorType, __m256>) { - return VectorF32(_mm256_castsi256_ps(_mm256_shuffle_epi32(_mm256_castps_si256(this->v), imm))); + return VectorF32(_mm256_castsi256_ps(_mm256_shuffle_epi32(_mm256_castps_si256(this->v), imm))); + #ifdef __AVX512F__ } else { return VectorF32(_mm512_castsi512_ps(_mm512_shuffle_epi32(_mm512_castps_si512(this->v), imm))); + #endif } - } else if constexpr(VectorBase::template CheckEpi8Shuffle()){ + } else if constexpr(VectorBase::template CheckEpi8Shuffle()) { constexpr std::array::Alignment> shuffleMask = VectorBase::template GetShuffleMaskEpi8(); if constexpr(std::is_same_v::VectorType, __m128>) { __m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data()); return VectorF32(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec))); } else if constexpr(std::is_same_v::VectorType, __m256>) { - __m256i shuffleVec = _mm256_loadu_epi8(shuffleMask.data()); - return VectorF32(_mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(this->v)), _mm512_castsi256_si512(shuffleVec))))); + #ifdef __AVX512BW__ + __m256i shuffleVec = _mm256_loadu_si256(reinterpret_cast(shuffleMask.data())); + return VectorF32(_mm256_castsi256_ps( _mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(this->v)),_mm512_castsi256_si512(shuffleVec))))); + #else + constexpr std::array::AlignmentElement> permMask = VectorBase::template GetPermuteMaskEpi32(); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); + return VectorF32(_mm256_castsi256_ps(_mm256_permutevar8x32_epi32(_mm256_castps_si256(this->v), permIdx))); + #endif + #ifdef __AVX512F__ } else { - __m512i shuffleVec = _mm512_loadu_epi8(shuffleMask.data()); + __m512i shuffleVec = _mm512_loadu_si512(reinterpret_cast(shuffleMask.data())); return VectorF32(_mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(this->v), shuffleVec))); + #endif } } else { if constexpr(std::is_same_v::VectorType, __m128>) { @@ -488,15 +533,17 @@ namespace Crafter { return VectorF32(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec))); } else if constexpr(std::is_same_v::VectorType, __m256>) { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetPermuteMaskEpi32(); - __m256i permIdx = _mm256_loadu_epi32(permMask.data()); - return VectorF32(_mm256_castsi256_ps(_mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v)))); + __m256i permIdx = _mm256_loadu_si256(reinterpret_cast(permMask.data())); + return VectorF32(_mm256_castsi256_ps(_mm256_permutevar8x32_epi32(_mm256_castps_si256(this->v), permIdx))); + #ifdef __AVX512F__ } else { constexpr std::array::AlignmentElement> permMask = VectorBase::template GetPermuteMaskEpi32(); __m512i permIdx = _mm512_loadu_epi32(permMask.data()); return VectorF32(_mm512_castsi512_ps(_mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v)))); + #endif } } - } + } constexpr static std::tuple, VectorF32, VectorF32, VectorF32> Normalize( VectorF32 A, @@ -539,6 +586,7 @@ namespace Crafter { _mm256_mul_ps(C.v, fLenghtC.v), _mm256_mul_ps(D.v, fLenghtD.v) }; + #if defined(__AVX512F__) } else { VectorF32<1, 16> lenght = LengthNoShuffle(A, C, B, D); constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; @@ -558,6 +606,7 @@ namespace Crafter { VectorF32(_mm512_mul_ps(C.v, fLenghtC.v)), VectorF32(_mm512_mul_ps(D.v, fLenghtD.v)), }; + #endif } } @@ -609,6 +658,7 @@ namespace Crafter { }; } + #ifdef __AVX512F__ constexpr static std::tuple, VectorF32, VectorF32> Normalize( VectorF32 A, VectorF32 B, @@ -629,6 +679,7 @@ namespace Crafter { _mm512_mul_ps(C.v, fLenghtC.v), }; } + #endif constexpr static std::tuple, VectorF32> Normalize( VectorF32 A, @@ -660,6 +711,7 @@ namespace Crafter { _mm256_mul_ps(A.v, fLenghtA.v), _mm256_mul_ps(B.v, fLenghtB.v), }; + #ifdef __AVX512F__ } else { VectorF32<1, 16> lenght = LengthNoShuffle(A, B); constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; @@ -673,6 +725,7 @@ namespace Crafter { _mm512_mul_ps(A.v, fLenghtA.v), _mm512_mul_ps(B.v, fLenghtB.v), }; + #endif } } @@ -712,6 +765,7 @@ namespace Crafter { return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v)); } + #ifdef __AVX512F__ constexpr static VectorF32<1, 15> Length( VectorF32 A, VectorF32 B, @@ -720,6 +774,7 @@ namespace Crafter { VectorF32<1, 15> lenghtSq = LengthSq(A, B, C); return VectorF32<1, 15>(_mm512_sqrt_ps(lenghtSq.v)); } + #endif constexpr static VectorF32<1, Packing*2> Length( VectorF32 A, @@ -730,8 +785,10 @@ namespace Crafter { return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v)); } else if constexpr(std::is_same_v::VectorType, __m256>) { return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v)); + #ifdef __AVX512F__ } else { return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v)); + #endif } } @@ -762,6 +819,7 @@ namespace Crafter { return Dot(A, A, B, B, C, C, D, D); } + #ifdef __AVX512F__ constexpr static VectorF32<1, 15> LengthSq( VectorF32 A, VectorF32 B, @@ -769,6 +827,7 @@ namespace Crafter { ) requires(Len == 3 && Packing == 5) { return Dot(A, A, B, B, C, C); } + #endif constexpr static VectorF32<1, Packing*2> LengthSq( VectorF32 A, @@ -792,6 +851,7 @@ namespace Crafter { 1,5,3,7, }}>(); return vec.v; + #ifdef __AVX512F__ } else { VectorF32<16, 1> vec(DotNoShuffle(A0, A1, B0, B1, C0, C1, D0, D1).v); vec = vec.template Shuffle<{{ @@ -801,6 +861,7 @@ namespace Crafter { 3,7,11,15 }}>(); return vec.v; + #endif } } @@ -955,6 +1016,7 @@ namespace Crafter { return row1; } + #ifdef __AVX512F__ constexpr static VectorF32<1, 15> Dot( VectorF32 A0, VectorF32 A1, VectorF32 B0, VectorF32 B1, @@ -1044,6 +1106,7 @@ namespace Crafter { return row1; } + #endif constexpr static VectorF32<1, Packing*2> Dot( VectorF32 A0, VectorF32 A1, @@ -1058,6 +1121,7 @@ namespace Crafter { 2,3, 6,7, }}>(); return vec.v; + #ifdef __AVX512F__ } else { VectorF32<16, 1> vec(DotNoShuffle(A0, A1, C0, C1).v); vec = vec.template Shuffle<{{ @@ -1067,6 +1131,7 @@ namespace Crafter { 10,11, 14,15 }}>(); return vec.v; + #endif } } @@ -1083,8 +1148,10 @@ namespace Crafter { return VectorF32<1, Packing*4>(_mm_sqrt_ps(lenghtSq.v)); } else if constexpr(std::is_same_v::VectorType, __m256>) { return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v)); + #ifdef __AVX512F__ } else { return VectorF32<1, Packing*4>(_mm512_sqrt_ps(lenghtSq.v)); + #endif } } @@ -1097,8 +1164,10 @@ namespace Crafter { return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v)); } else if constexpr(std::is_same_v::VectorType, __m256>) { return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v)); + #ifdef __AVX512F__ } else { return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v)); + #endif } } @@ -1172,6 +1241,7 @@ namespace Crafter { row1 = _mm256_add_ps(row1, row4); return row1; + #ifdef __AVX512F__ } else { __m512 mulA = _mm512_mul_ps(A0.v, A1.v); __m512 mulB = _mm512_mul_ps(B0.v, B1.v); @@ -1195,6 +1265,7 @@ namespace Crafter { row1 = _mm512_add_ps(row1, row4); return row1; + #endif } } @@ -1226,6 +1297,7 @@ namespace Crafter { row56Temp1 = _mm256_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2 return _mm256_add_ps(row12Temp1, row56Temp1); + #ifdef __AVX512F__ } else { __m512 mulA = _mm512_mul_ps(A0.v, A1.v); __m512 mulC = _mm512_mul_ps(C0.v, C1.v); @@ -1238,6 +1310,7 @@ namespace Crafter { row56Temp1 = _mm512_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2 return _mm512_add_ps(row12Temp1, row56Temp1); + #endif } } public: @@ -1245,19 +1318,20 @@ namespace Crafter { template ShuffleValues> constexpr static VectorF32 Blend(VectorF32 a, VectorF32 b) { constexpr auto mask = VectorBase::template GetBlendMaskEpi32(); - if constexpr(std::is_same_v::VectorType, __m128>) { + + if constexpr (std::is_same_v::VectorType, __m128>) { return _mm_castsi128_ps(_mm_blend_epi32(_mm_castps_si128(a.v), _mm_castps_si128(b.v), mask)); - } else if constexpr(std::is_same_v::VectorType, __m256>) { - #ifndef __AVX512BW__ - #ifndef __AVX512VL__ - static_assert(false, "No __AVX512BW__ and __AVX512VL__ support"); - #endif - #endif - return _mm256_castsi256_ps(_mm256_mask_blend_epi32(mask, _mm256_castps_si256(a.v), _mm256_castps_si256(b.v))); - } else { + + } else if constexpr (std::is_same_v::VectorType, __m256>) { + return _mm256_castsi256_ps(_mm256_blend_epi32(_mm256_castps_si256(a.v), _mm256_castps_si256(b.v), mask)); + + #ifdef __AVX512F__ + } else if constexpr (std::is_same_v::VectorType, __m512>) { return _mm512_castsi512_ps(_mm512_mask_blend_epi32(mask, _mm512_castps_si512(a.v), _mm512_castps_si512(b.v))); + #endif + } - } + } constexpr static VectorF32 Rotate(VectorF32<3, Packing> v, VectorF32<4, Packing> q) requires(Len == 3) { VectorF32<3, Packing> qv(q); @@ -1314,7 +1388,7 @@ namespace Crafter { export template struct std::formatter> : std::formatter { constexpr auto format(const Crafter::VectorF32& obj, format_context& ctx) const { - std::array::AlignmentElement> vec = obj.Store(); + std::array::AlignmentElement> vec = obj.template Store(); std::string out = "{"; for(std::uint32_t i = 0; i < Packing; i++) { out += "{"; @@ -1327,5 +1401,4 @@ struct std::formatter> : std::formatter::format(out, ctx); } -}; -#endif \ No newline at end of file +}; \ No newline at end of file diff --git a/project.json b/project.json index 73ac6a2..28cd205 100644 --- a/project.json +++ b/project.json @@ -36,6 +36,20 @@ "implementations": ["tests/Vector"], "march": "sapphirerapids", "extends": ["lib-shared"] + }, + { + "name": "Vector-x86-64-v4", + "implementations": ["tests/Vector"], + "march": "x86-64-v4", + "mtune": "generic", + "extends": ["lib-shared"] + }, + { + "name": "Vector-x86-64-v3", + "implementations": ["tests/Vector"], + "march": "x86-64-v3", + "mtune": "generic", + "extends": ["lib-shared"] } ] } \ No newline at end of file diff --git a/tests/Vector.cpp b/tests/Vector.cpp index 49962d6..ce86770 100644 --- a/tests/Vector.cpp +++ b/tests/Vector.cpp @@ -90,7 +90,7 @@ std::string* TestAllCombinations() { if constexpr(total > 0 && (total & (total - 1)) == 0) { { VectorType vec(floats); - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i])) { return new std::string(std::format("Load/Store mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i]), (float)stored[i])); @@ -101,7 +101,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec + vec; - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] + floats[i])) { return new std::string(std::format("Add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + floats[i]), (float)stored[i])); @@ -112,7 +112,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec - vec; - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], T(0))) { return new std::string(std::format("Subtract mismatch at Len={} Packing={}, Expected: 0, Got: {}", Len, Packing, (float)stored[i])); @@ -123,7 +123,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec * vec; - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] * floats[i])) { return new std::string(std::format("Multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * floats[i]), (float)stored[i])); @@ -134,7 +134,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec / vec; - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], T(1))) { return new std::string(std::format("Divide mismatch at Len={} Packing={}, Expected: 1, Got: {}", Len, Packing, (float)stored[i])); @@ -145,7 +145,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec + T(2); - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] + T(2))) { return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i])); @@ -156,7 +156,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec - T(2); - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] - T(2))) { return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i])); @@ -167,7 +167,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec * T(2); - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] * T(2))) { return new std::string(std::format("Scalar multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i])); @@ -178,7 +178,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = vec / T(2); - std::array::AlignmentElement> stored = vec.Store(); + std::array::AlignmentElement> stored = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(stored[i], floats[i] / T(2))) { return new std::string(std::format("Scalar divide mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i])); @@ -225,7 +225,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); vec = -vec; - std::array::AlignmentElement> result = vec.Store(); + std::array::AlignmentElement> result = vec.template Store(); for (std::uint32_t i = 0; i < Len * Packing; i++) { if (!FloatEquals(result[i], -floats[i])) { return new std::string(std::format("Negate mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(-floats[i]), (float)result[i])); @@ -237,7 +237,7 @@ std::string* TestAllCombinations() { VectorType vecA(floats1); VectorType vecB(floats2); VectorType result = VectorType::template Blend()>(vecA, vecB); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for (std::uint32_t i = 0; i < Len; i++) { bool useB = (i % 2 == 0); T expected = useB ? floats2[i]: floats1[i]; @@ -252,7 +252,7 @@ std::string* TestAllCombinations() { VectorType vecB(floats); VectorType vecAdd(floats); VectorType result = VectorType::MulitplyAdd(vecA, vecB, vecAdd); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for (std::uint32_t i = 0; i < Len; i++) { T expected = floats[i] * floats[i] + floats[i]; if (!FloatEquals(stored[i], expected)) { @@ -266,7 +266,7 @@ std::string* TestAllCombinations() { VectorType vecB(floats); VectorType vecSub(floats); VectorType result = VectorType::MulitplySub(vecA, vecB, vecSub); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for (std::uint32_t i = 0; i < Len; i++) { T expected = floats[i] * floats[i] - floats[i]; if (!FloatEquals(stored[i], expected)) { @@ -278,7 +278,7 @@ std::string* TestAllCombinations() { if constexpr(Len > 2){ VectorType vec(floats); VectorType result = vec.template ExtractLo(); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for(std::uint32_t i2 = 0; i2 < Packing; i2++){ for (std::uint32_t i = 0; i < Len-1; i++) { T expected = floats[i2*(Len)+i]; @@ -292,7 +292,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); VectorType result = vec.Sin(); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for(std::uint32_t i2 = 0; i2 < Packing; i2++){ for (std::uint32_t i = 0; i < Len; i++) { T expected = (T)std::sin((float)floats[i2*Len+i]); @@ -306,7 +306,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); VectorType result = vec.Cos(); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for(std::uint32_t i2 = 0; i2 < Packing; i2++){ for (std::uint32_t i = 0; i < Len; i++) { T expected = (T)std::cos((float)floats[i2*Len+i]); @@ -320,8 +320,8 @@ std::string* TestAllCombinations() { { VectorType vec(floats); auto result = vec.SinCos(); - std::array::AlignmentElement> storedSin = std::get<0>(result).Store(); - std::array::AlignmentElement> storedCos = std::get<1>(result).Store(); + std::array::AlignmentElement> storedSin = std::get<0>(result).template Store(); + std::array::AlignmentElement> storedCos = std::get<1>(result).template Store(); for(std::uint32_t i2 = 0; i2 < Packing; i2++){ for (std::uint32_t i = 0; i < Len; i++) { T expected = (T)std::sin((float)floats[i2*Len+i]); @@ -340,7 +340,7 @@ std::string* TestAllCombinations() { { VectorType vec(floats); VectorType result = vec.template Shuffle()>(); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for (std::uint32_t i = 0; i < Len; i++) { T expected = floats[Len - 1 - i]; if (!FloatEquals(stored[i], expected)) { @@ -355,9 +355,9 @@ std::string* TestAllCombinations() { VectorType vec1(floats1); VectorType vec2(floats2); VectorType result = VectorType::Cross(vec1, vec2); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], T(-3)) || !FloatEquals(stored[1], T(6)) || !FloatEquals(stored[2], T(-3))) { - return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored[0], (float)stored[1], (float)stored[2])); + return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored[0], (float)stored[1], (float)stored[2])); } } if constexpr(4 * Packing < VectorType<1, 1>::MaxElement) { @@ -370,7 +370,7 @@ std::string* TestAllCombinations() { VectorType<3, Packing> vecV(floats); VectorType<4, Packing> vecQ(qData); VectorType<3, Packing> result = VectorType<3, Packing>::Rotate(vecV, vecQ); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); for (std::uint32_t i = 0; i < 3; i++) { if (!FloatEquals(stored[i], floats[i])) { @@ -389,7 +389,7 @@ std::string* TestAllCombinations() { } VectorType<3, Packing> eulerVec(eulerData); VectorType<4, Packing> result = VectorType<4, Packing>::QuanternionFromEuler(eulerVec); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], T(0.63720703)) || !FloatEquals(stored[1], T(0.30688477)) || !FloatEquals(stored[2], T(0.14074707)) || !FloatEquals(stored[3], T(0.6933594))) { @@ -397,14 +397,14 @@ std::string* TestAllCombinations() { } } - if constexpr(Len == 3 && Packing == 1) { + if constexpr(Len == 3 && Packing == 1 && std::same_as) { { VectorType vecA(floats); VectorType vecB = vecA * 2; VectorType vecC = vecA * 3; VectorType vecD = vecA * 4; VectorType<1, 4> result = VectorType::Length(vecA, vecB, vecC, vecD); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], expectedLength[0])) { return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0])); @@ -430,7 +430,7 @@ std::string* TestAllCombinations() { VectorType vecD = vecA * 4; auto result = VectorType::Normalize(vecA, vecB, vecC, vecD); VectorType<1, 4> result2 = VectorType::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result)); - std::array::AlignmentElement> stored = result2.Store(); + std::array::AlignmentElement> stored = result2.template Store(); for(std::uint8_t i = 0; i < Len*Packing; i++) { if (!FloatEquals(stored[i], T(1))) { @@ -440,14 +440,14 @@ std::string* TestAllCombinations() { } } - if constexpr(Len == 3 && Packing == 2) { + if constexpr(Len == 3 && Packing == 2 && std::same_as) { { VectorType vecA(floats); VectorType vecB = vecA * 2; VectorType vecC = vecA * 3; VectorType vecD = vecA * 4; VectorType<1, 8> result = VectorType::Length(vecA, vecB, vecC, vecD); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], expectedLength[0])) { return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0])); @@ -473,7 +473,7 @@ std::string* TestAllCombinations() { VectorType vecD = vecA * 4; auto result = VectorType::Normalize(vecA, vecB, vecC, vecD); VectorType<1, 8> result2 = VectorType::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result)); - std::array::AlignmentElement> stored = result2.Store(); + std::array::AlignmentElement> stored = result2.template Store(); for(std::uint8_t i = 0; i < Len*Packing; i++) { if (!FloatEquals(stored[i], T(1))) { @@ -483,13 +483,13 @@ std::string* TestAllCombinations() { } } - if constexpr(Len == 3 && Packing == 5) { + if constexpr(Len == 3 && Packing == 5 && std::same_as) { { VectorType vecA(floats); VectorType vecB = vecA * 2; VectorType vecC = vecA * 3; VectorType<1, 15> result = VectorType::Length(vecA, vecB, vecC); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], expectedLength[0])) { return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0])); @@ -510,7 +510,7 @@ std::string* TestAllCombinations() { VectorType vecC = vecA * 3; auto result = VectorType::Normalize(vecA, vecB, vecC); VectorType<1, 15> result2 = VectorType::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result)); - std::array::AlignmentElement> stored = result2.Store(); + std::array::AlignmentElement> stored = result2.template Store(); for(std::uint8_t i = 0; i < Len*Packing; i++) { if (!FloatEquals(stored[i], T(1))) { @@ -525,7 +525,7 @@ std::string* TestAllCombinations() { VectorType vecA(floats); VectorType vecE = vecA *2; VectorType<1, Packing*2> result = VectorType::Length(vecA, vecE); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], expectedLength[0])) { return new std::string(std::format("Length 2 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0])); @@ -541,7 +541,7 @@ std::string* TestAllCombinations() { VectorType vecE = vecA * 2; auto result = VectorType::Normalize(vecA, vecE); VectorType<1, Packing*2> result2 = VectorType::Length(std::get<0>(result), std::get<1>(result)); - std::array::AlignmentElement> stored = result2.Store(); + std::array::AlignmentElement> stored = result2.template Store(); for(std::uint8_t i = 0; i < Len*Packing; i++) { if (!FloatEquals(stored[i], T(1))) { @@ -558,7 +558,7 @@ std::string* TestAllCombinations() { VectorType vecE = vecA * 3; VectorType vecG = vecA * 4; VectorType<1, Packing*4> result = VectorType::Length(vecA, vecC, vecE, vecG); - std::array::AlignmentElement> stored = result.Store(); + std::array::AlignmentElement> stored = result.template Store(); if (!FloatEquals(stored[0], expectedLength[0])) { return new std::string(std::format("Length 4 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0])); @@ -584,7 +584,7 @@ std::string* TestAllCombinations() { VectorType vecG = vecA * 4; auto result = VectorType::Normalize(vecA, vecC, vecE, vecG); VectorType<1, Packing*4> result2 = VectorType::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result)); - std::array::AlignmentElement> stored = result2.Store(); + std::array::AlignmentElement> stored = result2.template Store(); for(std::uint8_t i = 0; i < Len*Packing; i++) { if (!FloatEquals(stored[i], T(1))) { @@ -600,8 +600,11 @@ std::string* TestAllCombinations() { extern "C" { std::string* RunTest() { - //std::string* err = TestAllCombinations<_Float16, VectorF16, VectorF16<1, 1>::MaxElement>(); - std::string* err = TestAllCombinations::MaxElement>(); + std::string* err = TestAllCombinations<_Float16, VectorF16, VectorF16<1, 1>::MaxElement>(); + if (err) { + return err; + } + err = TestAllCombinations::MaxElement>(); if (err) { return err; }