x86v3
This commit is contained in:
parent
143b71eeb9
commit
a16f8ffbde
7 changed files with 251 additions and 133 deletions
|
|
@ -20,7 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
export module Crafter.Math:Basic;
|
export module Crafter.Math:Basic;
|
||||||
import std;
|
import std;
|
||||||
import :VectorF16;
|
import :VectorF32;
|
||||||
|
|
||||||
namespace Crafter {
|
namespace Crafter {
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
|
|
||||||
|
|
@ -6,15 +6,19 @@ export module Crafter.Math:Common;
|
||||||
import std;
|
import std;
|
||||||
|
|
||||||
namespace Crafter {
|
namespace Crafter {
|
||||||
|
#ifdef __AVX512FP16__
|
||||||
export template <std::uint8_t Len, std::uint8_t Packing>
|
export template <std::uint8_t Len, std::uint8_t Packing>
|
||||||
struct VectorF16;
|
struct VectorF16;
|
||||||
|
#endif
|
||||||
export template <std::uint8_t Len, std::uint8_t Packing>
|
export template <std::uint8_t Len, std::uint8_t Packing>
|
||||||
struct VectorF32;
|
struct VectorF32;
|
||||||
|
|
||||||
template <std::uint8_t Len, std::uint8_t Packing, typename T>
|
template <std::uint8_t Len, std::uint8_t Packing, typename T>
|
||||||
struct VectorBase {
|
struct VectorBase {
|
||||||
|
#ifdef __AVX512FP16__
|
||||||
template <std::uint8_t L, std::uint8_t P>
|
template <std::uint8_t L, std::uint8_t P>
|
||||||
friend struct VectorF16;
|
friend struct VectorF16;
|
||||||
|
#endif
|
||||||
template <std::uint8_t L, std::uint8_t P>
|
template <std::uint8_t L, std::uint8_t P>
|
||||||
friend struct VectorF32;
|
friend struct VectorF32;
|
||||||
protected:
|
protected:
|
||||||
|
|
|
||||||
|
|
@ -23,13 +23,36 @@ import :MatrixRowMajor;
|
||||||
import std;
|
import std;
|
||||||
|
|
||||||
namespace Crafter {
|
namespace Crafter {
|
||||||
export template<typename T>
|
export
|
||||||
constexpr T IntersectionTestRayTriangle(Vector<T, 3, 0> vert0, Vector<T, 3, 0> vert1, Vector<T, 3, 0> vert2, Vector<T, 3, 0> rayOrigin, Vector<T, 3, 0> rayDir) {
|
constexpr std::array<float, 15> IntersectionTestRayTriangle(
|
||||||
Vector<T, 3, 0> edge1 = vert1 - vert0;
|
VectorF32<3, 5> vertA0,
|
||||||
Vector<T, 3, 0> edge2 = vert2 - vert0;
|
VectorF32<3, 5> vertA1,
|
||||||
|
VectorF32<3, 5> vertA2,
|
||||||
|
|
||||||
|
VectorF32<3, 5> vertB0,
|
||||||
|
VectorF32<3, 5> vertB1,
|
||||||
|
VectorF32<3, 5> vertB2,
|
||||||
|
|
||||||
|
VectorF32<3, 5> vertC0,
|
||||||
|
VectorF32<3, 5> vertC1,
|
||||||
|
VectorF32<3, 5> vertC2,
|
||||||
|
|
||||||
|
VectorF32<3, 5> rayOrigin,
|
||||||
|
VectorF32<3, 5> rayDir
|
||||||
|
) {
|
||||||
|
|
||||||
|
VectorF32<3, Packing> edgeA1 = vertA1 - vertA0;
|
||||||
|
VectorF32<3, Packing> edgeA2 = vertA2 - vertA0;
|
||||||
|
VectorF32<3, Packing> crossA = VectorF32<3, Packing> ::Cross(rayDir, edgeA2);
|
||||||
|
|
||||||
|
VectorF32<3, Packing> edgeB1 = vertB1 - vertB0;
|
||||||
|
VectorF32<3, Packing> edgeB2 = vertB2 - vertB0;
|
||||||
|
VectorF32<3, Packing> crossB = VectorF32<3, Packing> ::Cross(rayDir, edgeB2);
|
||||||
|
|
||||||
|
VectorF32<3, Packing> edgeC1 = vertC1 - vertC0;
|
||||||
|
VectorF32<3, Packing> edgeC2 = vertC2 - vertC0;
|
||||||
|
VectorF32<3, Packing> crossC = VectorF32<3, Packing> ::Cross(rayDir, edgeC2);
|
||||||
|
|
||||||
Vector<T, 3, 0> h = Vector<T, 3, 0>::Cross(rayDir, edge2);
|
|
||||||
T determinant = Vector<T, 3, 0>::Dot(edge1, h);
|
|
||||||
|
|
||||||
if (determinant <= std::numeric_limits<T>::epsilon()) {
|
if (determinant <= std::numeric_limits<T>::epsilon()) {
|
||||||
return std::numeric_limits<T>::max();
|
return std::numeric_limits<T>::max();
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ namespace Crafter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
constexpr std::array<_Float16, VectorBase<Len, Packing, _Float16>::AlignmentElement> Store() const {
|
constexpr std::array<_Float16, VectorBase<Len, Packing, _Float16>::AlignmentElement> Store() const {
|
||||||
std::array<_Float16, VectorBase<Len, Packing, _Float16>::AlignmentElement> returnArray;
|
std::array<_Float16, VectorBase<Len, Packing, _Float16>::AlignmentElement> returnArray;
|
||||||
Store(returnArray.data());
|
Store(returnArray.data());
|
||||||
|
|
@ -1029,7 +1030,7 @@ namespace Crafter {
|
||||||
export template <std::uint32_t Len, std::uint32_t Packing>
|
export template <std::uint32_t Len, std::uint32_t Packing>
|
||||||
struct std::formatter<Crafter::VectorF16<Len, Packing>> : std::formatter<std::string> {
|
struct std::formatter<Crafter::VectorF16<Len, Packing>> : std::formatter<std::string> {
|
||||||
constexpr auto format(const Crafter::VectorF16<Len, Packing>& obj, format_context& ctx) const {
|
constexpr auto format(const Crafter::VectorF16<Len, Packing>& obj, format_context& ctx) const {
|
||||||
std::array<_Float16, Crafter::VectorF16<Len, Packing>::AlignmentElement> vec = obj.Store();
|
std::array<_Float16, Crafter::VectorF16<Len, Packing>::AlignmentElement> vec = obj.template Store<float>();
|
||||||
std::string out = "{";
|
std::string out = "{";
|
||||||
for(std::uint32_t i = 0; i < Packing; i++) {
|
for(std::uint32_t i = 0; i < Packing; i++) {
|
||||||
out += "{";
|
out += "{";
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ export module Crafter.Math:VectorF32;
|
||||||
import std;
|
import std;
|
||||||
import :Common;
|
import :Common;
|
||||||
|
|
||||||
#ifdef __AVX512FP16__
|
|
||||||
namespace Crafter {
|
namespace Crafter {
|
||||||
export template <std::uint8_t Len, std::uint8_t Packing>
|
export template <std::uint8_t Len, std::uint8_t Packing>
|
||||||
struct VectorF32 : public VectorBase<Len, Packing, float> {
|
struct VectorF32 : public VectorBase<Len, Packing, float> {
|
||||||
|
|
@ -38,6 +37,9 @@ namespace Crafter {
|
||||||
constexpr VectorF32(const float* vB) {
|
constexpr VectorF32(const float* vB) {
|
||||||
Load(vB);
|
Load(vB);
|
||||||
};
|
};
|
||||||
|
constexpr VectorF32(const _Float16* vB) {
|
||||||
|
Load(vB);
|
||||||
|
};
|
||||||
constexpr VectorF32(float val) {
|
constexpr VectorF32(float val) {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
this->v = _mm_set1_ps(val);
|
this->v = _mm_set1_ps(val);
|
||||||
|
|
@ -66,8 +68,55 @@ namespace Crafter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> Store() const {
|
constexpr void Load(const _Float16* vB) {
|
||||||
std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> returnArray;
|
#ifdef __F16C__
|
||||||
|
if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
|
this->v = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(vB)));
|
||||||
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
|
this->v = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(vB)));
|
||||||
|
} else {
|
||||||
|
this->v = _mm512_cvtph_ps(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(vB)));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
alignas(64) float tmp[Len];
|
||||||
|
for (int i = 0; i < Len; ++i)
|
||||||
|
tmp[i] = static_cast<float>(vB[i]);
|
||||||
|
if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
|
this->v = _mm_load_ps(tmp);
|
||||||
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
|
this->v = _mm256_load_ps(tmp);
|
||||||
|
} else {
|
||||||
|
this->v = _mm512_load_ps(tmp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr void Store(_Float16* vB) const {
|
||||||
|
#ifdef __F16C__
|
||||||
|
if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
|
_mm_storel_epi64(reinterpret_cast<__m128i*>(vB), _mm_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT));
|
||||||
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i*>(vB), _mm256_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT));
|
||||||
|
} else {
|
||||||
|
_mm256_storeu_si256(reinterpret_cast<__m256i*>(vB), _mm512_cvtps_ph(this->v, _MM_FROUND_TO_NEAREST_INT));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
alignas(64) float tmp[Len];
|
||||||
|
if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
|
_mm_store_ps(tmp, this->v);
|
||||||
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
|
_mm256_store_ps(tmp, this->v);
|
||||||
|
} else {
|
||||||
|
_mm512_store_ps(tmp, this->v);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < Len; ++i)
|
||||||
|
vB[i] = static_cast<_Float16>(tmp[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr std::array<T, VectorBase<Len, Packing, float>::AlignmentElement> Store() const {
|
||||||
|
std::array<T, VectorBase<Len, Packing, float>::AlignmentElement> returnArray;
|
||||||
Store(returnArray.data());
|
Store(returnArray.data());
|
||||||
return returnArray;
|
return returnArray;
|
||||||
}
|
}
|
||||||
|
|
@ -96,36 +145,41 @@ namespace Crafter {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m128>) {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi8<BLen>();
|
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi8<BLen>();
|
||||||
__m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data());
|
__m128i shuffleVec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(shuffleMask.data()));
|
||||||
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask =VectorBase<Len, Packing, float>::template GetExtractLoMaskepi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask =VectorBase<Len, Packing, float>::template GetExtractLoMaskepi32<BLen>();
|
||||||
__m256i permIdx = _mm256_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm_castps_si256(this->v));
|
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm_castps_si256(this->v));
|
||||||
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm256_castsi256_si128(result)));
|
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm256_castsi256_si128(result)));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
||||||
__m512i permIdx = _mm512_loadu_epi32(permMask.data());
|
__m512i permIdx = _mm512_loadu_epi32(permMask.data());
|
||||||
__m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v));
|
__m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v));
|
||||||
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm512_castsi512_si128(result)));
|
return VectorF32<BLen, BPacking>(_mm_castsi128_ps(_mm512_castsi512_si128(result)));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m256>) {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
||||||
__m256i permIdx = _mm256_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castsi128_si256(_mm_castps_si128(this->v)));
|
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castsi128_si256(_mm_castps_si128(this->v)));
|
||||||
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
||||||
__m256i permIdx = _mm256_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v));
|
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v));
|
||||||
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
||||||
__m256i permIdx = _mm512_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm512_loadu_epi32(permMask.data());
|
||||||
__m256i result = _mm512_permutexvar_epi32(permIdx, _mm512_castsi512_si256(_mm512_castps_si512(this->v)));
|
__m256i result = _mm512_permutexvar_epi32(permIdx, _mm512_castsi512_si256(_mm512_castps_si512(this->v)));
|
||||||
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
return VectorF32<BLen, BPacking>(_mm256_castsi256_ps(result));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<BLen, BPacking, float>::VectorType, __m128>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
constexpr std::array<std::uint32_t, VectorBase<BLen, Packing, float>::AlignmentElement> permMask = VectorBase<BLen, Packing, float>::template GetExtractLoMaskEpi32<BLen>();
|
||||||
|
|
@ -143,6 +197,7 @@ namespace Crafter {
|
||||||
__m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v));
|
__m512i result = _mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v));
|
||||||
return VectorF32<BLen, BPacking>(_mm512_castsi512_ps(result));
|
return VectorF32<BLen, BPacking>(_mm512_castsi512_ps(result));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -272,25 +327,27 @@ namespace Crafter {
|
||||||
return Negate<VectorBase<Len, Packing, float>::GetAllTrue()>();
|
return Negate<VectorBase<Len, Packing, float>::GetAllTrue()>();
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool operator==(VectorF32<Len, Packing> b) const {
|
constexpr bool operator==(VectorF32<Len, Packing> b) const {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 15;
|
#ifdef __AVX512VL__
|
||||||
|
return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xF;
|
||||||
|
#else
|
||||||
|
return _mm_movemask_ps(_mm_cmpeq_ps(this->v, b.v)) == 0xF;
|
||||||
|
#endif
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 255;
|
#ifdef __AVX512VL__
|
||||||
|
return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xFF;
|
||||||
|
#else
|
||||||
|
return _mm256_movemask_ps(_mm256_cmp_ps(this->v, b.v, _CMP_EQ_OQ)) == 0xFF;
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 65535;
|
return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) == 0xFFFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool operator!=(VectorF32<Len, Packing> b) const {
|
constexpr bool operator!=(VectorF32<Len, Packing> b) const {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
return !(*this == b);
|
||||||
return _mm_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 15;
|
}
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
|
||||||
return _mm256_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 255;
|
|
||||||
} else {
|
|
||||||
return _mm512_cmp_ps_mask(this->v, b.v, _CMP_EQ_OQ) != 65535;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<std::uint32_t ExtractLen>
|
template<std::uint32_t ExtractLen>
|
||||||
constexpr VectorF32<ExtractLen, Packing> ExtractLo() const {
|
constexpr VectorF32<ExtractLen, Packing> ExtractLo() const {
|
||||||
|
|
@ -301,7 +358,7 @@ namespace Crafter {
|
||||||
return VectorF32<ExtractLen, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
return VectorF32<ExtractLen, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi32<ExtractLen>();
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetExtractLoMaskEpi32<ExtractLen>();
|
||||||
__m256i permIdx = _mm256_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v));
|
__m256i result = _mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v));
|
||||||
if constexpr(std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
||||||
return VectorF32<ExtractLen, Packing>(_mm256_castps256_ps128(_mm256_castsi256_ps(result)));
|
return VectorF32<ExtractLen, Packing>(_mm256_castps256_ps128(_mm256_castsi256_ps(result)));
|
||||||
|
|
@ -323,10 +380,12 @@ namespace Crafter {
|
||||||
} else {
|
} else {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
||||||
return VectorF32<ExtractLen, Packing>(_mm256_castps256_ps128(this->v));
|
return VectorF32<ExtractLen, Packing>(_mm256_castps256_ps128(this->v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m512> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m512> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m128>) {
|
||||||
return VectorF32<ExtractLen, Packing>(_mm512_castps512_ps128(this->v));
|
return VectorF32<ExtractLen, Packing>(_mm512_castps512_ps128(this->v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m512> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m512> && std::is_same_v<typename VectorBase<ExtractLen, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<ExtractLen, Packing>(_mm512_castps512_ps256(this->v));
|
return VectorF32<ExtractLen, Packing>(_mm512_castps512_ps256(this->v));
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<ExtractLen, Packing>(this->v);
|
return VectorF32<ExtractLen, Packing>(this->v);
|
||||||
}
|
}
|
||||||
|
|
@ -338,8 +397,10 @@ namespace Crafter {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x4(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x4(this->v));
|
||||||
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x8(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x8(this->v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x16(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::cos_f32x16(this->v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -348,8 +409,10 @@ namespace Crafter {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x4(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x4(this->v));
|
||||||
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x8(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x8(this->v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x16(this->v));
|
return VectorF32<Len, Packing>(VectorBase<Len, Packing, float>::sin_f32x16(this->v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -369,7 +432,7 @@ namespace Crafter {
|
||||||
VectorF32<Len, Packing>(s),
|
VectorF32<Len, Packing>(s),
|
||||||
VectorF32<Len, Packing>(c)
|
VectorF32<Len, Packing>(c)
|
||||||
};
|
};
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
__m512 s, c;
|
__m512 s, c;
|
||||||
VectorBase<Len, Packing, float>::sincos_f32x16(this->v, s, c);
|
VectorBase<Len, Packing, float>::sincos_f32x16(this->v, s, c);
|
||||||
|
|
@ -377,6 +440,7 @@ namespace Crafter {
|
||||||
VectorF32<Len, Packing>(s),
|
VectorF32<Len, Packing>(s),
|
||||||
VectorF32<Len, Packing>(c)
|
VectorF32<Len, Packing>(c)
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -384,11 +448,13 @@ namespace Crafter {
|
||||||
constexpr VectorF32<Len, Packing> Negate() {
|
constexpr VectorF32<Len, Packing> Negate() {
|
||||||
std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> mask = VectorBase<Len, Packing, float>::template GetNegateMask<values>();
|
std::array<float, VectorBase<Len, Packing, float>::AlignmentElement> mask = VectorBase<Len, Packing, float>::template GetNegateMask<values>();
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(this->v), _mm_loadu_epi32(mask.data()))));
|
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(this->v), _mm_loadu_si128(reinterpret_cast<__m128i*>(mask.data())))));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(this->v), _mm256_loadu_epi32(mask.data()))));
|
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(this->v), _mm256_loadu_si256(reinterpret_cast<__m256i*>(mask.data())))));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(this->v), _mm512_loadu_epi32(mask.data()))));
|
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(this->v), _mm512_loadu_epi32(mask.data()))));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -397,8 +463,10 @@ namespace Crafter {
|
||||||
return VectorF32<Len, Packing>(_mm_fmadd_ps(a.v, b.v, add.v));
|
return VectorF32<Len, Packing>(_mm_fmadd_ps(a.v, b.v, add.v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(_mm256_fmadd_ps(a.v, b.v, add.v));
|
return VectorF32<Len, Packing>(_mm256_fmadd_ps(a.v, b.v, add.v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(_mm512_fmadd_ps(a.v, b.v, add.v));
|
return VectorF32<Len, Packing>(_mm512_fmadd_ps(a.v, b.v, add.v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -407,55 +475,22 @@ namespace Crafter {
|
||||||
return VectorF32<Len, Packing>(_mm_fmsub_ps(a.v, b.v, sub.v));
|
return VectorF32<Len, Packing>(_mm_fmsub_ps(a.v, b.v, sub.v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(_mm256_fmsub_ps(a.v, b.v, sub.v));
|
return VectorF32<Len, Packing>(_mm256_fmsub_ps(a.v, b.v, sub.v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(_mm512_fmsub_ps(a.v, b.v, sub.v));
|
return VectorF32<Len, Packing>(_mm512_fmsub_ps(a.v, b.v, sub.v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static VectorF32<Len, Packing> Cross(VectorF32<Len, Packing> a, VectorF32<Len, Packing> b) requires(Len == 3) {
|
constexpr static VectorF32<Len, Packing> Cross(VectorF32<Len, Packing> a, VectorF32<Len, Packing> b) requires(Len == 3) {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
VectorF32<Len, Packing> row1 = a.template Shuffle<{{1,2,0}}>();
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask1 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{1,2,0}}>();
|
VectorF32<Len, Packing> row4 = b.template Shuffle<{{1,2,0}}>();
|
||||||
__m128i shuffleVec1 = _mm_loadu_epi8(shuffleMask1.data());
|
|
||||||
__m128 row1 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a.v), shuffleVec1));
|
|
||||||
__m128 row4 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(b.v), shuffleVec1));
|
|
||||||
|
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask3 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{2,0,1}}>();
|
VectorF32<Len, Packing> row3 = a.template Shuffle<{{2,0,1}}>();
|
||||||
__m128i shuffleVec3 = _mm_loadu_epi8(shuffleMask3.data());
|
VectorF32<Len, Packing> row2 = b.template Shuffle<{{2,0,1}}>();
|
||||||
__m128 row3 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a.v), shuffleVec3));
|
|
||||||
__m128 row2 = _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(b.v), shuffleVec3));
|
|
||||||
|
|
||||||
__m128 result = _mm_mul_ps(row3, row4);
|
VectorF32<Len, Packing> result = row3 * row4;
|
||||||
return _mm_fmsub_ps(row1,row2,result);
|
return VectorF32<Len, Packing>::MulitplySub(row1, row2, result);
|
||||||
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask1 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{1,2,0}}>();
|
|
||||||
__m512i shuffleVec1 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask1.data()));
|
|
||||||
__m256 row1 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(a.v)), shuffleVec1)));
|
|
||||||
__m256 row4 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(b.v)), shuffleVec1)));
|
|
||||||
|
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask3 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{2,0,1}}>();
|
|
||||||
|
|
||||||
__m512i shuffleVec3 = _mm512_castsi256_si512(_mm256_loadu_epi8(shuffleMask3.data()));
|
|
||||||
__m256 row3 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(a.v)), shuffleVec3)));
|
|
||||||
__m256 row2 = _mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(b.v)), shuffleVec3)));
|
|
||||||
|
|
||||||
__m256 result = _mm256_mul_ps(row3, row4);
|
|
||||||
return _mm256_fmsub_ps(row1,row2,result);
|
|
||||||
} else {
|
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask1 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{1,2,0}}>();
|
|
||||||
|
|
||||||
__m512i shuffleVec1 = _mm512_loadu_epi8(shuffleMask1.data());
|
|
||||||
__m512 row1 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(a.v), shuffleVec1));
|
|
||||||
__m512 row4 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(b.v), shuffleVec1));
|
|
||||||
|
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask3 = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<{{2,0,1}}>();
|
|
||||||
|
|
||||||
__m512i shuffleVec3 = _mm512_loadu_epi8(shuffleMask3.data());
|
|
||||||
__m512 row3 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(a.v), shuffleVec3));
|
|
||||||
__m512 row2 = _mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(b.v), shuffleVec3));
|
|
||||||
|
|
||||||
__m512 result = _mm512_mul_ps(row3, row4);
|
|
||||||
return _mm512_fmsub_ps(row1,row2,result);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <const std::array<std::uint8_t, Len> ShuffleValues>
|
template <const std::array<std::uint8_t, Len> ShuffleValues>
|
||||||
|
|
@ -465,21 +500,31 @@ namespace Crafter {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(this->v), imm)));
|
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(this->v), imm)));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_shuffle_epi32(_mm256_castps_si256(this->v), imm)));
|
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_shuffle_epi32(_mm256_castps_si256(this->v), imm)));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_shuffle_epi32(_mm512_castps_si512(this->v), imm)));
|
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_shuffle_epi32(_mm512_castps_si512(this->v), imm)));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
} else if constexpr(VectorBase<Len, Packing, float>::template CheckEpi8Shuffle<ShuffleValues>()){
|
} else if constexpr(VectorBase<Len, Packing, float>::template CheckEpi8Shuffle<ShuffleValues>()) {
|
||||||
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<ShuffleValues>();
|
constexpr std::array<std::uint8_t, VectorBase<Len, Packing, float>::Alignment> shuffleMask = VectorBase<Len, Packing, float>::template GetShuffleMaskEpi8<ShuffleValues>();
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
__m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data());
|
__m128i shuffleVec = _mm_loadu_epi8(shuffleMask.data());
|
||||||
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
__m256i shuffleVec = _mm256_loadu_epi8(shuffleMask.data());
|
#ifdef __AVX512BW__
|
||||||
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(this->v)), _mm512_castsi256_si512(shuffleVec)))));
|
__m256i shuffleVec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(shuffleMask.data()));
|
||||||
|
return VectorF32<Len, Packing>(_mm256_castsi256_ps( _mm512_castsi512_si256(_mm512_shuffle_epi8(_mm512_castsi256_si512(_mm256_castps_si256(this->v)),_mm512_castsi256_si512(shuffleVec)))));
|
||||||
|
#else
|
||||||
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetPermuteMaskEpi32<ShuffleValues>();
|
||||||
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
|
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_permutevar8x32_epi32(_mm256_castps_si256(this->v), permIdx)));
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
__m512i shuffleVec = _mm512_loadu_epi8(shuffleMask.data());
|
__m512i shuffleVec = _mm512_loadu_si512(reinterpret_cast<const __m256i*>(shuffleMask.data()));
|
||||||
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(this->v), shuffleVec)));
|
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_shuffle_epi8(_mm512_castps_si512(this->v), shuffleVec)));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
|
|
@ -488,15 +533,17 @@ namespace Crafter {
|
||||||
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
return VectorF32<Len, Packing>(_mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(this->v), shuffleVec)));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetPermuteMaskEpi32<ShuffleValues>();
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetPermuteMaskEpi32<ShuffleValues>();
|
||||||
__m256i permIdx = _mm256_loadu_epi32(permMask.data());
|
__m256i permIdx = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(permMask.data()));
|
||||||
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_permutexvar_epi32(permIdx, _mm256_castps_si256(this->v))));
|
return VectorF32<Len, Packing>(_mm256_castsi256_ps(_mm256_permutevar8x32_epi32(_mm256_castps_si256(this->v), permIdx)));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetPermuteMaskEpi32<ShuffleValues>();
|
constexpr std::array<std::uint32_t, VectorBase<Len, Packing, float>::AlignmentElement> permMask = VectorBase<Len, Packing, float>::template GetPermuteMaskEpi32<ShuffleValues>();
|
||||||
__m512i permIdx = _mm512_loadu_epi32(permMask.data());
|
__m512i permIdx = _mm512_loadu_epi32(permMask.data());
|
||||||
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v))));
|
return VectorF32<Len, Packing>(_mm512_castsi512_ps(_mm512_permutexvar_epi32(permIdx, _mm512_castps_si512(this->v))));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
|
|
@ -539,6 +586,7 @@ namespace Crafter {
|
||||||
_mm256_mul_ps(C.v, fLenghtC.v),
|
_mm256_mul_ps(C.v, fLenghtC.v),
|
||||||
_mm256_mul_ps(D.v, fLenghtD.v)
|
_mm256_mul_ps(D.v, fLenghtD.v)
|
||||||
};
|
};
|
||||||
|
#if defined(__AVX512F__)
|
||||||
} else {
|
} else {
|
||||||
VectorF32<1, 16> lenght = LengthNoShuffle(A, C, B, D);
|
VectorF32<1, 16> lenght = LengthNoShuffle(A, C, B, D);
|
||||||
constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||||
|
|
@ -558,6 +606,7 @@ namespace Crafter {
|
||||||
VectorF32<Len, Packing>(_mm512_mul_ps(C.v, fLenghtC.v)),
|
VectorF32<Len, Packing>(_mm512_mul_ps(C.v, fLenghtC.v)),
|
||||||
VectorF32<Len, Packing>(_mm512_mul_ps(D.v, fLenghtD.v)),
|
VectorF32<Len, Packing>(_mm512_mul_ps(D.v, fLenghtD.v)),
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -609,6 +658,7 @@ namespace Crafter {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __AVX512F__
|
||||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
VectorF32<Len, Packing> B,
|
VectorF32<Len, Packing> B,
|
||||||
|
|
@ -629,6 +679,7 @@ namespace Crafter {
|
||||||
_mm512_mul_ps(C.v, fLenghtC.v),
|
_mm512_mul_ps(C.v, fLenghtC.v),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
constexpr static std::tuple<VectorF32<Len, Packing>, VectorF32<Len, Packing>> Normalize(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
|
|
@ -660,6 +711,7 @@ namespace Crafter {
|
||||||
_mm256_mul_ps(A.v, fLenghtA.v),
|
_mm256_mul_ps(A.v, fLenghtA.v),
|
||||||
_mm256_mul_ps(B.v, fLenghtB.v),
|
_mm256_mul_ps(B.v, fLenghtB.v),
|
||||||
};
|
};
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
VectorF32<1, 16> lenght = LengthNoShuffle(A, B);
|
VectorF32<1, 16> lenght = LengthNoShuffle(A, B);
|
||||||
constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
constexpr float oneArr[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||||
|
|
@ -673,6 +725,7 @@ namespace Crafter {
|
||||||
_mm512_mul_ps(A.v, fLenghtA.v),
|
_mm512_mul_ps(A.v, fLenghtA.v),
|
||||||
_mm512_mul_ps(B.v, fLenghtB.v),
|
_mm512_mul_ps(B.v, fLenghtB.v),
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -712,6 +765,7 @@ namespace Crafter {
|
||||||
return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __AVX512F__
|
||||||
constexpr static VectorF32<1, 15> Length(
|
constexpr static VectorF32<1, 15> Length(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
VectorF32<Len, Packing> B,
|
VectorF32<Len, Packing> B,
|
||||||
|
|
@ -720,6 +774,7 @@ namespace Crafter {
|
||||||
VectorF32<1, 15> lenghtSq = LengthSq(A, B, C);
|
VectorF32<1, 15> lenghtSq = LengthSq(A, B, C);
|
||||||
return VectorF32<1, 15>(_mm512_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, 15>(_mm512_sqrt_ps(lenghtSq.v));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
constexpr static VectorF32<1, Packing*2> Length(
|
constexpr static VectorF32<1, Packing*2> Length(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
|
|
@ -730,8 +785,10 @@ namespace Crafter {
|
||||||
return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -762,6 +819,7 @@ namespace Crafter {
|
||||||
return Dot(A, A, B, B, C, C, D, D);
|
return Dot(A, A, B, B, C, C, D, D);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __AVX512F__
|
||||||
constexpr static VectorF32<1, 15> LengthSq(
|
constexpr static VectorF32<1, 15> LengthSq(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
VectorF32<Len, Packing> B,
|
VectorF32<Len, Packing> B,
|
||||||
|
|
@ -769,6 +827,7 @@ namespace Crafter {
|
||||||
) requires(Len == 3 && Packing == 5) {
|
) requires(Len == 3 && Packing == 5) {
|
||||||
return Dot(A, A, B, B, C, C);
|
return Dot(A, A, B, B, C, C);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
constexpr static VectorF32<1, Packing*2> LengthSq(
|
constexpr static VectorF32<1, Packing*2> LengthSq(
|
||||||
VectorF32<Len, Packing> A,
|
VectorF32<Len, Packing> A,
|
||||||
|
|
@ -792,6 +851,7 @@ namespace Crafter {
|
||||||
1,5,3,7,
|
1,5,3,7,
|
||||||
}}>();
|
}}>();
|
||||||
return vec.v;
|
return vec.v;
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
VectorF32<16, 1> vec(DotNoShuffle(A0, A1, B0, B1, C0, C1, D0, D1).v);
|
VectorF32<16, 1> vec(DotNoShuffle(A0, A1, B0, B1, C0, C1, D0, D1).v);
|
||||||
vec = vec.template Shuffle<{{
|
vec = vec.template Shuffle<{{
|
||||||
|
|
@ -801,6 +861,7 @@ namespace Crafter {
|
||||||
3,7,11,15
|
3,7,11,15
|
||||||
}}>();
|
}}>();
|
||||||
return vec.v;
|
return vec.v;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -955,6 +1016,7 @@ namespace Crafter {
|
||||||
return row1;
|
return row1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __AVX512F__
|
||||||
constexpr static VectorF32<1, 15> Dot(
|
constexpr static VectorF32<1, 15> Dot(
|
||||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||||
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
VectorF32<Len, Packing> B0, VectorF32<Len, Packing> B1,
|
||||||
|
|
@ -1044,6 +1106,7 @@ namespace Crafter {
|
||||||
|
|
||||||
return row1;
|
return row1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
constexpr static VectorF32<1, Packing*2> Dot(
|
constexpr static VectorF32<1, Packing*2> Dot(
|
||||||
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
VectorF32<Len, Packing> A0, VectorF32<Len, Packing> A1,
|
||||||
|
|
@ -1058,6 +1121,7 @@ namespace Crafter {
|
||||||
2,3, 6,7,
|
2,3, 6,7,
|
||||||
}}>();
|
}}>();
|
||||||
return vec.v;
|
return vec.v;
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
VectorF32<16, 1> vec(DotNoShuffle(A0, A1, C0, C1).v);
|
VectorF32<16, 1> vec(DotNoShuffle(A0, A1, C0, C1).v);
|
||||||
vec = vec.template Shuffle<{{
|
vec = vec.template Shuffle<{{
|
||||||
|
|
@ -1067,6 +1131,7 @@ namespace Crafter {
|
||||||
10,11, 14,15
|
10,11, 14,15
|
||||||
}}>();
|
}}>();
|
||||||
return vec.v;
|
return vec.v;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1083,8 +1148,10 @@ namespace Crafter {
|
||||||
return VectorF32<1, Packing*4>(_mm_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*4>(_mm_sqrt_ps(lenghtSq.v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*4>(_mm256_sqrt_ps(lenghtSq.v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<1, Packing*4>(_mm512_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*4>(_mm512_sqrt_ps(lenghtSq.v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1097,8 +1164,10 @@ namespace Crafter {
|
||||||
return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm_sqrt_ps(lenghtSq.v));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm256_sqrt_ps(lenghtSq.v));
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v));
|
return VectorF32<1, Packing*2>(_mm512_sqrt_ps(lenghtSq.v));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1172,6 +1241,7 @@ namespace Crafter {
|
||||||
row1 = _mm256_add_ps(row1, row4);
|
row1 = _mm256_add_ps(row1, row4);
|
||||||
|
|
||||||
return row1;
|
return row1;
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
__m512 mulA = _mm512_mul_ps(A0.v, A1.v);
|
__m512 mulA = _mm512_mul_ps(A0.v, A1.v);
|
||||||
__m512 mulB = _mm512_mul_ps(B0.v, B1.v);
|
__m512 mulB = _mm512_mul_ps(B0.v, B1.v);
|
||||||
|
|
@ -1195,6 +1265,7 @@ namespace Crafter {
|
||||||
row1 = _mm512_add_ps(row1, row4);
|
row1 = _mm512_add_ps(row1, row4);
|
||||||
|
|
||||||
return row1;
|
return row1;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1226,6 +1297,7 @@ namespace Crafter {
|
||||||
row56Temp1 = _mm256_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2
|
row56Temp1 = _mm256_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2
|
||||||
|
|
||||||
return _mm256_add_ps(row12Temp1, row56Temp1);
|
return _mm256_add_ps(row12Temp1, row56Temp1);
|
||||||
|
#ifdef __AVX512F__
|
||||||
} else {
|
} else {
|
||||||
__m512 mulA = _mm512_mul_ps(A0.v, A1.v);
|
__m512 mulA = _mm512_mul_ps(A0.v, A1.v);
|
||||||
__m512 mulC = _mm512_mul_ps(C0.v, C1.v);
|
__m512 mulC = _mm512_mul_ps(C0.v, C1.v);
|
||||||
|
|
@ -1238,6 +1310,7 @@ namespace Crafter {
|
||||||
row56Temp1 = _mm512_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2
|
row56Temp1 = _mm512_unpackhi_epi32(row1TempTemp1, row56Temp1); // A2 B2 C2 D2
|
||||||
|
|
||||||
return _mm512_add_ps(row12Temp1, row56Temp1);
|
return _mm512_add_ps(row12Temp1, row56Temp1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|
@ -1245,19 +1318,20 @@ namespace Crafter {
|
||||||
template <std::array<bool, Len> ShuffleValues>
|
template <std::array<bool, Len> ShuffleValues>
|
||||||
constexpr static VectorF32<Len, Packing> Blend(VectorF32<Len, Packing> a, VectorF32<Len, Packing> b) {
|
constexpr static VectorF32<Len, Packing> Blend(VectorF32<Len, Packing> a, VectorF32<Len, Packing> b) {
|
||||||
constexpr auto mask = VectorBase<Len, Packing, float>::template GetBlendMaskEpi32<ShuffleValues>();
|
constexpr auto mask = VectorBase<Len, Packing, float>::template GetBlendMaskEpi32<ShuffleValues>();
|
||||||
if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
|
||||||
|
if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m128>) {
|
||||||
return _mm_castsi128_ps(_mm_blend_epi32(_mm_castps_si128(a.v), _mm_castps_si128(b.v), mask));
|
return _mm_castsi128_ps(_mm_blend_epi32(_mm_castps_si128(a.v), _mm_castps_si128(b.v), mask));
|
||||||
} else if constexpr(std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
|
||||||
#ifndef __AVX512BW__
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m256>) {
|
||||||
#ifndef __AVX512VL__
|
return _mm256_castsi256_ps(_mm256_blend_epi32(_mm256_castps_si256(a.v), _mm256_castps_si256(b.v), mask));
|
||||||
static_assert(false, "No __AVX512BW__ and __AVX512VL__ support");
|
|
||||||
#endif
|
#ifdef __AVX512F__
|
||||||
#endif
|
} else if constexpr (std::is_same_v<typename VectorBase<Len, Packing, float>::VectorType, __m512>) {
|
||||||
return _mm256_castsi256_ps(_mm256_mask_blend_epi32(mask, _mm256_castps_si256(a.v), _mm256_castps_si256(b.v)));
|
|
||||||
} else {
|
|
||||||
return _mm512_castsi512_ps(_mm512_mask_blend_epi32(mask, _mm512_castps_si512(a.v), _mm512_castps_si512(b.v)));
|
return _mm512_castsi512_ps(_mm512_mask_blend_epi32(mask, _mm512_castps_si512(a.v), _mm512_castps_si512(b.v)));
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static VectorF32<Len, Packing> Rotate(VectorF32<3, Packing> v, VectorF32<4, Packing> q) requires(Len == 3) {
|
constexpr static VectorF32<Len, Packing> Rotate(VectorF32<3, Packing> v, VectorF32<4, Packing> q) requires(Len == 3) {
|
||||||
VectorF32<3, Packing> qv(q);
|
VectorF32<3, Packing> qv(q);
|
||||||
|
|
@ -1314,7 +1388,7 @@ namespace Crafter {
|
||||||
export template <std::uint32_t Len, std::uint32_t Packing>
|
export template <std::uint32_t Len, std::uint32_t Packing>
|
||||||
struct std::formatter<Crafter::VectorF32<Len, Packing>> : std::formatter<std::string> {
|
struct std::formatter<Crafter::VectorF32<Len, Packing>> : std::formatter<std::string> {
|
||||||
constexpr auto format(const Crafter::VectorF32<Len, Packing>& obj, format_context& ctx) const {
|
constexpr auto format(const Crafter::VectorF32<Len, Packing>& obj, format_context& ctx) const {
|
||||||
std::array<float, Crafter::VectorF32<Len, Packing>::AlignmentElement> vec = obj.Store();
|
std::array<float, Crafter::VectorF32<Len, Packing>::AlignmentElement> vec = obj.template Store<float>();
|
||||||
std::string out = "{";
|
std::string out = "{";
|
||||||
for(std::uint32_t i = 0; i < Packing; i++) {
|
for(std::uint32_t i = 0; i < Packing; i++) {
|
||||||
out += "{";
|
out += "{";
|
||||||
|
|
@ -1327,5 +1401,4 @@ struct std::formatter<Crafter::VectorF32<Len, Packing>> : std::formatter<std::st
|
||||||
out += "}";
|
out += "}";
|
||||||
return std::formatter<std::string>::format(out, ctx);
|
return std::formatter<std::string>::format(out, ctx);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
14
project.json
14
project.json
|
|
@ -36,6 +36,20 @@
|
||||||
"implementations": ["tests/Vector"],
|
"implementations": ["tests/Vector"],
|
||||||
"march": "sapphirerapids",
|
"march": "sapphirerapids",
|
||||||
"extends": ["lib-shared"]
|
"extends": ["lib-shared"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Vector-x86-64-v4",
|
||||||
|
"implementations": ["tests/Vector"],
|
||||||
|
"march": "x86-64-v4",
|
||||||
|
"mtune": "generic",
|
||||||
|
"extends": ["lib-shared"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Vector-x86-64-v3",
|
||||||
|
"implementations": ["tests/Vector"],
|
||||||
|
"march": "x86-64-v3",
|
||||||
|
"mtune": "generic",
|
||||||
|
"extends": ["lib-shared"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -90,7 +90,7 @@ std::string* TestAllCombinations() {
|
||||||
if constexpr(total > 0 && (total & (total - 1)) == 0) {
|
if constexpr(total > 0 && (total & (total - 1)) == 0) {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i])) {
|
if (!FloatEquals(stored[i], floats[i])) {
|
||||||
return new std::string(std::format("Load/Store mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i]), (float)stored[i]));
|
return new std::string(std::format("Load/Store mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i]), (float)stored[i]));
|
||||||
|
|
@ -101,7 +101,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec + vec;
|
vec = vec + vec;
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] + floats[i])) {
|
if (!FloatEquals(stored[i], floats[i] + floats[i])) {
|
||||||
return new std::string(std::format("Add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + floats[i]), (float)stored[i]));
|
return new std::string(std::format("Add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + floats[i]), (float)stored[i]));
|
||||||
|
|
@ -112,7 +112,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec - vec;
|
vec = vec - vec;
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(0))) {
|
if (!FloatEquals(stored[i], T(0))) {
|
||||||
return new std::string(std::format("Subtract mismatch at Len={} Packing={}, Expected: 0, Got: {}", Len, Packing, (float)stored[i]));
|
return new std::string(std::format("Subtract mismatch at Len={} Packing={}, Expected: 0, Got: {}", Len, Packing, (float)stored[i]));
|
||||||
|
|
@ -123,7 +123,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec * vec;
|
vec = vec * vec;
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] * floats[i])) {
|
if (!FloatEquals(stored[i], floats[i] * floats[i])) {
|
||||||
return new std::string(std::format("Multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * floats[i]), (float)stored[i]));
|
return new std::string(std::format("Multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * floats[i]), (float)stored[i]));
|
||||||
|
|
@ -134,7 +134,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec / vec;
|
vec = vec / vec;
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
return new std::string(std::format("Divide mismatch at Len={} Packing={}, Expected: 1, Got: {}", Len, Packing, (float)stored[i]));
|
return new std::string(std::format("Divide mismatch at Len={} Packing={}, Expected: 1, Got: {}", Len, Packing, (float)stored[i]));
|
||||||
|
|
@ -145,7 +145,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec + T(2);
|
vec = vec + T(2);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] + T(2))) {
|
if (!FloatEquals(stored[i], floats[i] + T(2))) {
|
||||||
return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i]));
|
return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i]));
|
||||||
|
|
@ -156,7 +156,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec - T(2);
|
vec = vec - T(2);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] - T(2))) {
|
if (!FloatEquals(stored[i], floats[i] - T(2))) {
|
||||||
return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i]));
|
return new std::string(std::format("Scalar add mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] + T(2)), (float)stored[i]));
|
||||||
|
|
@ -167,7 +167,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec * T(2);
|
vec = vec * T(2);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] * T(2))) {
|
if (!FloatEquals(stored[i], floats[i] * T(2))) {
|
||||||
return new std::string(std::format("Scalar multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i]));
|
return new std::string(std::format("Scalar multiply mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i]));
|
||||||
|
|
@ -178,7 +178,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = vec / T(2);
|
vec = vec / T(2);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i] / T(2))) {
|
if (!FloatEquals(stored[i], floats[i] / T(2))) {
|
||||||
return new std::string(std::format("Scalar divide mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i]));
|
return new std::string(std::format("Scalar divide mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(floats[i] * T(2)), (float)stored[i]));
|
||||||
|
|
@ -225,7 +225,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
vec = -vec;
|
vec = -vec;
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> result = vec.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> result = vec.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
for (std::uint32_t i = 0; i < Len * Packing; i++) {
|
||||||
if (!FloatEquals(result[i], -floats[i])) {
|
if (!FloatEquals(result[i], -floats[i])) {
|
||||||
return new std::string(std::format("Negate mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(-floats[i]), (float)result[i]));
|
return new std::string(std::format("Negate mismatch at Len={} Packing={}, Expected: {}, Got: {}", Len, Packing, (float)(-floats[i]), (float)result[i]));
|
||||||
|
|
@ -237,7 +237,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecA(floats1);
|
VectorType<Len, Packing> vecA(floats1);
|
||||||
VectorType<Len, Packing> vecB(floats2);
|
VectorType<Len, Packing> vecB(floats2);
|
||||||
VectorType<Len, Packing> result = VectorType<Len, Packing>::template Blend<AlternateTrueFalse<Len>()>(vecA, vecB);
|
VectorType<Len, Packing> result = VectorType<Len, Packing>::template Blend<AlternateTrueFalse<Len>()>(vecA, vecB);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
bool useB = (i % 2 == 0);
|
bool useB = (i % 2 == 0);
|
||||||
T expected = useB ? floats2[i]: floats1[i];
|
T expected = useB ? floats2[i]: floats1[i];
|
||||||
|
|
@ -252,7 +252,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecB(floats);
|
VectorType<Len, Packing> vecB(floats);
|
||||||
VectorType<Len, Packing> vecAdd(floats);
|
VectorType<Len, Packing> vecAdd(floats);
|
||||||
VectorType<Len, Packing> result = VectorType<Len, Packing>::MulitplyAdd(vecA, vecB, vecAdd);
|
VectorType<Len, Packing> result = VectorType<Len, Packing>::MulitplyAdd(vecA, vecB, vecAdd);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = floats[i] * floats[i] + floats[i];
|
T expected = floats[i] * floats[i] + floats[i];
|
||||||
if (!FloatEquals(stored[i], expected)) {
|
if (!FloatEquals(stored[i], expected)) {
|
||||||
|
|
@ -266,7 +266,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecB(floats);
|
VectorType<Len, Packing> vecB(floats);
|
||||||
VectorType<Len, Packing> vecSub(floats);
|
VectorType<Len, Packing> vecSub(floats);
|
||||||
VectorType<Len, Packing> result = VectorType<Len, Packing>::MulitplySub(vecA, vecB, vecSub);
|
VectorType<Len, Packing> result = VectorType<Len, Packing>::MulitplySub(vecA, vecB, vecSub);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = floats[i] * floats[i] - floats[i];
|
T expected = floats[i] * floats[i] - floats[i];
|
||||||
if (!FloatEquals(stored[i], expected)) {
|
if (!FloatEquals(stored[i], expected)) {
|
||||||
|
|
@ -278,7 +278,7 @@ std::string* TestAllCombinations() {
|
||||||
if constexpr(Len > 2){
|
if constexpr(Len > 2){
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
VectorType<Len-1, Packing> result = vec.template ExtractLo<Len-1>();
|
VectorType<Len-1, Packing> result = vec.template ExtractLo<Len-1>();
|
||||||
std::array<T, VectorType<Len-1, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len-1, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
||||||
for (std::uint32_t i = 0; i < Len-1; i++) {
|
for (std::uint32_t i = 0; i < Len-1; i++) {
|
||||||
T expected = floats[i2*(Len)+i];
|
T expected = floats[i2*(Len)+i];
|
||||||
|
|
@ -292,7 +292,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
VectorType<Len, Packing> result = vec.Sin();
|
VectorType<Len, Packing> result = vec.Sin();
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = (T)std::sin((float)floats[i2*Len+i]);
|
T expected = (T)std::sin((float)floats[i2*Len+i]);
|
||||||
|
|
@ -306,7 +306,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
VectorType<Len, Packing> result = vec.Cos();
|
VectorType<Len, Packing> result = vec.Cos();
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = (T)std::cos((float)floats[i2*Len+i]);
|
T expected = (T)std::cos((float)floats[i2*Len+i]);
|
||||||
|
|
@ -320,8 +320,8 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
auto result = vec.SinCos();
|
auto result = vec.SinCos();
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> storedSin = std::get<0>(result).Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> storedSin = std::get<0>(result).template Store<T>();
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> storedCos = std::get<1>(result).Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> storedCos = std::get<1>(result).template Store<T>();
|
||||||
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
for(std::uint32_t i2 = 0; i2 < Packing; i2++){
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = (T)std::sin((float)floats[i2*Len+i]);
|
T expected = (T)std::sin((float)floats[i2*Len+i]);
|
||||||
|
|
@ -340,7 +340,7 @@ std::string* TestAllCombinations() {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vec(floats);
|
VectorType<Len, Packing> vec(floats);
|
||||||
VectorType<Len, Packing> result = vec.template Shuffle<GetCountReverse<Len>()>();
|
VectorType<Len, Packing> result = vec.template Shuffle<GetCountReverse<Len>()>();
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
for (std::uint32_t i = 0; i < Len; i++) {
|
for (std::uint32_t i = 0; i < Len; i++) {
|
||||||
T expected = floats[Len - 1 - i];
|
T expected = floats[Len - 1 - i];
|
||||||
if (!FloatEquals(stored[i], expected)) {
|
if (!FloatEquals(stored[i], expected)) {
|
||||||
|
|
@ -355,9 +355,9 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vec1(floats1);
|
VectorType<Len, Packing> vec1(floats1);
|
||||||
VectorType<Len, Packing> vec2(floats2);
|
VectorType<Len, Packing> vec2(floats2);
|
||||||
VectorType<Len, Packing> result = VectorType<Len, Packing>::Cross(vec1, vec2);
|
VectorType<Len, Packing> result = VectorType<Len, Packing>::Cross(vec1, vec2);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
if (!FloatEquals(stored[0], T(-3)) || !FloatEquals(stored[1], T(6)) || !FloatEquals(stored[2], T(-3))) {
|
if (!FloatEquals(stored[0], T(-3)) || !FloatEquals(stored[1], T(6)) || !FloatEquals(stored[2], T(-3))) {
|
||||||
return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored[0], (float)stored[1], (float)stored[2]));
|
return new std::string(std::format("Cross mismatch at Len={} Packing={}, Expected: -3,6,-3, Got: {},{},{}", Len, Packing, (float)stored[0], (float)stored[1], (float)stored[2]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(4 * Packing < VectorType<1, 1>::MaxElement) {
|
if constexpr(4 * Packing < VectorType<1, 1>::MaxElement) {
|
||||||
|
|
@ -370,7 +370,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<3, Packing> vecV(floats);
|
VectorType<3, Packing> vecV(floats);
|
||||||
VectorType<4, Packing> vecQ(qData);
|
VectorType<4, Packing> vecQ(qData);
|
||||||
VectorType<3, Packing> result = VectorType<3, Packing>::Rotate(vecV, vecQ);
|
VectorType<3, Packing> result = VectorType<3, Packing>::Rotate(vecV, vecQ);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
for (std::uint32_t i = 0; i < 3; i++) {
|
for (std::uint32_t i = 0; i < 3; i++) {
|
||||||
if (!FloatEquals(stored[i], floats[i])) {
|
if (!FloatEquals(stored[i], floats[i])) {
|
||||||
|
|
@ -389,7 +389,7 @@ std::string* TestAllCombinations() {
|
||||||
}
|
}
|
||||||
VectorType<3, Packing> eulerVec(eulerData);
|
VectorType<3, Packing> eulerVec(eulerData);
|
||||||
VectorType<4, Packing> result = VectorType<4, Packing>::QuanternionFromEuler(eulerVec);
|
VectorType<4, Packing> result = VectorType<4, Packing>::QuanternionFromEuler(eulerVec);
|
||||||
std::array<T, VectorType<4, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<4, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], T(0.63720703)) || !FloatEquals(stored[1], T(0.30688477)) ||
|
if (!FloatEquals(stored[0], T(0.63720703)) || !FloatEquals(stored[1], T(0.30688477)) ||
|
||||||
!FloatEquals(stored[2], T(0.14074707)) || !FloatEquals(stored[3], T(0.6933594))) {
|
!FloatEquals(stored[2], T(0.14074707)) || !FloatEquals(stored[3], T(0.6933594))) {
|
||||||
|
|
@ -397,14 +397,14 @@ std::string* TestAllCombinations() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr(Len == 3 && Packing == 1) {
|
if constexpr(Len == 3 && Packing == 1 && std::same_as<T, float>) {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vecA(floats);
|
VectorType<Len, Packing> vecA(floats);
|
||||||
VectorType<Len, Packing> vecB = vecA * 2;
|
VectorType<Len, Packing> vecB = vecA * 2;
|
||||||
VectorType<Len, Packing> vecC = vecA * 3;
|
VectorType<Len, Packing> vecC = vecA * 3;
|
||||||
VectorType<Len, Packing> vecD = vecA * 4;
|
VectorType<Len, Packing> vecD = vecA * 4;
|
||||||
VectorType<1, 4> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC, vecD);
|
VectorType<1, 4> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC, vecD);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], expectedLength[0])) {
|
if (!FloatEquals(stored[0], expectedLength[0])) {
|
||||||
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
||||||
|
|
@ -430,7 +430,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecD = vecA * 4;
|
VectorType<Len, Packing> vecD = vecA * 4;
|
||||||
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC, vecD);
|
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC, vecD);
|
||||||
VectorType<1, 4> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
VectorType<1, 4> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.template Store<T>();
|
||||||
|
|
||||||
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
|
|
@ -440,14 +440,14 @@ std::string* TestAllCombinations() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr(Len == 3 && Packing == 2) {
|
if constexpr(Len == 3 && Packing == 2 && std::same_as<T, float>) {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vecA(floats);
|
VectorType<Len, Packing> vecA(floats);
|
||||||
VectorType<Len, Packing> vecB = vecA * 2;
|
VectorType<Len, Packing> vecB = vecA * 2;
|
||||||
VectorType<Len, Packing> vecC = vecA * 3;
|
VectorType<Len, Packing> vecC = vecA * 3;
|
||||||
VectorType<Len, Packing> vecD = vecA * 4;
|
VectorType<Len, Packing> vecD = vecA * 4;
|
||||||
VectorType<1, 8> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC, vecD);
|
VectorType<1, 8> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC, vecD);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], expectedLength[0])) {
|
if (!FloatEquals(stored[0], expectedLength[0])) {
|
||||||
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
||||||
|
|
@ -473,7 +473,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecD = vecA * 4;
|
VectorType<Len, Packing> vecD = vecA * 4;
|
||||||
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC, vecD);
|
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC, vecD);
|
||||||
VectorType<1, 8> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
VectorType<1, 8> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.template Store<T>();
|
||||||
|
|
||||||
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
|
|
@ -483,13 +483,13 @@ std::string* TestAllCombinations() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr(Len == 3 && Packing == 5) {
|
if constexpr(Len == 3 && Packing == 5 && std::same_as<T, float>) {
|
||||||
{
|
{
|
||||||
VectorType<Len, Packing> vecA(floats);
|
VectorType<Len, Packing> vecA(floats);
|
||||||
VectorType<Len, Packing> vecB = vecA * 2;
|
VectorType<Len, Packing> vecB = vecA * 2;
|
||||||
VectorType<Len, Packing> vecC = vecA * 3;
|
VectorType<Len, Packing> vecC = vecA * 3;
|
||||||
VectorType<1, 15> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC);
|
VectorType<1, 15> result = VectorType<Len, Packing>::Length(vecA, vecB, vecC);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], expectedLength[0])) {
|
if (!FloatEquals(stored[0], expectedLength[0])) {
|
||||||
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
return new std::string(std::format("Length 3 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
||||||
|
|
@ -510,7 +510,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecC = vecA * 3;
|
VectorType<Len, Packing> vecC = vecA * 3;
|
||||||
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC);
|
auto result = VectorType<Len, Packing>::Normalize(vecA, vecB, vecC);
|
||||||
VectorType<1, 15> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result));
|
VectorType<1, 15> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result));
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.template Store<T>();
|
||||||
|
|
||||||
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
|
|
@ -525,7 +525,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecA(floats);
|
VectorType<Len, Packing> vecA(floats);
|
||||||
VectorType<Len, Packing> vecE = vecA *2;
|
VectorType<Len, Packing> vecE = vecA *2;
|
||||||
VectorType<1, Packing*2> result = VectorType<Len, Packing>::Length(vecA, vecE);
|
VectorType<1, Packing*2> result = VectorType<Len, Packing>::Length(vecA, vecE);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], expectedLength[0])) {
|
if (!FloatEquals(stored[0], expectedLength[0])) {
|
||||||
return new std::string(std::format("Length 2 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
return new std::string(std::format("Length 2 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
||||||
|
|
@ -541,7 +541,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecE = vecA * 2;
|
VectorType<Len, Packing> vecE = vecA * 2;
|
||||||
auto result = VectorType<Len, Packing>::Normalize(vecA, vecE);
|
auto result = VectorType<Len, Packing>::Normalize(vecA, vecE);
|
||||||
VectorType<1, Packing*2> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result));
|
VectorType<1, Packing*2> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result));
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.template Store<T>();
|
||||||
|
|
||||||
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
|
|
@ -558,7 +558,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecE = vecA * 3;
|
VectorType<Len, Packing> vecE = vecA * 3;
|
||||||
VectorType<Len, Packing> vecG = vecA * 4;
|
VectorType<Len, Packing> vecG = vecA * 4;
|
||||||
VectorType<1, Packing*4> result = VectorType<Len, Packing>::Length(vecA, vecC, vecE, vecG);
|
VectorType<1, Packing*4> result = VectorType<Len, Packing>::Length(vecA, vecC, vecE, vecG);
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result.template Store<T>();
|
||||||
|
|
||||||
if (!FloatEquals(stored[0], expectedLength[0])) {
|
if (!FloatEquals(stored[0], expectedLength[0])) {
|
||||||
return new std::string(std::format("Length 4 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
return new std::string(std::format("Length 4 vecA test failed at Len={} Packing={} Expected: {}, Got: {}", Len, Packing, (float)expectedLength[0], (float)stored[0]));
|
||||||
|
|
@ -584,7 +584,7 @@ std::string* TestAllCombinations() {
|
||||||
VectorType<Len, Packing> vecG = vecA * 4;
|
VectorType<Len, Packing> vecG = vecA * 4;
|
||||||
auto result = VectorType<Len, Packing>::Normalize(vecA, vecC, vecE, vecG);
|
auto result = VectorType<Len, Packing>::Normalize(vecA, vecC, vecE, vecG);
|
||||||
VectorType<1, Packing*4> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
VectorType<1, Packing*4> result2 = VectorType<Len, Packing>::Length(std::get<0>(result), std::get<1>(result), std::get<2>(result), std::get<3>(result));
|
||||||
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.Store();
|
std::array<T, VectorType<Len, Packing>::AlignmentElement> stored = result2.template Store<T>();
|
||||||
|
|
||||||
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
for(std::uint8_t i = 0; i < Len*Packing; i++) {
|
||||||
if (!FloatEquals(stored[i], T(1))) {
|
if (!FloatEquals(stored[i], T(1))) {
|
||||||
|
|
@ -600,8 +600,11 @@ std::string* TestAllCombinations() {
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
std::string* RunTest() {
|
std::string* RunTest() {
|
||||||
//std::string* err = TestAllCombinations<_Float16, VectorF16, VectorF16<1, 1>::MaxElement>();
|
std::string* err = TestAllCombinations<_Float16, VectorF16, VectorF16<1, 1>::MaxElement>();
|
||||||
std::string* err = TestAllCombinations<float, VectorF32, VectorF32<1, 1>::MaxElement>();
|
if (err) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
err = TestAllCombinations<float, VectorF32, VectorF32<1, 1>::MaxElement>();
|
||||||
if (err) {
|
if (err) {
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue