commit d0a8b12c1a443f3bd5daa40887ca4c9bc8b29052 Author: Jorijn van der Graaf Date: Mon May 5 02:01:44 2025 +0200 stash diff --git a/Crafter.Math-BasicTypes.cppm b/Crafter.Math-BasicTypes.cppm new file mode 100755 index 0000000..dad931c --- /dev/null +++ b/Crafter.Math-BasicTypes.cppm @@ -0,0 +1,43 @@ +module; + +#include +#include +#include + +export module Crafter.Math:BasicTypes; + +namespace Crafter { + export struct Float2 { + float x; + float y; + }; + export struct Float3 { + float x; + float y; + float z; + }; + export struct Float4 { + float x; + float y; + float z; + float w; + }; + export struct Float4x4 { + float c1[4]; + float c2[4]; + float c3[4]; + float c4[4]; + }; +} + +template <> +struct std::formatter : std::formatter { + auto format(const Crafter::Float4x4& obj, format_context& ctx) const { + return std::formatter::format(std::format("{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}", + obj.c1[0], obj.c2[0], obj.c3[0], obj.c4[0], + obj.c1[1], obj.c2[1], obj.c3[1], obj.c4[1], + obj.c1[2], obj.c2[2], obj.c3[2], obj.c4[2], + obj.c1[3], obj.c2[3], obj.c3[3], obj.c4[3] + ), ctx); + } +}; \ No newline at end of file diff --git a/Crafter.Math-Matrix.cppm b/Crafter.Math-Matrix.cppm new file mode 100755 index 0000000..13da647 --- /dev/null +++ b/Crafter.Math-Matrix.cppm @@ -0,0 +1,278 @@ +module; + +#include +#include +#include +#include +#include +#include + +export module Crafter.Math:Matrix; + +import :BasicTypes; +import :Vector; +import :Misc; + +namespace Crafter { + export template + class Matrix { + public: + typedef + typename std::conditional<(sizeof(T)* collumSize*repeats > 32 && (std::same_as || std::same_as || std::same_as || std::same_as)), __m512i, + typename std::conditional<(sizeof(T)* collumSize*repeats > 16 && (std::same_as || std::same_as || std::same_as || std::same_as)), __m256i, + typename std::conditional<(sizeof(T)* collumSize*repeats <= 16 && (std::same_as || std::same_as || std::same_as || std::same_as)), 
__m128i, + typename std::conditional<(collumSize*repeats > 16 && std::same_as), __m512h, + typename std::conditional<(collumSize*repeats > 8 && std::same_as), __m256h, + typename std::conditional<(collumSize*repeats <= 8 && std::same_as), __m128h, + typename std::conditional<(collumSize*repeats > 8 && std::same_as), __m512, + typename std::conditional<(collumSize*repeats > 4 && std::same_as), __m256, + typename std::conditional<(collumSize*repeats <= 4 && std::same_as), __m128, + typename std::conditional<(collumSize*repeats > 4 && std::same_as), __m512d, + typename std::conditional<(collumSize*repeats > 2 && std::same_as), __m256d, __m128d + >::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type collum_type; + + collum_type c[rowSize]; + + Matrix() { + + } + + Matrix(__m128 c0, __m128 c1, __m128 c2, __m128 c3) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as) { + c[0] = c0; + c[1] = c1; + c[2] = c2; + c[3] = c3; + } + + Matrix( + float x0, float y0, float z0, float w0, + float x1, float y1, float z1, float w1, + float x2, float y2, float z2, float w2, + float x3, float y3, float z3, float w3 + ) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as) { + c[0] = _mm_set_ps(x3, x2, x1, x0); + c[1] = _mm_set_ps(y3, y2, y1, y0); + c[2] = _mm_set_ps(z3, z2, z1, z0); + c[3] = _mm_set_ps(w3, w2, w1, w0); + } + + Vector operator*(Vector b) const requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as) { /* Matrix * vector: result = c[0]*b.x + c[1]*b.y + c[2]*b.z + c[3]*b.w. Each lane of b is broadcast (0x00/0x55/0xAA/0xFF select lane 0/1/2/3) before it scales its column. */ + __m128 result = _mm_mul_ps(reinterpret_cast<__m128>(c[0]), _mm_permute_ps(reinterpret_cast<__m128>(b.v), 0x00)); + result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[1]), _mm_permute_ps(reinterpret_cast<__m128>(b.v), 0x55), result); + result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[2]), _mm_permute_ps(reinterpret_cast<__m128>(b.v), 0xAA), result); + result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[3]), _mm_permute_ps(reinterpret_cast<__m128>(b.v), 0xFF), result); + return Vector(result); + } + + + // static Matrix Scaling(float x, float y, float z) requires(collums == 4 && 
(rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as) { + // return Matrix( + // _mm_set_ps(0, 0, 0, x), + // _mm_set_ps(0, 0, y, 0), + // _mm_set_ps(0, z, 0, 0), + // _mm_set_ps(1, 0, 0, 0) + // ); + // } + + // static Matrix Translation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as) { + // return Matrix( + // _mm_set_ps(0, 0, 0, 1), + // _mm_set_ps(0, 0, 1, 0), + // _mm_set_ps(0, 1, 0, 0), + // _mm_set_ps(1, z, y, x) + // ); + // } + + // // static Matrix Rotation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as) { + // // return Matrix( + // // _mm_set_ps(0, 0, 0, 1), + // // _mm_set_ps(0, 0, 1, 0), + // // _mm_set_ps(0, 1, 0, 0), + // // _mm_set_ps(1, z, y, x) + // // ); + // // } + + // static Matrix Idenity() requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as) { + // return Matrix( + // _mm_set_ps(0, 0, 0, 1), + // _mm_set_ps(0, 0, 1, 0), + // _mm_set_ps(0, 1, 0, 0), + // _mm_set_ps(1, 0, 0, 0) + // ); + // } + + // static Matrix Projection(float FovAngleY, float AspectRatio, float NearZ, float FarZ) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as) { + // float SinFov; + // float CosFov; + // XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + // float fRange = FarZ / (NearZ - FarZ); + // // Note: This is recorded on the stack + // float Height = CosFov / SinFov; + // __m128 rMem = { + // Height / AspectRatio, + // Height, + // fRange, + // fRange * NearZ + // }; + // // Copy from memory to SSE register + // __m128 vValues = rMem; + // __m128 vTemp = _mm_setzero_ps(); + // // Copy x only + // vTemp = _mm_move_ss(vTemp, vValues); + // // Height / AspectRatio,0,0,0 + // Matrix M; + // M.r[0] = vTemp; + // // 0,Height,0,0 + // vTemp = vValues; + // vTemp = _mm_and_ps(vTemp, g_XMMaskY.v); + // M.r[1] = vTemp; + // // 
x=fRange,y=-fRange * NearZ,0,-1.0f + // vTemp = _mm_setzero_ps(); + // vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3.v, _MM_SHUFFLE(3, 2, 3, 2)); + // // 0,0,fRange,-1.0f + // vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + // M.r[2] = vTemp; + // // 0,0,fRange * NearZ,0.0f + // vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + // M.r[3] = vTemp; + // return M; + // } + + // template + // Vector operator*(Vector b) const requires(collums == 4 && vectorRowSize == 4 && rowSize >= 4 && vectorSize == 1 && std::same_as) { + + + // //std::cout << Vector(allX).ToString() << std::endl; + + // // __m128 result = _mm_permute_ps(b.v, 0b00000000); + // // result = _mm_fmadd_ps(result, r[0], r[3]); + + // // __m128 allY = _mm_permute_ps(b.v, 0b10101010); + // // result = _mm_fmadd_ps(allY, r[1], result); + + // // __m128 allZ = _mm_permute_ps(b.v, 0b01010101); + // // return Vector(_mm_fmadd_ps(allZ, r[2], result)); + // return Vector(1, 2, 3, 4); + // } + + + // Matrix operator*(Matrix b) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as) { + // Matrix result; + // result.r[0] = _mm_permute_ps(b.r[0], 0b00000000); + // result.r[1] = _mm_fmadd_ps(_mm_permute_ps(b.r[1], 0b00000000), reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0])); + // result.r[1] = _mm_permute_ps(b.r[1], 0b00000000); + // result.r[2] = _mm_permute_ps(b.r[2], 0b00000000); + // result.r[3] = _mm_permute_ps(b.r[3], 0b00000000); + + // // result.r[0] = _mm_fmadd_ps(allY, reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0])); + // // result.r[0] = _mm_fmadd_ps(allZ, reinterpret_cast<__m128>(r[2]), reinterpret_cast<__m128>(result.r[0])); + // // result.r[0] = _mm_fmadd_ps(allW, reinterpret_cast<__m128>(r[3]), reinterpret_cast<__m128>(result.r[0])); + + // Float4x4 store; + // result.Store(&store); + + // std::cout << std::format("{}", store) << std::endl; + + // return result; + // } + + // void 
Store(Float4x4* store) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as) { + // _mm_storeu_ps(store->r1, reinterpret_cast<__m128>(r[0])); + // _mm_storeu_ps(store->r2, reinterpret_cast<__m128>(r[1])); + // _mm_storeu_ps(store->r3, reinterpret_cast<__m128>(r[2])); + // _mm_storeu_ps(store->r4, reinterpret_cast<__m128>(r[3])); + // } + + + // // VectorVector operator*(VectorVector b) requires(collums == 4 && rowSize == 4 && vectorSize == 4 && std::same_as) { + // // __m512 result = _mm512_permute_ps(b.v, 0b11111111); + // // result = _mm512_fmadd_ps(result, reinterpret_cast<__m512>(r[0]), reinterpret_cast<__m512>(r[3])); + + // // __m512 allY = _mm512_permute_ps(b.v, 0b10101010); + // // result = _mm512_fmadd_ps(allY, reinterpret_cast<__m512>(r[1]), result); + + // // __m512 allZ = _mm512_permute_ps(b.v, 0b01010101); + // // return VectorVector(_mm512_fmadd_ps(allZ, reinterpret_cast<__m512>(r[2]), result)); + // // } + + // // m4x4float Transpose() const { + // // // x.x,x.y,y.x,y.y + // // __m128 vTemp1 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // // // x.z,x.w,y.z,y.w + // // __m128 vTemp3 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // // // z.x,z.y,w.x,w.y + // // __m128 vTemp2 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // // // z.z,z.w,w.z,w.w + // // __m128 vTemp4 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(3, 2, 3, 2)); + + // // return m4x4float( + // // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)), + // // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)), + // // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)), + // // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)) + // // ); + // // } + // // m4x4float operator*(m4x4float b) const { + // // __m256 t0 = _mm256_castps128_ps256(r[0]); + // // t0 = _mm256_insertf128_ps(t0, r[1], 1); + // // __m256 t1 = _mm256_castps128_ps256(r[2]); + // // t1 = _mm256_insertf128_ps(t1, r[3], 1); + + // // __m256 u0 = 
_mm256_castps128_ps256(b.r[0]); + // // u0 = _mm256_insertf128_ps(u0, b.r[1], 1); + // // __m256 u1 = _mm256_castps128_ps256(b.r[2]); + // // u1 = _mm256_insertf128_ps(u1, b.r[3], 1); + + // // __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); + // // __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); + // // __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); + // // __m256 c0 = _mm256_mul_ps(a0, b0); + // // __m256 c1 = _mm256_mul_ps(a1, b0); + + // // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); + // // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); + // // b0 = _mm256_permute2f128_ps(u0, u0, 0x11); + // // __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); + // // __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); + + // // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); + // // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); + // // __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); + // // __m256 c4 = _mm256_mul_ps(a0, b1); + // // __m256 c5 = _mm256_mul_ps(a1, b1); + + // // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); + // // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); + // // b1 = _mm256_permute2f128_ps(u1, u1, 0x11); + // // __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); + // // __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); + + // // t0 = _mm256_add_ps(c2, c6); + // // t1 = _mm256_add_ps(c3, c7); + + // // return m4x4float( + // // _mm256_castps256_ps128(t0), + // // _mm256_extractf128_ps(t0, 1), + // // _mm256_castps256_ps128(t1), + // // _mm256_extractf128_ps(t1, 1) + // // ); + // // } + + + void Store(Crafter::Float4x4& store) const { + _mm_storeu_ps(store.c1, c[0]); + _mm_storeu_ps(store.c2, c[1]); + _mm_storeu_ps(store.c3, c[2]); + _mm_storeu_ps(store.c4, c[3]); + } + }; +} + +template <> +struct std::formatter> : std::formatter { + auto format(const Crafter::Matrix& obj, format_context& ctx) const { + Crafter::Float4x4 store; + obj.Store(store); + return 
std::formatter::format(std::format("{}", store), ctx); + } +}; \ No newline at end of file diff --git a/Crafter.Math-Misc.cppm b/Crafter.Math-Misc.cppm new file mode 100644 index 0000000..4a1a42b --- /dev/null +++ b/Crafter.Math-Misc.cppm @@ -0,0 +1,66 @@ + +module; + +#include +#include + +export module Crafter.Math:Misc; + +export namespace Crafter { + //------------------------------------------------------------------------------------- + // DirectXMathMisc.inl -- SIMD C++ Math library + // + // Copyright (c) Microsoft Corporation. + // Licensed under the MIT License. + // + // http://go.microsoft.com/fwlink/?LinkID=615560 + //------------------------------------------------------------------------------------- + constexpr float XM_PI = 3.141592654f; + constexpr float XM_2PI = 6.283185307f; + constexpr float XM_1DIVPI = 0.318309886f; + constexpr float XM_1DIV2PI = 0.159154943f; + constexpr float XM_PIDIV2 = 1.570796327f; + constexpr float XM_PIDIV4 = 0.785398163f; + + + inline void XMScalarSinCos(float* pSin, float* pCos, float Value) noexcept + { + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
+ float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + float y2 = y * y; + + // 11-degree minimax approximation + *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; + + // 10-degree minimax approximation + float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; + *pCos = sign * p; + } +} \ No newline at end of file diff --git a/Crafter.Math-Vector.cppm b/Crafter.Math-Vector.cppm new file mode 100755 index 0000000..b01722f --- /dev/null +++ b/Crafter.Math-Vector.cppm @@ -0,0 +1,881 @@ +module; + +#include +#include +#include +#include +#include +#include + +export module Crafter.Math:Vector; + +import :BasicTypes; + +namespace Crafter { + export template + class Vector { + typedef + typename std::conditional<(sizeof(T)* len > 32 && (std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as)), __m512i, + typename std::conditional<(sizeof(T)* len > 16 && (std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as)), __m256i, + typename std::conditional<(sizeof(T)* len <= 16 && (std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as || std::same_as)), __m128i, + typename std::conditional<(len > 16 && std::same_as), __m512h, + typename std::conditional<(len > 8 && std::same_as), __m256h, + typename std::conditional<(len <= 8 && std::same_as), __m128h, + typename std::conditional<(len > 8 && std::same_as), __m512, + typename std::conditional<(len > 4 && std::same_as), __m256, + typename std::conditional<(len <= 4 && std::same_as), __m128, + typename std::conditional<(len > 4 && std::same_as), __m512d, + typename 
std::conditional<(len > 2 && std::same_as), __m256d, __m128d + >::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type vector_type; + + + public: + template + static consteval uint8_t GetVectorAlignedSize() { + if constexpr(std::same_as && std::same_as) { + return 64; + } else if constexpr(std::same_as || (std::same_as && std::same_as) || (std::same_as && std::same_as)) { + return 32; + } else if constexpr(std::same_as || std::same_as || (std::same_as && std::same_as) || (std::same_as && std::same_as) || (std::same_as && std::same_as)) { + return 16; + } else if constexpr(std::same_as || std::same_as || std::same_as || (std::same_as && std::same_as) || (std::same_as && std::same_as) || (std::same_as && std::same_as)) { + return 8; + } else if constexpr(std::same_as || std::same_as || (std::same_as && std::same_as) || (std::same_as && std::same_as)) { + return 4; + } else if constexpr(std::same_as || (std::same_as && std::same_as)) { + return 2; + } else{ + throw std::invalid_argument(""); + } + } + vector_type v; + + Vector() {}; + Vector(__m128h v) requires(std::same_as) : v(v) { } + Vector(__m128 v) requires(std::same_as) : v(v) { } + Vector(__m128d v) requires(std::same_as) : v(v) { } + Vector(__m128i v) requires(std::same_as) : v(v) { } + + Vector(__m256h v) requires(std::same_as) : v(v) { } + Vector(__m256 v) requires(std::same_as) : v(v) { } + Vector(__m256d v) requires(std::same_as) : v(v) { } + Vector(__m256i v) requires(std::same_as) : v(v) { } + + Vector(__m512h v) requires(std::same_as) : v(v) { } + Vector(__m512 v) requires(std::same_as) : v(v) { } + Vector(__m512d v) requires(std::same_as) : v(v) { } + Vector(__m512i v) requires(std::same_as) : v(v) { } + + template + void operator+=(Vector b) requires(Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + this->v = (*this+b).v; + } + template + void operator-=(Vector b) requires(Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + this->v = (*this-b).v; + } + 
template + void operator*=(Vector b) requires(Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + this->v = (*this*b).v; + } + template + void operator/=(Vector b) requires(Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + this->v = (*this/b).v; + } + + std::string ToString() const { /* Stores the register to a scratch array and renders the first len lanes as "{ a, b, ... }". */ + std::ostringstream ss; + ss << "{ "; + T store[GetVectorAlignedSize()]; + Store(store); + for(uint8_t i = 0; i < len; i++) { + ss << std::format("{}", store[i]); + if(i+1 < len) { + ss << ", "; + } + } + ss << " }"; + return ss.str(); + } +#pragma region 128 + Vector( + const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0, + const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0 + ) requires(std::same_as && std::same_as) { + __fp16 temp[]{ x0,y0,z0,w0,x1,y1,z1,w1,}; + v = _mm_loadu_ph(temp); /* unaligned load: temp is a plain local array, so 16-byte alignment is not guaranteed */ + } + + Vector(float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0) requires(std::same_as&& std::same_as) { + v = _mm_set_ps(w0, z0, y0, x0); + } + + Vector(double x0 = 0, double y0 = 0) requires(std::same_as&& std::same_as) { + v = _mm_set_pd(y0, x0); + } + + Vector( + int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0, + int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0, + int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0, + int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0 + ) requires(std::same_as && std::same_as) { + v = _mm_set_epi8(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector( + int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0, + int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm_set_epi16(w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector(int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0) requires(std::same_as&& std::same_as) { + v = _mm_set_epi32(w0, z0, y0, x0); + } + + Vector(int64_t x0 = 0, int64_t y0 = 0) 
requires(std::same_as&& std::same_as) { + v = _mm_set_epi64x(y0, x0); + } + + Vector( + uint8_t x0 = 0, uint8_t y0 = 0, uint8_t z0 = 0, uint8_t w0 = 0, + uint8_t x1 = 0, uint8_t y1 = 0, uint8_t z1 = 0, uint8_t w1 = 0, + uint8_t x2 = 0, uint8_t y2 = 0, uint8_t z2 = 0, uint8_t w2 = 0, + uint8_t x3 = 0, uint8_t y3 = 0, uint8_t z3 = 0, uint8_t w3 = 0 + ) requires(std::same_as && std::same_as) { + v = _mm_set_epi8(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector( + uint16_t x0 = 0, uint16_t y0 = 0, uint16_t z0 = 0, uint16_t w0 = 0, + uint16_t x1 = 0, uint16_t y1 = 0, uint16_t z1 = 0, uint16_t w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm_set_epi16(w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector(uint32_t x0 = 0, uint32_t y0 = 0, uint32_t z0 = 0, uint32_t w0 = 0) requires(std::same_as&& std::same_as) { + v = _mm_set_epi32(w0, z0, y0, x0); + } + + Vector(uint64_t x0 = 0, uint64_t y0 = 0) requires(std::same_as&& std::same_as) { + v = _mm_set_epi64x(y0, x0); + } + + static Vector Zero() requires(std::same_as) { + return Vector(_mm_setzero_ps()); + } + + void Store(T* data) const requires(std::same_as) { + _mm_storeu_ph(reinterpret_cast(data), reinterpret_cast<__m128h>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm_storeu_ps(reinterpret_cast(data), reinterpret_cast<__m128>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm_storeu_pd(data, reinterpret_cast<__m128d>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm_storeu_epi8(reinterpret_cast(data), reinterpret_cast<__m128i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm_storeu_epi16(reinterpret_cast(data), reinterpret_cast<__m128i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm_storeu_epi32(reinterpret_cast(data), reinterpret_cast<__m128i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + 
_mm_storeu_epi64(reinterpret_cast(data), reinterpret_cast<__m128i>(v)); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_mul_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_div_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_mul_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_div_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return 
Vector(_mm_add_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_mul_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_div_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_epi8(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_epi8(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi8(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi8(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_epi16(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return 
Vector(_mm_sub_epi16(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi16(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi16(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { /* Lane-wise 32-bit product: _mm_mullo_epi32 keeps the low 32 bits of every lane, whereas _mm_mul_epi32 only multiplies lanes 0 and 2 into two 64-bit results. */ + return Vector(_mm_mullo_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi32(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_add_epi64(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm_sub_epi64(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return 
Vector(_mm512_mul_epi64(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi64(v, bv)); + // } + + template + Vector AddMask() { + + } +#pragma endregion +#pragma region 256 + Vector( + const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0, + const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0, + const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0, + const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0 + ) requires(std::same_as&& std::same_as) { + __fp16 temp[]{ w0,z0,y0,x0,w1,z1,y1,x1,w2,z2,y2,x2,w3,z3,y3,x3 }; + v = _mm256_load_ph(temp); + } + + Vector( + float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0, + float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector(double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0) requires(std::same_as&& std::same_as) { + v = _mm256_set_pd(w0, z0, y0, x0); + } + + Vector( + int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0, + int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0, + int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0, + int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0, + int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0, + int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0, + int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0, + int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm256_set_epi8(w7, z7, y7, x7, w6, z6, y6, x6, w5, z5, y5, x5, w4, z4, y4, x4, w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector( + int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0, + int16_t x1 = 
0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0, + int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0, + int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm256_set_epi16(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector( + int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0, + int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0); + } + + Vector(int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0) requires(std::same_as&& std::same_as) { + v = _mm256_set_epi64x(w0, z0, y0, x0); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_mul_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_div_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v))); + } + + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return 
Vector(_mm256_sub_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_mul_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_div_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_mul_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_div_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_epi8(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_epi8(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // 
__m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi8(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi8(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_epi16(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_epi16(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi16(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi16(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_mul_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i 
bv = b.v; + // return Vector(_mm512_div_epi32(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_add_epi64(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm256_sub_epi64(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi64(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi64(v, bv)); + // } + + void Store(T* data) const requires(std::same_as) { + _mm256_storeu_ph(reinterpret_cast(data), reinterpret_cast<__m256h>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm256_storeu_ps(data, reinterpret_cast<__m256>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm256_storeu_pd(data, reinterpret_cast<__m256d>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm256_storeu_epi8(reinterpret_cast(data), reinterpret_cast<__m256i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm256_storeu_epi16(reinterpret_cast(data), reinterpret_cast<__m256i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm256_storeu_epi32(reinterpret_cast(data), reinterpret_cast<__m256i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm256_storeu_epi64(reinterpret_cast(data), reinterpret_cast<__m256i>(v)); + } +#pragma endregion +#pragma region 512 + Vector( + const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, 
const __fp16& w0 = 0, + const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0, + const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0, + const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0, + const __fp16& x4 = 0, const __fp16& y4 = 0, const __fp16& z4 = 0, const __fp16& w4 = 0, + const __fp16& x5 = 0, const __fp16& y5 = 0, const __fp16& z5 = 0, const __fp16& w5 = 0, + const __fp16& x6 = 0, const __fp16& y6 = 0, const __fp16& z6 = 0, const __fp16& w6 = 0, + const __fp16& x7 = 0, const __fp16& y7 = 0, const __fp16& z7 = 0, const __fp16& w7 = 0 + ) requires(std::same_as&& std::same_as) { + __fp16 temp[]{ w0,z0,y0,x0, w1,z1,y1,x1, w2,z2,y2,x2,w3, z3,y3,x3, w4,z4,y4,x4, w5,z5,y5,x5, w6,z6,y6,x6, w7,z7,y7,x7 }; + v = _mm512_load_ph(temp); + } + + Vector( + float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0, + float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0, + float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0, + float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_ps( + w3, z3, y3, x3, + w2, z2, y2, x2, + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + Vector( + double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0, + double x1 = 0, double y1 = 0, double z1 = 0, double w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_pd( + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + Vector( + int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0, + int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0, + int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0, + int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0, + int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0, + int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0, + int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0, + int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0, + int8_t 
x8 = 0, int8_t y8 = 0, int8_t z8 = 0, int8_t w8 = 0, + int8_t x9 = 0, int8_t y9 = 0, int8_t z9 = 0, int8_t w9 = 0, + int8_t x10 = 0, int8_t y10 = 0, int8_t z10 = 0, int8_t w10 = 0, + int8_t x11 = 0, int8_t y11 = 0, int8_t z11 = 0, int8_t w11 = 0, + int8_t x12 = 0, int8_t y12 = 0, int8_t z12 = 0, int8_t w12 = 0, + int8_t x13 = 0, int8_t y13 = 0, int8_t z13 = 0, int8_t w13 = 0, + int8_t x14 = 0, int8_t y14 = 0, int8_t z14 = 0, int8_t w14 = 0, + int8_t x15 = 0, int8_t y15 = 0, int8_t z15 = 0, int8_t w15 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_epi8( + w15, z15, y15, x15, + w14, z14, y14, x14, + w13, z13, y13, x13, + w12, z12, y12, x12, + w11, z11, y11, x11, + w10, z10, y10, x10, + w9, z9, y9, x9, + w8, z8, y8, x8, + w7, z7, y7, x7, + w6, z6, y6, x6, + w5, z5, y5, x5, + w4, z4, y4, x4, + w3, z3, y3, x3, + w2, z2, y2, x2, + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + Vector( + int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0, + int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0, + int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0, + int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0, + int16_t x4 = 0, int16_t y4 = 0, int16_t z4 = 0, int16_t w4 = 0, + int16_t x5 = 0, int16_t y5 = 0, int16_t z5 = 0, int16_t w5 = 0, + int16_t x6 = 0, int16_t y6 = 0, int16_t z6 = 0, int16_t w6 = 0, + int16_t x7 = 0, int16_t y7 = 0, int16_t z7 = 0, int16_t w7 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_epi16( + w7, z7, y7, x7, + w6, z6, y6, x6, + w5, z5, y5, x5, + w4, z4, y4, x4, + w3, z3, y3, x3, + w2, z2, y2, x2, + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + Vector( + int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0, + int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0, + int32_t x2 = 0, int32_t y2 = 0, int32_t z2 = 0, int32_t w2 = 0, + int32_t x3 = 0, int32_t y3 = 0, int32_t z3 = 0, int32_t w3 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_epi32( 
+ w3, z3, y3, x3, + w2, z2, y2, x2, + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + Vector( + int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0, + int64_t x1 = 0, int64_t y1 = 0, int64_t z1 = 0, int64_t w1 = 0 + ) requires(std::same_as&& std::same_as) { + v = _mm512_set_epi64( + w1, z1, y1, x1, + w0, z0, y0, x0 + ); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_mul_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_div_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_mul_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == 
GetVectorAlignedSize()) { + return Vector(_mm512_div_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_mul_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v))); + } + template + Vector operator/(Vector b) requires(std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_div_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v))); + } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_epi8(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_epi8(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi8(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi8(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == 
GetVectorAlignedSize()) { + return Vector(_mm512_add_epi16(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_epi16(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi16(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi16(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + template + Vector operator*(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_mul_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi32(v, bv)); + // } + + template + Vector operator+(Vector b) requires(std::same_as && std::same_as && Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_add_epi64(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + template + Vector operator-(Vector b) requires(std::same_as && std::same_as && 
Vector::GetVectorAlignedSize() == GetVectorAlignedSize()) { + return Vector(_mm512_sub_epi64(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v))); + } + // template + // Vector operator*(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_mul_epi64(v, bv)); + // } + // template + // Vector operator/(Vector b) requires(std::same_as && std::same_as) { + // __m512i v = this->v; + // __m512i bv = b.v; + // return Vector(_mm512_div_epi64(v, bv)); + // } + + void Store(T* data) const requires(std::same_as) { + _mm512_storeu_ph(reinterpret_cast(data), reinterpret_cast<__m512h>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm512_storeu_ps(reinterpret_cast(data), reinterpret_cast<__m512>(v)); + } + void Store(T* data) const requires(std::same_as) { + _mm512_storeu_pd(data, reinterpret_cast<__m512d>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm512_storeu_epi8(reinterpret_cast(data), reinterpret_cast<__m512i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm512_storeu_epi16(reinterpret_cast(data), reinterpret_cast<__m512i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm512_storeu_epi32(reinterpret_cast(data), reinterpret_cast<__m512i>(v)); + } + void Store(T* data) const requires(std::same_as && std::same_as) { + _mm512_storeu_epi64(reinterpret_cast(data), reinterpret_cast<__m512i>(v)); + } +#pragma endregion + }; + + export template + class VectorVector : public Vector { + public: + VectorVector(__m128h v0, __m128h v1) requires(std::same_as && vectorLenght*Vector::GetVectorAlignedSize() == Vector::GetVectorAlignedSize()) { + this->v = _mm256_castps128_ps256(v0); + this->v = _mm256_insertf128_ps(this->v,v1,1); + } + VectorVector(__m128 v0, __m128 v1, __m128 v2, __m128 v3) requires(std::same_as && vectorLenght*Vector::GetVectorAlignedSize() == 
Vector::GetVectorAlignedSize()) { + this->v = _mm512_castps256_ps512(_mm256_castps128_ps256(v0)); + this->v = _mm512_insertfloatx4(this->v, v1, 1); + this->v = _mm512_insertfloatx4(this->v, v2, 2); + this->v = _mm512_insertfloatx4(this->v, v3, 3); + } + VectorVector(__m512 v) : Vector(v) { //requires(std::same_as && vectorLenght*Vector::GetVectorAlignedSize() == Vector::GetVectorAlignedSize()) : Vector(v) + + } + VectorVector( + float x0 = 0, float y0 = 0, float z0 = 0, + float x1 = 0, float y1 = 0, float z1 = 0, + float x2 = 0, float y2 = 0, float z2 = 0, + float x3 = 0, float y3 = 0, float z3 = 0, + float x4 = 0, float y4 = 0, float z4 = 0, + float x5 = 0 + ) requires(std::same_as && vectorLenght*Vector::GetVectorAlignedSize() == Vector::GetVectorAlignedSize() && len == 3) : + Vector( + x0,y0,z0, + x1,y1,z1, + x2,y2,z2, + x3,y3,z3, + x4,y4,z4, + x5) + {} + + VectorVector( + float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0, + float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0, + float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0, + float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0 + ) : + Vector( + w3, z3, y3, x3, + w2, z2, y2, x2, + w1, z1, y1, x1, + w0, z0, y0, x0) + {} + }; + + export Vector g_XMNegIdentityR0(-1.0f, 0.0f, 0.0f, 0.0f); + export Vector g_XMNegIdentityR1(0.0f, -1.0f, 0.0f, 0.0f); + export Vector g_XMNegIdentityR2(0.0f, 0.0f, -1.0f, 0.0f); + export Vector g_XMNegIdentityR3(0.0f, 0.0f, 0.0f, -1.0f); + export Vector g_XMIdentityR0(1.0f, 0.0f, 0.0f, 0.0f); + export Vector g_XMIdentityR1(0.0f, 1.0f, 0.0f, 0.0f); + export Vector g_XMIdentityR2(0.0f, 0.0f, 1.0f, 0.0f); + export Vector g_XMIdentityR3(0.0f, 0.0f, 0.0f, 1.0f); + export Vector g_XMMaskXY(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000); + export Vector g_XMMask3(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000); + export Vector g_XMMaskX(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000); + export Vector g_XMMaskY(0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000); + export Vector 
g_XMMaskZ(0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000); + export Vector g_XMMaskW( 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF); +} \ No newline at end of file diff --git a/Crafter.Math.cppm b/Crafter.Math.cppm new file mode 100644 index 0000000..a98ecde --- /dev/null +++ b/Crafter.Math.cppm @@ -0,0 +1,5 @@ +export module Crafter.Math; +export import :BasicTypes; +export import :Vector; +export import :Matrix; +export import :Misc; \ No newline at end of file diff --git a/bin/crafter-math b/bin/crafter-math new file mode 100755 index 0000000..7d2000f Binary files /dev/null and b/bin/crafter-math differ diff --git a/build/debug-lib/Crafter.Math-BasicTypes.o b/build/debug-lib/Crafter.Math-BasicTypes.o new file mode 100644 index 0000000..3f3cb74 Binary files /dev/null and b/build/debug-lib/Crafter.Math-BasicTypes.o differ diff --git a/build/debug-lib/Crafter.Math-Misc.o b/build/debug-lib/Crafter.Math-Misc.o new file mode 100644 index 0000000..812c528 Binary files /dev/null and b/build/debug-lib/Crafter.Math-Misc.o differ diff --git a/build/debug-lib/Crafter.Math-Vector.o b/build/debug-lib/Crafter.Math-Vector.o new file mode 100644 index 0000000..762bb12 Binary files /dev/null and b/build/debug-lib/Crafter.Math-Vector.o differ diff --git a/build/debug/Crafter.Math-BasicTypes.o b/build/debug/Crafter.Math-BasicTypes.o new file mode 100644 index 0000000..56840b2 Binary files /dev/null and b/build/debug/Crafter.Math-BasicTypes.o differ diff --git a/build/debug/Crafter.Math-BasicTypes.pcm b/build/debug/Crafter.Math-BasicTypes.pcm new file mode 100644 index 0000000..58730d6 Binary files /dev/null and b/build/debug/Crafter.Math-BasicTypes.pcm differ diff --git a/build/debug/Crafter.Math-Matrix.o b/build/debug/Crafter.Math-Matrix.o new file mode 100644 index 0000000..e4a38ff Binary files /dev/null and b/build/debug/Crafter.Math-Matrix.o differ diff --git a/build/debug/Crafter.Math-Matrix.pcm b/build/debug/Crafter.Math-Matrix.pcm new file mode 100644 index 
0000000..fb43bf7 Binary files /dev/null and b/build/debug/Crafter.Math-Matrix.pcm differ diff --git a/build/debug/Crafter.Math-Misc.o b/build/debug/Crafter.Math-Misc.o new file mode 100644 index 0000000..7f4b8e3 Binary files /dev/null and b/build/debug/Crafter.Math-Misc.o differ diff --git a/build/debug/Crafter.Math-Misc.pcm b/build/debug/Crafter.Math-Misc.pcm new file mode 100644 index 0000000..aa9b13f Binary files /dev/null and b/build/debug/Crafter.Math-Misc.pcm differ diff --git a/build/debug/Crafter.Math-Vector.o b/build/debug/Crafter.Math-Vector.o new file mode 100644 index 0000000..1cb5d56 Binary files /dev/null and b/build/debug/Crafter.Math-Vector.o differ diff --git a/build/debug/Crafter.Math-Vector.pcm b/build/debug/Crafter.Math-Vector.pcm new file mode 100644 index 0000000..e8b8f44 Binary files /dev/null and b/build/debug/Crafter.Math-Vector.pcm differ diff --git a/build/debug/Crafter.Math.o b/build/debug/Crafter.Math.o new file mode 100644 index 0000000..db0e129 Binary files /dev/null and b/build/debug/Crafter.Math.o differ diff --git a/build/debug/Crafter.Math.pcm b/build/debug/Crafter.Math.pcm new file mode 100644 index 0000000..e7ad020 Binary files /dev/null and b/build/debug/Crafter.Math.pcm differ diff --git a/build/debug/main_source.o b/build/debug/main_source.o new file mode 100644 index 0000000..d955820 Binary files /dev/null and b/build/debug/main_source.o differ diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..ad67ffc --- /dev/null +++ b/main.cpp @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include + +import Crafter.Math; +using namespace Crafter; + + +int main() { + Matrix matrix( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 1, 0, 0, 1 + ); + Vector test(0, 0, 0, 1); + Vector result = matrix*test; + std::cout << result.ToString() << std::endl; +} \ No newline at end of file diff --git a/project.json b/project.json new file mode 100644 index 0000000..acaa705 --- /dev/null +++ b/project.json @@ -0,0 +1,35 @@ +{ 
+ "name": "crafter-math", + "configurations": [ + { + "name": "base", + "standard": "c++26", + "source_files": [], + "module_files": ["Crafter.Math-Vector", "Crafter.Math-BasicTypes", "Crafter.Math-MatrixAMX", "Crafter.Math-Matrix", "Crafter.Math-Misc", "Crafter.Math"], + "build_dir": "./build", + "output_dir": "./bin" + }, + { + "name": "debug", + "type": "executable", + "source_files": ["main"], + "optimization_level": "3", + "extends":["base"] + }, + { + "name": "lib", + "extends": ["base"], + "type":"library" + }, + { + "name": "debug-lib", + "extends": ["lib"], + "optimization_level": "0" + }, + { + "name": "release-lib", + "extends": ["lib"], + "optimization_level": "3" + } + ] +}