This commit is contained in:
Jorijn van der Graaf 2025-05-05 02:01:44 +02:00
commit d0a8b12c1a
22 changed files with 1329 additions and 0 deletions

43
Crafter.Math-BasicTypes.cppm Executable file
View file

@ -0,0 +1,43 @@
module;
#include <cstdint>
#include <stdfloat>
#include <format>
export module Crafter.Math:BasicTypes;
namespace Crafter {
// Plain aggregate holding two single-precision components (x, y).
export struct Float2 {
    float x, y;
};
// Plain aggregate holding three single-precision components (x, y, z).
export struct Float3 {
    float x, y, z;
};
// Plain aggregate holding four single-precision components (x, y, z, w).
export struct Float4 {
    float x, y, z, w;
};
// Sixteen floats stored as four consecutive 4-element arrays c1..c4.
// Matrix::Store writes one SIMD register into each array.
export struct Float4x4 {
    float c1[4], c2[4], c3[4], c4[4];
};
}
// std::format support for Crafter::Float4x4.
// Output is wrapped in braces and has four lines; line i lists
// c1[i], c2[i], c3[i], c4[i] separated by ", ".
template <>
struct std::formatter<Crafter::Float4x4> : std::formatter<std::string> {
    auto format(const Crafter::Float4x4& obj, format_context& ctx) const {
        // Render to a temporary string first, then let the std::string
        // formatter apply any format-spec handled by the base class.
        const std::string rendered = std::format(
            "{{{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}\n{}, {}, {}, {}}}",
            obj.c1[0], obj.c2[0], obj.c3[0], obj.c4[0],
            obj.c1[1], obj.c2[1], obj.c3[1], obj.c4[1],
            obj.c1[2], obj.c2[2], obj.c3[2], obj.c4[2],
            obj.c1[3], obj.c2[3], obj.c3[3], obj.c4[3]);
        return std::formatter<std::string>::format(rendered, ctx);
    }
};

278
Crafter.Math-Matrix.cppm Executable file
View file

@ -0,0 +1,278 @@
module;
#include <type_traits>
#include <concepts>
#include <immintrin.h>
#include <string>
#include <sstream>
#include <iostream>
export module Crafter.Math:Matrix;
import :BasicTypes;
import :Vector;
import :Misc;
namespace Crafter {
export template <typename T, uint32_t collumSize, uint32_t rowSize, uint32_t repeats>
class Matrix {
public:
typedef
typename std::conditional<(sizeof(T)* collumSize*repeats > 32 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m512i,
typename std::conditional<(sizeof(T)* collumSize*repeats > 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m256i,
typename std::conditional<(sizeof(T)* collumSize*repeats <= 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m128i,
typename std::conditional<(collumSize*repeats > 16 && std::same_as<T, __fp16>), __m512h,
typename std::conditional<(collumSize*repeats > 8 && std::same_as<T, __fp16>), __m256h,
typename std::conditional<(collumSize*repeats <= 8 && std::same_as<T, __fp16>), __m128h,
typename std::conditional<(collumSize*repeats > 8 && std::same_as<T, float>), __m512,
typename std::conditional<(collumSize*repeats > 4 && std::same_as<T, float>), __m256,
typename std::conditional<(collumSize*repeats <= 4 && std::same_as<T, float>), __m128,
typename std::conditional<(collumSize*repeats > 4 && std::same_as<T, double>), __m512d,
typename std::conditional<(collumSize*repeats > 2 && std::same_as<T, double>), __m256d, __m128d
>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type collum_type;
collum_type c[rowSize];
// Default constructor: performs no initialization of c.
Matrix() {}
Matrix(__m128 c0, __m128 c1, __m128 c2, __m128 c3) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
c[0] = c0;
c[1] = c1;
c[2] = c2;
c[3] = c3;
}
Matrix(
float x0, float y0, float z0, float w0,
float x1, float y1, float z1, float w1,
float x2, float y2, float z2, float w2,
float x3, float y3, float z3, float w3
) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
c[0] = _mm_set_ps(x3, x2, x1, x0);
c[1] = _mm_set_ps(y3, y2, y1, y0);
c[2] = _mm_set_ps(z3, z2, z1, z0);
c[3] = _mm_set_ps(w3, w2, w1, w0);
}
// Matrix * vector product for a 4x4 float matrix.
//
// c[k] holds the k-th component of every group passed to the scalar
// constructor (c[0] = all x, c[1] = all y, ...), so the product is
//     result = c[0] * b.x + c[1] * b.y + c[2] * b.z + c[3] * b.w
// Each component of b therefore has to be broadcast to all four lanes before
// it is multiplied with its register; multiplying by b.v directly would only
// compute (c[0] + c[1] + c[2] + c[3]) * b lane by lane.
Vector<T, rowSize> operator*(Vector<T, 4> b) const requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
    const __m128 bv = b.v;
    __m128 result = _mm_mul_ps(c[0], _mm_shuffle_ps(bv, bv, _MM_SHUFFLE(0, 0, 0, 0)));
    result = _mm_fmadd_ps(c[1], _mm_shuffle_ps(bv, bv, _MM_SHUFFLE(1, 1, 1, 1)), result);
    result = _mm_fmadd_ps(c[2], _mm_shuffle_ps(bv, bv, _MM_SHUFFLE(2, 2, 2, 2)), result);
    result = _mm_fmadd_ps(c[3], _mm_shuffle_ps(bv, bv, _MM_SHUFFLE(3, 3, 3, 3)), result);
    return Vector<T, rowSize>(result);
}
// static Matrix<T, collums, rowSize, vectorSize> Scaling(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
// return Matrix<T, collums, rowSize, vectorSize>(
// _mm_set_ps(0, 0, 0, x),
// _mm_set_ps(0, 0, y, 0),
// _mm_set_ps(0, z, 0, 0),
// _mm_set_ps(1, 0, 0, 0)
// );
// }
// static Matrix<T, collums, rowSize, vectorSize> Translation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
// return Matrix<T, collums, rowSize, vectorSize>(
// _mm_set_ps(0, 0, 0, 1),
// _mm_set_ps(0, 0, 1, 0),
// _mm_set_ps(0, 1, 0, 0),
// _mm_set_ps(1, z, y, x)
// );
// }
// // static Matrix<T, collums, rowSize, vectorSize> Rotation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
// // return Matrix<T, collums, rowSize, vectorSize>(
// // _mm_set_ps(0, 0, 0, 1),
// // _mm_set_ps(0, 0, 1, 0),
// // _mm_set_ps(0, 1, 0, 0),
// // _mm_set_ps(1, z, y, x)
// // );
// // }
// static Matrix<T, collums, rowSize, vectorSize> Idenity() requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
// return Matrix<T, collums, rowSize, vectorSize>(
// _mm_set_ps(0, 0, 0, 1),
// _mm_set_ps(0, 0, 1, 0),
// _mm_set_ps(0, 1, 0, 0),
// _mm_set_ps(1, 0, 0, 0)
// );
// }
// static Matrix<T, collums, rowSize, vectorSize> Projection(float FovAngleY, float AspectRatio, float NearZ, float FarZ) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
// float SinFov;
// float CosFov;
// XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
// float fRange = FarZ / (NearZ - FarZ);
// // Note: This is recorded on the stack
// float Height = CosFov / SinFov;
// __m128 rMem = {
// Height / AspectRatio,
// Height,
// fRange,
// fRange * NearZ
// };
// // Copy from memory to SSE register
// __m128 vValues = rMem;
// __m128 vTemp = _mm_setzero_ps();
// // Copy x only
// vTemp = _mm_move_ss(vTemp, vValues);
// // Height / AspectRatio,0,0,0
// Matrix<T, collums, rowSize, vectorSize> M;
// M.r[0] = vTemp;
// // 0,Height,0,0
// vTemp = vValues;
// vTemp = _mm_and_ps(vTemp, g_XMMaskY.v);
// M.r[1] = vTemp;
// // x=fRange,y=-fRange * NearZ,0,-1.0f
// vTemp = _mm_setzero_ps();
// vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3.v, _MM_SHUFFLE(3, 2, 3, 2));
// // 0,0,fRange,-1.0f
// vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
// M.r[2] = vTemp;
// // 0,0,fRange * NearZ,0.0f
// vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
// M.r[3] = vTemp;
// return M;
// }
// template <uint32_t vectorRowSize>
// Vector<T, rowSize> operator*(Vector<T, vectorRowSize> b) const requires(collums == 4 && vectorRowSize == 4 && rowSize >= 4 && vectorSize == 1 && std::same_as<T, float>) {
// //std::cout << Vector<T, rowSize>(allX).ToString() << std::endl;
// // __m128 result = _mm_permute_ps(b.v, 0b00000000);
// // result = _mm_fmadd_ps(result, r[0], r[3]);
// // __m128 allY = _mm_permute_ps(b.v, 0b10101010);
// // result = _mm_fmadd_ps(allY, r[1], result);
// // __m128 allZ = _mm_permute_ps(b.v, 0b01010101);
// // return Vector<T, rowSize>(_mm_fmadd_ps(allZ, r[2], result));
// return Vector<T, vectorRowSize>(1, 2, 3, 4);
// }
// Matrix<T, collums, rowSize, vectorSize> operator*(Matrix<T, collums, rowSize, vectorSize> b) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as<T, float>) {
// Matrix<T, collums, rowSize, vectorSize> result;
// result.r[0] = _mm_permute_ps(b.r[0], 0b00000000);
// result.r[1] = _mm_fmadd_ps(_mm_permute_ps(b.r[1], 0b00000000), reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0]));
// result.r[1] = _mm_permute_ps(b.r[1], 0b00000000);
// result.r[2] = _mm_permute_ps(b.r[2], 0b00000000);
// result.r[3] = _mm_permute_ps(b.r[3], 0b00000000);
// // result.r[0] = _mm_fmadd_ps(allY, reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0]));
// // result.r[0] = _mm_fmadd_ps(allZ, reinterpret_cast<__m128>(r[2]), reinterpret_cast<__m128>(result.r[0]));
// // result.r[0] = _mm_fmadd_ps(allW, reinterpret_cast<__m128>(r[3]), reinterpret_cast<__m128>(result.r[0]));
// Float4x4 store;
// result.Store(&store);
// std::cout << std::format("{}", store) << std::endl;
// return result;
// }
// void Store(Float4x4* store) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as<T, float>) {
// _mm_storeu_ps(store->r1, reinterpret_cast<__m128>(r[0]));
// _mm_storeu_ps(store->r2, reinterpret_cast<__m128>(r[1]));
// _mm_storeu_ps(store->r3, reinterpret_cast<__m128>(r[2]));
// _mm_storeu_ps(store->r4, reinterpret_cast<__m128>(r[3]));
// }
// // VectorVector<T, 4, 4> operator*(VectorVector<T, 4, 4> b) requires(collums == 4 && rowSize == 4 && vectorSize == 4 && std::same_as<T, float>) {
// // __m512 result = _mm512_permute_ps(b.v, 0b11111111);
// // result = _mm512_fmadd_ps(result, reinterpret_cast<__m512>(r[0]), reinterpret_cast<__m512>(r[3]));
// // __m512 allY = _mm512_permute_ps(b.v, 0b10101010);
// // result = _mm512_fmadd_ps(allY, reinterpret_cast<__m512>(r[1]), result);
// // __m512 allZ = _mm512_permute_ps(b.v, 0b01010101);
// // return VectorVector<T, 4, 4>(_mm512_fmadd_ps(allZ, reinterpret_cast<__m512>(r[2]), result));
// // }
// // m4x4float Transpose() const {
// // // x.x,x.y,y.x,y.y
// // __m128 vTemp1 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(1, 0, 1, 0));
// // // x.z,x.w,y.z,y.w
// // __m128 vTemp3 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(3, 2, 3, 2));
// // // z.x,z.y,w.x,w.y
// // __m128 vTemp2 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(1, 0, 1, 0));
// // // z.z,z.w,w.z,w.w
// // __m128 vTemp4 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(3, 2, 3, 2));
// // return m4x4float(
// // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)),
// // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)),
// // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)),
// // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1))
// // );
// // }
// // m4x4float operator*(m4x4float b) const {
// // __m256 t0 = _mm256_castps128_ps256(r[0]);
// // t0 = _mm256_insertf128_ps(t0, r[1], 1);
// // __m256 t1 = _mm256_castps128_ps256(r[2]);
// // t1 = _mm256_insertf128_ps(t1, r[3], 1);
// // __m256 u0 = _mm256_castps128_ps256(b.r[0]);
// // u0 = _mm256_insertf128_ps(u0, b.r[1], 1);
// // __m256 u1 = _mm256_castps128_ps256(b.r[2]);
// // u1 = _mm256_insertf128_ps(u1, b.r[3], 1);
// // __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0));
// // __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0));
// // __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00);
// // __m256 c0 = _mm256_mul_ps(a0, b0);
// // __m256 c1 = _mm256_mul_ps(a1, b0);
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1));
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1));
// // b0 = _mm256_permute2f128_ps(u0, u0, 0x11);
// // __m256 c2 = _mm256_fmadd_ps(a0, b0, c0);
// // __m256 c3 = _mm256_fmadd_ps(a1, b0, c1);
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2));
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2));
// // __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00);
// // __m256 c4 = _mm256_mul_ps(a0, b1);
// // __m256 c5 = _mm256_mul_ps(a1, b1);
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3));
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3));
// // b1 = _mm256_permute2f128_ps(u1, u1, 0x11);
// // __m256 c6 = _mm256_fmadd_ps(a0, b1, c4);
// // __m256 c7 = _mm256_fmadd_ps(a1, b1, c5);
// // t0 = _mm256_add_ps(c2, c6);
// // t1 = _mm256_add_ps(c3, c7);
// // return m4x4float(
// // _mm256_castps256_ps128(t0),
// // _mm256_extractf128_ps(t0, 1),
// // _mm256_castps256_ps128(t1),
// // _mm256_extractf128_ps(t1, 1)
// // );
// // }
// Copies c[0]..c[3] into store.c1..store.c4 using unaligned stores.
// Constrained to matrices whose registers are __m128 and that have at least
// four of them, because the body reads c[0]..c[3] as four-float registers.
void Store(Crafter::Float4x4& store) const requires(std::same_as<collum_type, __m128> && rowSize >= 4) {
    _mm_storeu_ps(store.c1, c[0]);
    _mm_storeu_ps(store.c2, c[1]);
    _mm_storeu_ps(store.c3, c[2]);
    _mm_storeu_ps(store.c4, c[3]);
}
};
}
// std::format support for 4x4 float matrices: the matrix is copied into a
// Float4x4 and rendered with that type's formatter.
template <>
struct std::formatter<Crafter::Matrix<float, 4, 4, 1>> : std::formatter<std::string> {
    auto format(const Crafter::Matrix<float, 4, 4, 1>& obj, format_context& ctx) const {
        Crafter::Float4x4 unpacked;
        obj.Store(unpacked);
        const std::string rendered = std::format("{}", unpacked);
        return std::formatter<std::string>::format(rendered, ctx);
    }
};

66
Crafter.Math-Misc.cppm Normal file
View file

@ -0,0 +1,66 @@
module;
#include <cstdint>
#include <stdfloat>
export module Crafter.Math:Misc;
export namespace Crafter {
//-------------------------------------------------------------------------------------
// DirectXMathMisc.inl -- SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
// Single-precision multiples and fractions of pi used by XMScalarSinCos.
constexpr float XM_PI = 3.141592654f;      // pi
constexpr float XM_2PI = 6.283185307f;     // 2 * pi
constexpr float XM_1DIVPI = 0.318309886f;  // 1 / pi
constexpr float XM_1DIV2PI = 0.159154943f; // 1 / (2 * pi)
constexpr float XM_PIDIV2 = 1.570796327f;  // pi / 2
constexpr float XM_PIDIV4 = 0.785398163f;  // pi / 4

// Computes polynomial approximations of sin(Value) and cos(Value) in one call.
//   pSin  - receives the sine approximation (must be non-null)
//   pCos  - receives the cosine approximation (must be non-null)
//   Value - angle in radians
inline void XMScalarSinCos(float* pSin, float* pCos, float Value) noexcept
{
    // Range reduction, step 1: subtract the nearest whole number of turns so
    // that y lies in [-pi, pi]. The int cast truncates toward zero, hence the
    // +/-0.5 bias chosen by the sign of Value.
    const float scaled = XM_1DIV2PI * Value;
    const float turns = static_cast<float>(static_cast<int>(
        (Value >= 0.0f) ? (scaled + 0.5f) : (scaled - 0.5f)));
    float y = Value - XM_2PI * turns;

    // Range reduction, step 2: reflect y into [-pi/2, pi/2]. The sine is
    // unchanged by the reflection; the cosine changes sign.
    float sign = +1.0f;
    if (y > XM_PIDIV2)
    {
        y = XM_PI - y;
        sign = -1.0f;
    }
    else if (y < -XM_PIDIV2)
    {
        y = -XM_PI - y;
        sign = -1.0f;
    }

    const float y2 = y * y;

    // 11-degree minimax approximation
    *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y;

    // 10-degree minimax approximation
    const float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f;
    *pCos = sign * p;
}
}

881
Crafter.Math-Vector.cppm Executable file
View file

@ -0,0 +1,881 @@
module;
#include <type_traits>
#include <concepts>
#include <immintrin.h>
#include <string>
#include <sstream>
#include <iostream>
export module Crafter.Math:Vector;
import :BasicTypes;
namespace Crafter {
export template <typename T, uint32_t len>
class Vector {
typedef
typename std::conditional<(sizeof(T)* len > 32 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m512i,
typename std::conditional<(sizeof(T)* len > 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m256i,
typename std::conditional<(sizeof(T)* len <= 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m128i,
typename std::conditional<(len > 16 && std::same_as<T, __fp16>), __m512h,
typename std::conditional<(len > 8 && std::same_as<T, __fp16>), __m256h,
typename std::conditional<(len <= 8 && std::same_as<T, __fp16>), __m128h,
typename std::conditional<(len > 8 && std::same_as<T, float>), __m512,
typename std::conditional<(len > 4 && std::same_as<T, float>), __m256,
typename std::conditional<(len <= 4 && std::same_as<T, float>), __m128,
typename std::conditional<(len > 4 && std::same_as<T, double>), __m512d,
typename std::conditional<(len > 2 && std::same_as<T, double>), __m256d, __m128d
>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type vector_type;
public:
template <typename Datatype, typename Vectortype>
static consteval uint8_t GetVectorAlignedSize() {
if constexpr(std::same_as<Datatype, int8_t> && std::same_as<Vectortype, __m512i>) {
return 64;
} else if constexpr(std::same_as<Vectortype, __m512h> || (std::same_as<T, int8_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<T, int16_t> && std::same_as<Vectortype, __m512i>)) {
return 32;
} else if constexpr(std::same_as<Vectortype, __m256h> || std::same_as<Vectortype, __m512> || (std::same_as<Datatype, int8_t> && std::same_as<Vectortype, __m128i>) || (std::same_as<Datatype, int16_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<T, int32_t> && std::same_as<Vectortype, __m512i>)) {
return 16;
} else if constexpr(std::same_as<Vectortype, __m128h> || std::same_as<Vectortype, __m256> || std::same_as<Vectortype, __m512d> || (std::same_as<Datatype, int16_t> && std::same_as<Vectortype, __m128i>) || (std::same_as<T, int32_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m512i>)) {
return 8;
} else if constexpr(std::same_as<Vectortype, __m128> || std::same_as<Vectortype, __m256d> || (std::same_as<Datatype, int32_t> && std::same_as<Vectortype, __m128>) || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m256i>)) {
return 4;
} else if constexpr(std::same_as<Vectortype, __m128d> || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m128>)) {
return 2;
} else{
throw std::invalid_argument("");
}
}
vector_type v;
Vector() {};
Vector(__m128h v) requires(std::same_as<vector_type, __m128h>) : v(v) { }
Vector(__m128 v) requires(std::same_as<vector_type, __m128>) : v(v) { }
Vector(__m128d v) requires(std::same_as<vector_type, __m128d>) : v(v) { }
Vector(__m128i v) requires(std::same_as<vector_type, __m128i>) : v(v) { }
Vector(__m256h v) requires(std::same_as<vector_type, __m256h>) : v(v) { }
Vector(__m256 v) requires(std::same_as<vector_type, __m256>) : v(v) { }
Vector(__m256d v) requires(std::same_as<vector_type, __m256d>) : v(v) { }
Vector(__m256i v) requires(std::same_as<vector_type, __m256i>) : v(v) { }
Vector(__m512h v) requires(std::same_as<vector_type, __m512h>) : v(v) { }
Vector(__m512 v) requires(std::same_as<vector_type, __m512>) : v(v) { }
Vector(__m512d v) requires(std::same_as<vector_type, __m512d>) : v(v) { }
Vector(__m512i v) requires(std::same_as<vector_type, __m512i>) : v(v) { }
// In-place addition: replaces v with the register of (*this + b).
template <uint32_t blen>
void operator+=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const Vector<T, len> sum = *this + b;
    v = sum.v;
}
// In-place subtraction: replaces v with the register of (*this - b).
template <uint32_t blen>
void operator-=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const Vector<T, len> difference = *this - b;
    v = difference.v;
}
// In-place multiplication: replaces v with the register of (*this * b).
template <uint32_t blen>
void operator*=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const Vector<T, len> product = *this * b;
    v = product.v;
}
// In-place division: replaces v with the register of (*this / b).
template <uint32_t blen>
void operator/=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const Vector<T, len> quotient = *this / b;
    v = quotient.v;
}
// Formats the first len lanes as "{ a, b, c }".
std::string ToString() const {
    // Store writes the whole register, so the buffer has one slot per lane
    // of the register rather than just len slots.
    T lanes[GetVectorAlignedSize<T, vector_type>()];
    Store(lanes);

    std::string text = "{ ";
    for (uint8_t i = 0; i < len; i++) {
        text += std::format("{}", lanes[i]);
        // Separator after every element except the last.
        if (i + 1 < len) {
            text += ", ";
        }
    }
    text += " }";
    return text;
}
#pragma region 128
// Builds a 128-bit half-precision vector from up to eight scalars; omitted
// arguments default to 0. x0 ends up in lane 0, w1 in lane 7.
Vector(
    const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
    const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0
) requires(std::same_as<T, __fp16> && std::same_as<vector_type, __m128h>) {
    const __fp16 lanes[8]{ x0, y0, z0, w0, x1, y1, z1, w1 };
    // The local array is only guaranteed 2-byte alignment, so use the
    // unaligned load; the aligned _mm_load_ph requires a 16-byte boundary.
    v = _mm_loadu_ph(lanes);
}
// Builds a 128-bit float vector; x0 is lane 0, w0 is lane 3. Omitted
// arguments default to 0.
Vector(float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0) requires(std::same_as<T, float>&& std::same_as<vector_type, __m128>)
    : v(_mm_setr_ps(x0, y0, z0, w0)) {}
// Builds a 128-bit double vector; x0 is lane 0, y0 is lane 1. Omitted
// arguments default to 0.
Vector(double x0 = 0, double y0 = 0) requires(std::same_as<T, double>&& std::same_as<vector_type, __m128d>)
    : v(_mm_setr_pd(x0, y0)) {}
// Builds a 128-bit int8_t vector from up to sixteen scalars; x0 is lane 0,
// w3 is lane 15. Omitted arguments default to 0.
Vector(
    int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
    int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
    int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
    int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0
) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi8(
          x0, y0, z0, w0,
          x1, y1, z1, w1,
          x2, y2, z2, w2,
          x3, y3, z3, w3)) {}
// Builds a 128-bit int16_t vector from up to eight scalars; x0 is lane 0,
// w1 is lane 7. Omitted arguments default to 0.
Vector(
    int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
    int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0
) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi16(x0, y0, z0, w0, x1, y1, z1, w1)) {}
// Builds a 128-bit int32_t vector; x0 is lane 0, w0 is lane 3. Omitted
// arguments default to 0.
Vector(int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi32(x0, y0, z0, w0)) {}
// Builds a 128-bit int64_t vector; x0 is lane 0, y0 is lane 1. Omitted
// arguments default to 0.
Vector(int64_t x0 = 0, int64_t y0 = 0) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m128i>)
    // _mm_set_epi64x takes the high lane first.
    : v(_mm_set_epi64x(y0, x0)) {}
// Builds a 128-bit uint8_t vector from up to sixteen scalars; x0 is lane 0,
// w3 is lane 15. Omitted arguments default to 0.
Vector(
    uint8_t x0 = 0, uint8_t y0 = 0, uint8_t z0 = 0, uint8_t w0 = 0,
    uint8_t x1 = 0, uint8_t y1 = 0, uint8_t z1 = 0, uint8_t w1 = 0,
    uint8_t x2 = 0, uint8_t y2 = 0, uint8_t z2 = 0, uint8_t w2 = 0,
    uint8_t x3 = 0, uint8_t y3 = 0, uint8_t z3 = 0, uint8_t w3 = 0
) requires(std::same_as<T, uint8_t> && std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi8(
          x0, y0, z0, w0,
          x1, y1, z1, w1,
          x2, y2, z2, w2,
          x3, y3, z3, w3)) {}
// Builds a 128-bit uint16_t vector from up to eight scalars; x0 is lane 0,
// w1 is lane 7. Omitted arguments default to 0.
Vector(
    uint16_t x0 = 0, uint16_t y0 = 0, uint16_t z0 = 0, uint16_t w0 = 0,
    uint16_t x1 = 0, uint16_t y1 = 0, uint16_t z1 = 0, uint16_t w1 = 0
) requires(std::same_as<T, uint16_t>&& std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi16(x0, y0, z0, w0, x1, y1, z1, w1)) {}
// Builds a 128-bit uint32_t vector; x0 is lane 0, w0 is lane 3. Omitted
// arguments default to 0.
Vector(uint32_t x0 = 0, uint32_t y0 = 0, uint32_t z0 = 0, uint32_t w0 = 0) requires(std::same_as<T, uint32_t>&& std::same_as<vector_type, __m128i>)
    : v(_mm_setr_epi32(x0, y0, z0, w0)) {}
// Builds a 128-bit uint64_t vector; x0 is lane 0, y0 is lane 1. Omitted
// arguments default to 0.
Vector(uint64_t x0 = 0, uint64_t y0 = 0) requires(std::same_as<T, uint64_t>&& std::same_as<vector_type, __m128i>)
    // _mm_set_epi64x takes the high lane first.
    : v(_mm_set_epi64x(y0, x0)) {}
// Returns a vector whose register is all zeros (only for __m128 vectors).
static Vector<T, len> Zero() requires(std::same_as<vector_type, __m128>) {
    const __m128 zeros = _mm_setzero_ps();
    return Vector<T, len>(zeros);
}
// Writes the full 128-bit half-precision register to data (unaligned store).
// data must have room for the whole register, not just len elements.
void Store(T* data) const requires(std::same_as<vector_type, __m128h>) {
    void* const destination = data;
    _mm_storeu_ph(destination, v);
}
// Writes the full 128-bit float register (four floats) to data using an
// unaligned store. data must have room for all four lanes.
void Store(T* data) const requires(std::same_as<vector_type, __m128>) {
    float* const destination = reinterpret_cast<float*>(data);
    _mm_storeu_ps(destination, v);
}
// Writes the full 128-bit double register (two doubles) to data using an
// unaligned store. data must have room for both lanes.
void Store(T* data) const requires(std::same_as<vector_type, __m128d>) {
    const __m128d lanes = v;
    _mm_storeu_pd(data, lanes);
}
// Writes the full 128-bit register (sixteen 8-bit lanes) to data using an
// unaligned store. data must have room for all sixteen lanes.
// Uses the SSE2 store, which has the same effect as the AVX-512
// _mm_storeu_epi8 but does not require AVX-512BW/VL, and also accepts
// uint8_t vectors, which share the __m128i register but had no Store.
void Store(T* data) const requires((std::same_as<T, int8_t> || std::same_as<T, uint8_t>) && std::same_as<vector_type, __m128i>) {
    _mm_storeu_si128(reinterpret_cast<__m128i*>(data), v);
}
// Writes the full 128-bit register (eight 16-bit lanes) to data using an
// unaligned store. data must have room for all eight lanes.
// Uses the SSE2 store, which has the same effect as the AVX-512
// _mm_storeu_epi16 but does not require AVX-512BW/VL, and also accepts
// uint16_t vectors, which share the __m128i register but had no Store.
void Store(T* data) const requires((std::same_as<T, int16_t> || std::same_as<T, uint16_t>) && std::same_as<vector_type, __m128i>) {
    _mm_storeu_si128(reinterpret_cast<__m128i*>(data), v);
}
// Writes the full 128-bit register (four 32-bit lanes) to data using an
// unaligned store. data must have room for all four lanes.
// Uses the SSE2 store, which has the same effect as the AVX-512
// _mm_storeu_epi32 but does not require AVX-512F/VL, and also accepts
// uint32_t vectors, which share the __m128i register but had no Store.
void Store(T* data) const requires((std::same_as<T, int32_t> || std::same_as<T, uint32_t>) && std::same_as<vector_type, __m128i>) {
    _mm_storeu_si128(reinterpret_cast<__m128i*>(data), v);
}
// Writes the full 128-bit register (two 64-bit lanes) to data using an
// unaligned store. data must have room for both lanes.
// Uses the SSE2 store, which has the same effect as the AVX-512
// _mm_storeu_epi64 but does not require AVX-512F/VL, and also accepts
// uint64_t vectors, which share the __m128i register but had no Store.
void Store(T* data) const requires((std::same_as<T, int64_t> || std::same_as<T, uint64_t>) && std::same_as<vector_type, __m128i>) {
    _mm_storeu_si128(reinterpret_cast<__m128i*>(data), v);
}
// Lane-wise sum of two half-precision vectors held in 128-bit registers.
// b may have a different len as long as it uses the same register width.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_ph(v, b.v));
}
// Lane-wise difference (this - b) of two half-precision vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_ph(v, b.v));
}
// Lane-wise product of two half-precision vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_mul_ph(v, b.v));
}
// Lane-wise quotient (this / b) of two half-precision vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_div_ph(v, b.v));
}
// Lane-wise sum of two float vectors held in 128-bit registers.
// b may have a different len as long as it uses the same register width.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_ps(v, b.v));
}
// Lane-wise difference (this - b) of two float vectors held in 128-bit
// registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_ps(v, b.v));
}
// Lane-wise product of two float vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_mul_ps(v, b.v));
}
// Lane-wise quotient (this / b) of two float vectors held in 128-bit
// registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_div_ps(v, b.v));
}
// Lane-wise sum of two double vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_pd(v, b.v));
}
// Lane-wise difference (this - b) of two double vectors held in 128-bit
// registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_pd(v, b.v));
}
// Lane-wise product of two double vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_mul_pd(v, b.v));
}
// Lane-wise quotient (this / b) of two double vectors held in 128-bit
// registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) const requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_div_pd(v, b.v));
}
// Lane-wise (wrapping) sum of two int8_t vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_epi8(v, b.v));
}
// Lane-wise (wrapping) difference (this - b) of two int8_t vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_epi8(v, b.v));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi8(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi8(v, bv));
// }
// Lane-wise (wrapping) sum of two int16_t vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_epi16(v, b.v));
}
// Lane-wise (wrapping) difference (this - b) of two int16_t vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_epi16(v, b.v));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi16(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi16(v, bv));
// }
// Lane-wise (wrapping) sum of two int32_t vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_epi32(v, b.v));
}
// Lane-wise (wrapping) difference (this - b) of two int32_t vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_epi32(v, b.v));
}
// Lane-wise product of two int32_t vectors held in 128-bit registers; each
// lane keeps the low 32 bits of its product.
// _mm_mullo_epi32 is the per-lane multiply. _mm_mul_epi32 only multiplies
// lanes 0 and 2 and returns two 64-bit products, which is not an
// element-wise int32 result.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_mullo_epi32(v, b.v));
}
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, i132> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi32(v, bv));
// }
// Lane-wise (wrapping) sum of two int64_t vectors held in 128-bit registers.
// Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_add_epi64(v, b.v));
}
// Lane-wise (wrapping) difference (this - b) of two int64_t vectors held in
// 128-bit registers. Const-qualified so it can be applied to const operands.
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    return Vector<T, len>(_mm_sub_epi64(v, b.v));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi64(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi64(v, bv));
// }
// Placeholder for a masked add; no masking logic exists yet.
// TODO: implement. Until then the vector is returned unchanged so that a
// call does not flow off the end of a value-returning function, which is
// undefined behavior.
template <uint32_t blen>
Vector<T, len> AddMask() {
    return *this;
}
#pragma endregion
#pragma region 256
// Builds a 256-bit half-precision vector from up to sixteen scalars; omitted
// arguments default to 0. x0 ends up in lane 0 and w3 in lane 15, matching
// the lane order of the other value constructors.
Vector(
    const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
    const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0,
    const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0,
    const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0
) requires(std::same_as<T, __fp16>&& std::same_as<vector_type, __m256h>) {
    // Memory order equals lane order for a load, so list the arguments in
    // declaration order (not reversed as for the _mm*_set_* intrinsics).
    const __fp16 lanes[16]{
        x0, y0, z0, w0,
        x1, y1, z1, w1,
        x2, y2, z2, w2,
        x3, y3, z3, w3
    };
    // The local array is only guaranteed 2-byte alignment, so use the
    // unaligned load; the aligned _mm256_load_ph requires a 32-byte boundary.
    v = _mm256_loadu_ph(lanes);
}
// Builds a 256-bit float vector from up to eight scalars; x0 is lane 0,
// w1 is lane 7. Omitted arguments default to 0.
Vector(
    float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
    float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0
) requires(std::same_as<T, float>&& std::same_as<vector_type, __m256>)
    : v(_mm256_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1)) {}
// Builds a 256-bit vector of four double lanes; x0 is lane 0 and w0 is lane 3.
Vector(double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0) requires(std::same_as<T, double>&& std::same_as<vector_type, __m256d>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm256_setr_pd(x0, y0, z0, w0);
}
// Builds a 256-bit vector of thirty-two int8_t lanes; x0 is lane 0 and w7 is lane 31.
Vector(
    int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
    int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
    int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
    int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0,
    int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0,
    int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0,
    int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0,
    int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0
) requires(std::same_as<T, int8_t>&& std::same_as<vector_type, __m256i>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm256_setr_epi8(
        x0, y0, z0, w0,
        x1, y1, z1, w1,
        x2, y2, z2, w2,
        x3, y3, z3, w3,
        x4, y4, z4, w4,
        x5, y5, z5, w5,
        x6, y6, z6, w6,
        x7, y7, z7, w7
    );
}
// Builds a 256-bit vector of sixteen int16_t lanes; x0 is lane 0 and w3 is lane 15.
Vector(
    int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
    int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0,
    int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0,
    int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0
) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m256i>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm256_setr_epi16(
        x0, y0, z0, w0,
        x1, y1, z1, w1,
        x2, y2, z2, w2,
        x3, y3, z3, w3
    );
}
// Builds a 256-bit vector of eight int32_t lanes; x0 is lane 0 and w1 is lane 7.
Vector(
    int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0,
    int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0
) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m256i>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm256_setr_epi32(
        x0, y0, z0, w0,
        x1, y1, z1, w1
    );
}
// Builds a 256-bit vector of four int64_t lanes; x0 is lane 0 and w0 is lane 3.
// The constraint names __m256i, the integer register type used by the int64_t
// operators and Store overloads of this class. It previously named __m256 (the
// float register), which meant this constructor could never be selected.
Vector(int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m256i>) {
    // _mm256_set_epi64x takes the highest lane first, hence the reversed argument list.
    v = _mm256_set_epi64x(w0, z0, y0, x0);
}
// Lane-wise half-precision addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256h lhs = reinterpret_cast<__m256h>(v);
    const __m256h rhs = reinterpret_cast<__m256h>(b.v);
    return Vector<T, len>(_mm256_add_ph(lhs, rhs));
}
// Lane-wise half-precision subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256h lhs = reinterpret_cast<__m256h>(v);
    const __m256h rhs = reinterpret_cast<__m256h>(b.v);
    return Vector<T, len>(_mm256_sub_ph(lhs, rhs));
}
// Lane-wise half-precision multiplication on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256h lhs = reinterpret_cast<__m256h>(v);
    const __m256h rhs = reinterpret_cast<__m256h>(b.v);
    return Vector<T, len>(_mm256_mul_ph(lhs, rhs));
}
// Lane-wise half-precision division (this / b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256h lhs = reinterpret_cast<__m256h>(v);
    const __m256h rhs = reinterpret_cast<__m256h>(b.v);
    return Vector<T, len>(_mm256_div_ph(lhs, rhs));
}
// Lane-wise single-precision addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256 lhs = reinterpret_cast<__m256>(v);
    const __m256 rhs = reinterpret_cast<__m256>(b.v);
    return Vector<T, len>(_mm256_add_ps(lhs, rhs));
}
// Lane-wise single-precision subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256 lhs = reinterpret_cast<__m256>(v);
    const __m256 rhs = reinterpret_cast<__m256>(b.v);
    return Vector<T, len>(_mm256_sub_ps(lhs, rhs));
}
// Lane-wise single-precision multiplication on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256 lhs = reinterpret_cast<__m256>(v);
    const __m256 rhs = reinterpret_cast<__m256>(b.v);
    return Vector<T, len>(_mm256_mul_ps(lhs, rhs));
}
// Lane-wise single-precision division (this / b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256 lhs = reinterpret_cast<__m256>(v);
    const __m256 rhs = reinterpret_cast<__m256>(b.v);
    return Vector<T, len>(_mm256_div_ps(lhs, rhs));
}
// Lane-wise double-precision addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256d lhs = reinterpret_cast<__m256d>(v);
    const __m256d rhs = reinterpret_cast<__m256d>(b.v);
    return Vector<T, len>(_mm256_add_pd(lhs, rhs));
}
// Lane-wise double-precision subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256d lhs = reinterpret_cast<__m256d>(v);
    const __m256d rhs = reinterpret_cast<__m256d>(b.v);
    return Vector<T, len>(_mm256_sub_pd(lhs, rhs));
}
// Lane-wise double-precision multiplication on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256d lhs = reinterpret_cast<__m256d>(v);
    const __m256d rhs = reinterpret_cast<__m256d>(b.v);
    return Vector<T, len>(_mm256_mul_pd(lhs, rhs));
}
// Lane-wise double-precision division (this / b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256d lhs = reinterpret_cast<__m256d>(v);
    const __m256d rhs = reinterpret_cast<__m256d>(b.v);
    return Vector<T, len>(_mm256_div_pd(lhs, rhs));
}
// Lane-wise 8-bit integer addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_add_epi8(lhs, rhs));
}
// Lane-wise 8-bit integer subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_sub_epi8(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi8(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi8(v, bv));
// }
// Lane-wise 16-bit integer addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_add_epi16(lhs, rhs));
}
// Lane-wise 16-bit integer subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_sub_epi16(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi16(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi16(v, bv));
// }
// Lane-wise 32-bit integer addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_add_epi32(lhs, rhs));
}
// Lane-wise 32-bit integer subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_sub_epi32(lhs, rhs));
}
// Lane-wise 32-bit integer multiplication on a 256-bit register; each lane keeps
// the low 32 bits of its product.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    // _mm256_mullo_epi32 multiplies all eight lanes. The previously used
    // _mm256_mul_epi32 is a widening multiply that reads only the even-numbered
    // 32-bit lanes and writes four 64-bit products, so odd lanes were wrong.
    return Vector<T, len>(_mm256_mullo_epi32(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, i132> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi32(v, bv));
// }
// Lane-wise 64-bit integer addition on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_add_epi64(lhs, rhs));
}
// Lane-wise 64-bit integer subtraction (this - b) on a 256-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m256i lhs = reinterpret_cast<__m256i>(v);
    const __m256i rhs = reinterpret_cast<__m256i>(b.v);
    return Vector<T, len>(_mm256_sub_epi64(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi64(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi64(v, bv));
// }
// Writes the whole 256-bit half-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m256h>) {
    const __m256h lanes = reinterpret_cast<__m256h>(v);
    _mm256_storeu_ph(static_cast<void*>(data), lanes);
}
// Writes the whole 256-bit single-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m256>) {
    const __m256 lanes = reinterpret_cast<__m256>(v);
    _mm256_storeu_ps(data, lanes);
}
// Writes the whole 256-bit double-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m256d>) {
    const __m256d lanes = reinterpret_cast<__m256d>(v);
    _mm256_storeu_pd(data, lanes);
}
// Writes the whole 256-bit register of int8_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i>) {
    const __m256i lanes = reinterpret_cast<__m256i>(v);
    _mm256_storeu_epi8(static_cast<void*>(data), lanes);
}
// Writes the whole 256-bit register of int16_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i>) {
    const __m256i lanes = reinterpret_cast<__m256i>(v);
    _mm256_storeu_epi16(static_cast<void*>(data), lanes);
}
// Writes the whole 256-bit register of int32_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i>) {
    const __m256i lanes = reinterpret_cast<__m256i>(v);
    _mm256_storeu_epi32(static_cast<void*>(data), lanes);
}
// Writes the whole 256-bit register of int64_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i>) {
    const __m256i lanes = reinterpret_cast<__m256i>(v);
    _mm256_storeu_epi64(static_cast<void*>(data), lanes);
}
#pragma endregion
#pragma region 512
// Builds a 512-bit vector of thirty-two half-precision lanes.
// Arguments are given in lane order: x0 lands in lane 0, w7 in lane 31, matching
// the float/double/integer constructors (which pass the reversed list to _mm*_set_*).
Vector(
    const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
    const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0,
    const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0,
    const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0,
    const __fp16& x4 = 0, const __fp16& y4 = 0, const __fp16& z4 = 0, const __fp16& w4 = 0,
    const __fp16& x5 = 0, const __fp16& y5 = 0, const __fp16& z5 = 0, const __fp16& w5 = 0,
    const __fp16& x6 = 0, const __fp16& y6 = 0, const __fp16& z6 = 0, const __fp16& w6 = 0,
    const __fp16& x7 = 0, const __fp16& y7 = 0, const __fp16& z7 = 0, const __fp16& w7 = 0
) requires(std::same_as<T, __fp16>&& std::same_as<vector_type, __m512h>) {
    // A load copies element 0 of the array into lane 0, so the array is written in
    // natural order (the previous w,z,y,x grouping mirrored the _mm_set_* argument
    // convention, which does not apply to loads).
    const __fp16 lanes[]{
        x0,y0,z0,w0, x1,y1,z1,w1, x2,y2,z2,w2, x3,y3,z3,w3,
        x4,y4,z4,w4, x5,y5,z5,w5, x6,y6,z6,w6, x7,y7,z7,w7
    };
    // The stack array is only guaranteed 2-byte alignment; the aligned load
    // (_mm512_load_ph) requires 64 bytes, so use the unaligned form.
    v = _mm512_loadu_ph(lanes);
}
// Builds a 512-bit vector of sixteen float lanes; x0 is lane 0 and w3 is lane 15.
Vector(
    float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
    float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0,
    float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0,
    float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0
) requires(std::same_as<T, float>&& std::same_as<vector_type, __m512>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm512_setr_ps(
        x0, y0, z0, w0,
        x1, y1, z1, w1,
        x2, y2, z2, w2,
        x3, y3, z3, w3
    );
}
// Builds a 512-bit vector of eight double lanes; x0 is lane 0 and w1 is lane 7.
Vector(
    double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0,
    double x1 = 0, double y1 = 0, double z1 = 0, double w1 = 0
) requires(std::same_as<T, double>&& std::same_as<vector_type, __m512d>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm512_setr_pd(
        x0, y0, z0, w0,
        x1, y1, z1, w1
    );
}
// Builds a 512-bit vector of sixty-four int8_t lanes; x0 is lane 0 and w15 is lane 63.
Vector(
    int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
    int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
    int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
    int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0,
    int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0,
    int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0,
    int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0,
    int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0,
    int8_t x8 = 0, int8_t y8 = 0, int8_t z8 = 0, int8_t w8 = 0,
    int8_t x9 = 0, int8_t y9 = 0, int8_t z9 = 0, int8_t w9 = 0,
    int8_t x10 = 0, int8_t y10 = 0, int8_t z10 = 0, int8_t w10 = 0,
    int8_t x11 = 0, int8_t y11 = 0, int8_t z11 = 0, int8_t w11 = 0,
    int8_t x12 = 0, int8_t y12 = 0, int8_t z12 = 0, int8_t w12 = 0,
    int8_t x13 = 0, int8_t y13 = 0, int8_t z13 = 0, int8_t w13 = 0,
    int8_t x14 = 0, int8_t y14 = 0, int8_t z14 = 0, int8_t w14 = 0,
    int8_t x15 = 0, int8_t y15 = 0, int8_t z15 = 0, int8_t w15 = 0
) requires(std::same_as<T, int8_t>&& std::same_as<vector_type, __m512i>) {
    // _mm512_set_epi8 takes the highest lane first, so the groups are listed from
    // 15 down to 0 and each group as w, z, y, x.
    const __m512i packed = _mm512_set_epi8(
        w15, z15, y15, x15,  w14, z14, y14, x14,  w13, z13, y13, x13,  w12, z12, y12, x12,
        w11, z11, y11, x11,  w10, z10, y10, x10,  w9, z9, y9, x9,      w8, z8, y8, x8,
        w7, z7, y7, x7,      w6, z6, y6, x6,      w5, z5, y5, x5,      w4, z4, y4, x4,
        w3, z3, y3, x3,      w2, z2, y2, x2,      w1, z1, y1, x1,      w0, z0, y0, x0
    );
    v = packed;
}
// Builds a 512-bit vector of thirty-two int16_t lanes; x0 is lane 0 and w7 is lane 31.
Vector(
    int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
    int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0,
    int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0,
    int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0,
    int16_t x4 = 0, int16_t y4 = 0, int16_t z4 = 0, int16_t w4 = 0,
    int16_t x5 = 0, int16_t y5 = 0, int16_t z5 = 0, int16_t w5 = 0,
    int16_t x6 = 0, int16_t y6 = 0, int16_t z6 = 0, int16_t w6 = 0,
    int16_t x7 = 0, int16_t y7 = 0, int16_t z7 = 0, int16_t w7 = 0
) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m512i>) {
    // _mm512_set_epi16 takes the highest lane first, so the groups are listed from
    // 7 down to 0 and each group as w, z, y, x.
    const __m512i packed = _mm512_set_epi16(
        w7, z7, y7, x7,  w6, z6, y6, x6,  w5, z5, y5, x5,  w4, z4, y4, x4,
        w3, z3, y3, x3,  w2, z2, y2, x2,  w1, z1, y1, x1,  w0, z0, y0, x0
    );
    v = packed;
}
// Builds a 512-bit vector of sixteen int32_t lanes; x0 is lane 0 and w3 is lane 15.
Vector(
    int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0,
    int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0,
    int32_t x2 = 0, int32_t y2 = 0, int32_t z2 = 0, int32_t w2 = 0,
    int32_t x3 = 0, int32_t y3 = 0, int32_t z3 = 0, int32_t w3 = 0
) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m512i>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm512_setr_epi32(
        x0, y0, z0, w0,
        x1, y1, z1, w1,
        x2, y2, z2, w2,
        x3, y3, z3, w3
    );
}
// Builds a 512-bit vector of eight int64_t lanes; x0 is lane 0 and w1 is lane 7.
Vector(
    int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0,
    int64_t x1 = 0, int64_t y1 = 0, int64_t z1 = 0, int64_t w1 = 0
) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m512i>) {
    // setr takes the lanes lowest-first, i.e. the reverse of the set argument order.
    v = _mm512_setr_epi64(
        x0, y0, z0, w0,
        x1, y1, z1, w1
    );
}
// Lane-wise half-precision addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    // _mm512_add_ph takes __m512h operands; the casts previously targeted __m512
    // (the float register type), which only compiles under lax vector conversions.
    const __m512h lhs = reinterpret_cast<__m512h>(v);
    const __m512h rhs = reinterpret_cast<__m512h>(b.v);
    return Vector<T, len>(_mm512_add_ph(lhs, rhs));
}
// Lane-wise half-precision subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    // _mm512_sub_ph takes __m512h operands; the casts previously targeted __m512
    // (the float register type), which only compiles under lax vector conversions.
    const __m512h lhs = reinterpret_cast<__m512h>(v);
    const __m512h rhs = reinterpret_cast<__m512h>(b.v);
    return Vector<T, len>(_mm512_sub_ph(lhs, rhs));
}
// Lane-wise half-precision multiplication on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    // _mm512_mul_ph takes __m512h operands; the casts previously targeted __m512
    // (the float register type), which only compiles under lax vector conversions.
    const __m512h lhs = reinterpret_cast<__m512h>(v);
    const __m512h rhs = reinterpret_cast<__m512h>(b.v);
    return Vector<T, len>(_mm512_mul_ph(lhs, rhs));
}
// Lane-wise half-precision division (this / b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    // _mm512_div_ph takes __m512h operands; the casts previously targeted __m512
    // (the float register type), which only compiles under lax vector conversions.
    const __m512h lhs = reinterpret_cast<__m512h>(v);
    const __m512h rhs = reinterpret_cast<__m512h>(b.v);
    return Vector<T, len>(_mm512_div_ph(lhs, rhs));
}
// Lane-wise single-precision addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512 lhs = reinterpret_cast<__m512>(v);
    const __m512 rhs = reinterpret_cast<__m512>(b.v);
    return Vector<T, len>(_mm512_add_ps(lhs, rhs));
}
// Lane-wise single-precision subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512 lhs = reinterpret_cast<__m512>(v);
    const __m512 rhs = reinterpret_cast<__m512>(b.v);
    return Vector<T, len>(_mm512_sub_ps(lhs, rhs));
}
// Lane-wise single-precision multiplication on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512 lhs = reinterpret_cast<__m512>(v);
    const __m512 rhs = reinterpret_cast<__m512>(b.v);
    return Vector<T, len>(_mm512_mul_ps(lhs, rhs));
}
// Lane-wise single-precision division (this / b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512 lhs = reinterpret_cast<__m512>(v);
    const __m512 rhs = reinterpret_cast<__m512>(b.v);
    return Vector<T, len>(_mm512_div_ps(lhs, rhs));
}
// Lane-wise double-precision addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512d lhs = reinterpret_cast<__m512d>(v);
    const __m512d rhs = reinterpret_cast<__m512d>(b.v);
    return Vector<T, len>(_mm512_add_pd(lhs, rhs));
}
// Lane-wise double-precision subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512d lhs = reinterpret_cast<__m512d>(v);
    const __m512d rhs = reinterpret_cast<__m512d>(b.v);
    return Vector<T, len>(_mm512_sub_pd(lhs, rhs));
}
// Lane-wise double-precision multiplication on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512d lhs = reinterpret_cast<__m512d>(v);
    const __m512d rhs = reinterpret_cast<__m512d>(b.v);
    return Vector<T, len>(_mm512_mul_pd(lhs, rhs));
}
// Lane-wise double-precision division (this / b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512d lhs = reinterpret_cast<__m512d>(v);
    const __m512d rhs = reinterpret_cast<__m512d>(b.v);
    return Vector<T, len>(_mm512_div_pd(lhs, rhs));
}
// Lane-wise 8-bit integer addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_add_epi8(lhs, rhs));
}
// Lane-wise 8-bit integer subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_sub_epi8(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi8(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi8(v, bv));
// }
// Lane-wise 16-bit integer addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_add_epi16(lhs, rhs));
}
// Lane-wise 16-bit integer subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_sub_epi16(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi16(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi16(v, bv));
// }
// Lane-wise 32-bit integer addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_add_epi32(lhs, rhs));
}
// Lane-wise 32-bit integer subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_sub_epi32(lhs, rhs));
}
// Lane-wise 32-bit integer multiplication on a 512-bit register; each lane keeps
// the low 32 bits of its product.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    // _mm512_mullo_epi32 multiplies all sixteen lanes. The previously used
    // _mm512_mul_epi32 is a widening multiply that reads only the even-numbered
    // 32-bit lanes and writes eight 64-bit products, so odd lanes were wrong.
    return Vector<T, len>(_mm512_mullo_epi32(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, i132> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi32(v, bv));
// }
// Lane-wise 64-bit integer addition on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_add_epi64(lhs, rhs));
}
// Lane-wise 64-bit integer subtraction (this - b) on a 512-bit register.
// Both operands must report the same GetVectorAlignedSize().
template <uint32_t blen>
Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
    const __m512i lhs = reinterpret_cast<__m512i>(v);
    const __m512i rhs = reinterpret_cast<__m512i>(b.v);
    return Vector<T, len>(_mm512_sub_epi64(lhs, rhs));
}
// template <uint32_t blen>
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_mul_epi64(v, bv));
// }
// template <uint32_t blen>
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
// __m512i v = this->v;
// __m512i bv = b.v;
// return Vector<T, len>(_mm512_div_epi64(v, bv));
// }
// Writes the whole 512-bit half-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m512h>) {
    const __m512h lanes = reinterpret_cast<__m512h>(v);
    _mm512_storeu_ph(static_cast<void*>(data), lanes);
}
// Writes the whole 512-bit single-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m512>) {
    const __m512 lanes = reinterpret_cast<__m512>(v);
    _mm512_storeu_ps(static_cast<void*>(data), lanes);
}
// Writes the whole 512-bit double-precision register to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<vector_type, __m512d>) {
    const __m512d lanes = reinterpret_cast<__m512d>(v);
    _mm512_storeu_pd(data, lanes);
}
// Writes the whole 512-bit register of int8_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
    const __m512i lanes = reinterpret_cast<__m512i>(v);
    _mm512_storeu_epi8(static_cast<void*>(data), lanes);
}
// Writes the whole 512-bit register of int16_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
    const __m512i lanes = reinterpret_cast<__m512i>(v);
    _mm512_storeu_epi16(static_cast<void*>(data), lanes);
}
// Writes the whole 512-bit register of int32_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i>) {
    const __m512i lanes = reinterpret_cast<__m512i>(v);
    _mm512_storeu_epi32(static_cast<void*>(data), lanes);
}
// Writes the whole 512-bit register of int64_t lanes to `data` with an unaligned
// store; lane 0 goes to data[0].
void Store(T* data) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
    const __m512i lanes = reinterpret_cast<__m512i>(v);
    _mm512_storeu_epi64(static_cast<void*>(data), lanes);
}
#pragma endregion
};
// `vectorLenght` vectors of `len` elements each, stored back to back in one wide
// Vector<T, len*vectorLenght> register.
export template <typename T, uint32_t len, uint32_t vectorLenght>
class VectorVector : public Vector<T, len*vectorLenght> {
public:
    // Joins two 128-bit half-precision registers: v0 fills the low 128 bits, v1 the high 128 bits.
    // NOTE(review): assumes the base register (this->v) is a __m256h for this overload - confirm.
    VectorVector(__m128h v0, __m128h v1) requires(std::same_as<T, __fp16> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) {
        // The 128-bit lane insert is a *_ps intrinsic, so the __m128h inputs are
        // reinterpreted explicitly (bit-preserving casts) instead of being passed
        // to float intrinsics with mismatched vector types.
        const __m256 low = _mm256_castps128_ps256(_mm_castph_ps(v0));
        this->v = _mm256_castps_ph(_mm256_insertf128_ps(low, _mm_castph_ps(v1), 1));
    }
    // Joins four 128-bit float registers: v0 occupies lanes 0-3, v1 lanes 4-7,
    // v2 lanes 8-11 and v3 lanes 12-15.
    VectorVector(__m128 v0, __m128 v1, __m128 v2, __m128 v3) requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) {
        __m512 packed = _mm512_castps256_ps512(_mm256_castps128_ps256(v0));
        // _mm512_insertf32x4 is the 128-bit lane insert for __m512; the previously
        // used name `_mm512_insertfloatx4` is not an existing intrinsic.
        packed = _mm512_insertf32x4(packed, v1, 1);
        packed = _mm512_insertf32x4(packed, v2, 2);
        packed = _mm512_insertf32x4(packed, v3, 3);
        this->v = packed;
    }
    // Wraps an already packed 512-bit register.
    VectorVector(__m512 v) : Vector<T, len*vectorLenght>(v) { //requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) : Vector<T, len*vectorLenght>(v)
    }
    // Packs 3-component vectors tightly: sixteen floats hold five full xyz triples
    // plus the x of a sixth. Values are forwarded to the base constructor in lane order.
    VectorVector(
        float x0 = 0, float y0 = 0, float z0 = 0,
        float x1 = 0, float y1 = 0, float z1 = 0,
        float x2 = 0, float y2 = 0, float z2 = 0,
        float x3 = 0, float y3 = 0, float z3 = 0,
        float x4 = 0, float y4 = 0, float z4 = 0,
        float x5 = 0
    ) requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize() && len == 3) :
        Vector<T, len*vectorLenght>(
            x0, y0, z0,
            x1, y1, z1,
            x2, y2, z2,
            x3, y3, z3,
            x4, y4, z4,
            x5)
    {}
    // Packs four 4-component vectors; x0 ends up in lane 0 and w3 in lane 15.
    VectorVector(
        float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
        float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0,
        float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0,
        float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0
    ) :
        // The 16-float base constructor takes its arguments in lane order and does
        // the reversal required by _mm512_set_ps itself. Forwarding an already
        // reversed list (as before) flipped the whole register, unlike the
        // 3-component overload, which forwards in lane order.
        Vector<T, len*vectorLenght>(
            x0, y0, z0, w0,
            x1, y1, z1, w1,
            x2, y2, z2, w2,
            x3, y3, z3, w3)
    {}
};
// Exported constant-style vectors. Arguments are listed as (x, y, z, w).
// NOTE(review): these are mutable namespace-scope objects, not const - confirm that is intended.

// Negated unit vectors: -1 in component 0..3 respectively, 0 elsewhere.
export Vector<float, 4> g_XMNegIdentityR0(-1.0f, 0.0f, 0.0f, 0.0f);
export Vector<float, 4> g_XMNegIdentityR1(0.0f, -1.0f, 0.0f, 0.0f);
export Vector<float, 4> g_XMNegIdentityR2(0.0f, 0.0f, -1.0f, 0.0f);
export Vector<float, 4> g_XMNegIdentityR3(0.0f, 0.0f, 0.0f, -1.0f);
// Unit vectors: 1 in component 0..3 respectively, 0 elsewhere.
export Vector<float, 4> g_XMIdentityR0(1.0f, 0.0f, 0.0f, 0.0f);
export Vector<float, 4> g_XMIdentityR1(0.0f, 1.0f, 0.0f, 0.0f);
export Vector<float, 4> g_XMIdentityR2(0.0f, 0.0f, 1.0f, 0.0f);
export Vector<float, 4> g_XMIdentityR3(0.0f, 0.0f, 0.0f, 1.0f);
// Bit masks: all 32 bits set in the named components, all bits clear in the rest.
// NOTE(review): no uint32_t constructor is visible in this part of the file - confirm
// Vector<uint32_t, 4> actually has a matching constructor.
export Vector<uint32_t, 4> g_XMMaskXY(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000);
export Vector<uint32_t, 4> g_XMMask3(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000);
export Vector<uint32_t, 4> g_XMMaskX(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000);
export Vector<uint32_t, 4> g_XMMaskY(0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000);
export Vector<uint32_t, 4> g_XMMaskZ(0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000);
export Vector<uint32_t, 4> g_XMMaskW(  0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF);
}

5
Crafter.Math.cppm Normal file
View file

@ -0,0 +1,5 @@
export module Crafter.Math;
export import :BasicTypes;
export import :Vector;
export import :Matrix;
export import :Misc;

BIN
bin/crafter-math Executable file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
build/debug/Crafter.Math.o Normal file

Binary file not shown.

Binary file not shown.

BIN
build/debug/main_source.o Normal file

Binary file not shown.

21
main.cpp Normal file
View file

@ -0,0 +1,21 @@
#include <iostream>
#include <chrono>
#include <immintrin.h>
#include <random>
#include <format>
import Crafter.Math;
using namespace Crafter;
int main() {
Matrix<float, 4, 4, 1> matrix(
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
1, 0, 0, 1
);
Vector<float, 4> test(0, 0, 0, 1);
Vector<float, 4> result = matrix*test;
std::cout << result.ToString() << std::endl;
}

35
project.json Normal file
View file

@ -0,0 +1,35 @@
{
"name": "crafter-math",
"configurations": [
{
"name": "base",
"standard": "c++26",
"source_files": [],
"module_files": ["Crafter.Math-Vector", "Crafter.Math-BasicTypes", "Crafter.Math-MatrixAMX", "Crafter.Math-Matrix", "Crafter.Math-Misc", "Crafter.Math"],
"build_dir": "./build",
"output_dir": "./bin"
},
{
"name": "debug",
"type": "executable",
"source_files": ["main"],
"optimization_level": "3",
"extends":["base"]
},
{
"name": "lib",
"extends": ["base"],
"type":"library"
},
{
"name": "debug-lib",
"extends": ["lib"],
"optimization_level": "0"
},
{
"name": "release-lib",
"extends": ["lib"],
"optimization_level": "3"
}
]
}