1138 lines
No EOL
56 KiB
C++
Executable file
1138 lines
No EOL
56 KiB
C++
Executable file
/*
|
|
Crafter.Math
|
|
Copyright (C) 2025 Catcrafts
|
|
Catcrafts.net
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 3.0 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
module;
|
|
|
|
#include <type_traits>
|
|
#include <concepts>
|
|
#include <immintrin.h>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <cmath>
|
|
|
|
export module Crafter.Math:Vector;
|
|
|
|
import :BasicTypes;
|
|
import :Misc;
|
|
|
|
namespace Crafter {
|
|
// Plain storage for a fixed-size vector of Len components of type T.
// The generic form only exposes the component array `v`.
template <typename T, uint32_t Len>
struct VectorBase {
    T v[Len];
};

// One-component storage: `x` shares its storage with `v[0]` through the union.
template <typename T>
struct VectorBase<T, 1> {
    union {
        T v[1];
        T x;
    };
};

// Two-component storage: `x`, `y` share their storage with `v[0]`, `v[1]`.
// NOTE(review): the anonymous struct inside the union is a compiler
// extension, and reading `v` after writing `x`/`y` (or the reverse) relies
// on the compiler tolerating union aliasing.
template <typename T>
struct VectorBase<T, 2> {
    union {
        T v[2];
        struct {
            T x, y;
        };
    };

    // Component-wise constructor. The parameters are of type T (not float)
    // so that double and integer vectors keep their full precision, and the
    // constructor is named without a template-id, as C++20 requires.
    VectorBase(T x, T y) : x(x), y(y) {}

    // Leaves the components uninitialized for trivially constructible T.
    VectorBase() = default;
};

// Three-component storage: `x`, `y`, `z` share their storage with `v[0..2]`.
template <typename T>
struct VectorBase<T, 3> {
    union {
        T v[3];
        struct {
            T x, y, z;
        };
    };

    // Component-wise constructor taking values of the element type T.
    VectorBase(T x, T y, T z) : x(x), y(y), z(z) {}

    // Leaves the components uninitialized for trivially constructible T.
    VectorBase() = default;
};

// Four-component storage: `x`, `y`, `z`, `w` share their storage with `v[0..3]`.
template <typename T>
struct VectorBase<T, 4> {
    union {
        T v[4];
        struct {
            T x, y, z, w;
        };
    };

    // Component-wise constructor taking values of the element type T.
    VectorBase(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {}

    // Leaves the components uninitialized for trivially constructible T.
    VectorBase() = default;
};
|
|
|
|
export template <typename T, uint32_t Len>
|
|
class Vector : public VectorBase<T, Len> {
|
|
public:
|
|
Vector(float x, float y, float z, float w ) requires(std::same_as<T, float> && Len == 4) : VectorBase<T, Len>(x, y, z, w) {
|
|
|
|
}
|
|
Vector(float x, float y, float z) requires(std::same_as<T, float> && Len == 3) : VectorBase<T, Len>(x, y, z) {
|
|
|
|
}
|
|
Vector(float x, float y) requires(std::same_as<T, float> && Len == 2) : VectorBase<T, Len>(x, y) {
|
|
|
|
}
|
|
// Defaulted default constructor: no component values are supplied here, so
// for trivial element types (e.g. float) a default-initialized Vector holds
// indeterminate components until they are assigned.
Vector() = default;
|
|
|
|
// Builds a quaternion (x, y, z, w) from three Euler angles.
// Each angle is halved and its sine/cosine taken with sinf/cosf, then the
// six values are combined into the four quaternion components below.
// NOTE(review): angles are presumably in radians, as sinf/cosf expect.
static Vector<T, Len> QuaternionRotationRollPitchYaw(float Pitch, float Yaw, float Roll) requires(Len == 4) {
    // Half angles first; the trigonometric terms below only depend on these.
    const float pitchHalf = Pitch * 0.5f;
    const float yawHalf = Yaw * 0.5f;
    const float rollHalf = Roll * 0.5f;

    const float cosPitch = cosf(pitchHalf);
    const float sinPitch = sinf(pitchHalf);
    const float cosYaw = cosf(yawHalf);
    const float sinYaw = sinf(yawHalf);
    const float cosRoll = cosf(rollHalf);
    const float sinRoll = sinf(rollHalf);

    const float qx = cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw;
    const float qy = cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw;
    const float qz = sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw;
    const float qw = cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw;

    return Vector<T, Len>(qx, qy, qz, qw);
}
|
|
|
|
template <uint32_t Blen>
|
|
Vector<T, Len> operator+(Vector<T, Blen> b){
|
|
Vector<T, Len> resultVector;
|
|
for(uint32_t i = 0; i < std::min(Len, Blen); i++) {
|
|
resultVector.v[i] = this->v[i]+b.v[i];
|
|
}
|
|
return resultVector;
|
|
}
|
|
template <uint32_t Blen>
|
|
Vector<T, Len> operator-(Vector<T, Blen> b){
|
|
Vector<T, Len> resultVector;
|
|
for(uint32_t i = 0; i < std::min(Len, Blen); i++) {
|
|
resultVector.v[i] = this->v[i]-b.v[i];
|
|
}
|
|
return resultVector;
|
|
}
|
|
// Unary minus: returns a new vector whose every component is the negation
// of the corresponding component of this vector.
Vector<T, Len> operator-(){
    Vector<T, Len> negated;
    uint32_t index = 0;
    while (index < Len) {
        negated.v[index] = -this->v[index];
        ++index;
    }
    return negated;
}
|
|
template <uint32_t Blen>
|
|
Vector<T, Len> operator*(Vector<T, Blen> b){
|
|
Vector<T, Len> resultVector;
|
|
for(uint32_t i = 0; i < std::min(Len, Blen); i++) {
|
|
resultVector.v[i] = this->v[i]*b.v[i];
|
|
}
|
|
return resultVector;
|
|
}
|
|
template <uint32_t Blen>
|
|
Vector<T, Len> operator/(Vector<T, Blen> b){
|
|
Vector<T, Len> resultVector;
|
|
for(uint32_t i = 0; i < std::min(Len, Blen); i++) {
|
|
resultVector.v[i] = this->v[i]/b.v[i];
|
|
}
|
|
return resultVector;
|
|
}
|
|
|
|
// Rotates this 3-component vector by the quaternion `rotation`.
// The vector is extended to a quaternion with w = 0 and sandwiched between
// the conjugate of `rotation` and `rotation` itself using
// QuaternionMultiply; the x, y, z of the product are returned.
// NOTE(review): `rotation` is presumably expected to be a unit quaternion;
// nothing here normalizes it.
Vector<T, Len> Rotate(Vector<T, 4> rotation) requires(Len == 3) {
    Vector<T, 4> conjugate = rotation.QuaternionConjugate();
    Vector<T, 4> pure(this->x, this->y, this->z, 0);
    // First half of the sandwich product.
    Vector<T, 4> half = conjugate.QuaternionMultiply(pure);
    // Second half: without this multiplication the result is not the rotated
    // vector (and its discarded w component is generally non-zero).
    Vector<T, 4> result = half.QuaternionMultiply(rotation);
    return Vector<T, Len>(result.x, result.y, result.z);
}
|
|
|
|
// Returns this 3-component vector scaled by the reciprocal of its Length().
// When the length is not greater than zero the scale factor is the length
// itself, so no division by zero takes place.
Vector<T, Len> Normalize() requires(Len == 3) {
    const float magnitude = Length();
    // Guard against dividing by zero: only invert a strictly positive length.
    const float scale = (magnitude > 0) ? (1.0f / magnitude) : magnitude;
    return Vector<T, Len>(
        this->v[0] * scale,
        this->v[1] * scale,
        this->v[2] * scale
    );
}
|
|
|
|
// Returns this 4-component vector scaled by the reciprocal of its Length().
// When the length is not greater than zero the scale factor is the length
// itself, so no division by zero takes place.
Vector<T, Len> Normalize() requires(Len == 4) {
    const float magnitude = Length();
    // Guard against dividing by zero: only invert a strictly positive length.
    const float scale = (magnitude > 0) ? (1.0f / magnitude) : magnitude;
    return Vector<T, Len>(
        this->v[0] * scale,
        this->v[1] * scale,
        this->v[2] * scale,
        this->v[3] * scale
    );
}
|
|
|
|
float Length()
|
|
{
|
|
float Result = LengthSq();
|
|
return sqrtf(Result);
|
|
}
|
|
|
|
// Returns a 3-component vector whose every component is 1 / Length().
// The squared length is replicated into all three components and then passed
// through ReciprocalSqrt(), which computes 1 / sqrt per component.
// No guard is applied for a zero-length vector (1 / sqrtf(0) is evaluated).
Vector<T, Len> ReciprocalLength() requires(Len == 3) {
    // LengthSq() yields a scalar; it has to be spread over a vector before
    // the member ReciprocalSqrt() (which takes no arguments) can be applied.
    const float lengthSq = LengthSq();
    return Vector<T, Len>(lengthSq, lengthSq, lengthSq).ReciprocalSqrt();
}
|
|
|
|
// Returns a vector holding 1 / sqrtf(component) for each of the three
// components of this vector. No check is made for zero or negative inputs.
Vector<T, Len> ReciprocalSqrt() requires(Len == 3)
{
    const float invRoot0 = 1.f / sqrtf(this->v[0]);
    const float invRoot1 = 1.f / sqrtf(this->v[1]);
    const float invRoot2 = 1.f / sqrtf(this->v[2]);
    return Vector<T, Len>(invRoot0, invRoot1, invRoot2);
}
|
|
|
|
float LengthSq()
|
|
{
|
|
return Dot(*this);
|
|
}
|
|
|
|
// Dot product of two 3-component vectors: the sum of the products of
// matching components, returned as float.
float Dot(Vector<T, Len> v2) requires(Len == 3)
{
    const auto product0 = this->v[0] * v2.v[0];
    const auto product1 = this->v[1] * v2.v[1];
    const auto product2 = this->v[2] * v2.v[2];
    return product0 + product1 + product2;
}
|
|
|
|
// Dot product of two 4-component vectors: the sum of the products of
// matching components, returned as float.
float Dot(Vector<T, Len> v2) requires(Len == 4)
{
    const auto product0 = this->v[0] * v2.v[0];
    const auto product1 = this->v[1] * v2.v[1];
    const auto product2 = this->v[2] * v2.v[2];
    const auto product3 = this->v[3] * v2.v[3];
    return product0 + product1 + product2 + product3;
}
|
|
|
|
// Cross product of this 3-component vector with v2 (this x v2).
Vector<T, Len> Cross(Vector<T, Len> v2) requires(Len == 3) {
    const auto crossX = (this->v[1] * v2.v[2]) - (this->v[2] * v2.v[1]);
    const auto crossY = (this->v[2] * v2.v[0]) - (this->v[0] * v2.v[2]);
    const auto crossZ = (this->v[0] * v2.v[1]) - (this->v[1] * v2.v[0]);
    return Vector<T, Len>(crossX, crossY, crossZ);
}
|
|
|
|
// Returns the conjugate of this quaternion: x, y and z are negated and w is
// kept as is.
Vector<T, Len> QuaternionConjugate() requires(Len == 4) {
    const auto negatedX = -this->x;
    const auto negatedY = -this->y;
    const auto negatedZ = -this->z;
    return Vector<T, Len>(negatedX, negatedY, negatedZ, this->w);
}
|
|
|
|
// Multiplies two quaternions stored as (x, y, z, w).
// With `a` = this quaternion and `b` = q2, the result below is the Hamilton
// product b * a (note the operand order).
Vector<T, Len> QuaternionMultiply(Vector<T, Len> q2) requires(Len == 4) {
    // Components of this quaternion.
    const T ax = this->v[0];
    const T ay = this->v[1];
    const T az = this->v[2];
    const T aw = this->v[3];
    // Components of the argument.
    const T bx = q2.v[0];
    const T by = q2.v[1];
    const T bz = q2.v[2];
    const T bw = q2.v[3];

    return Vector<T, Len>(
        (bw * ax) + (bx * aw) + (by * az) - (bz * ay),
        (bw * ay) - (bx * az) + (by * aw) + (bz * ax),
        (bw * az) + (bx * ay) - (by * ax) + (bz * aw),
        (bw * aw) - (bx * ax) - (by * ay) - (bz * az)
    );
}
|
|
|
|
// Builds a rotation quaternion about this 3-component vector used as the
// axis: the axis is normalized first and then passed on to
// QuaternionRotationNormal with the given angle.
Vector<T, 4> QuaternionRotationAxis(float angle) requires(Len == 3) {
    return Normalize().QuaternionRotationNormal(angle);
}
|
|
|
|
// Builds a rotation quaternion about this 3-component vector used as the
// axis: x, y, z are multiplied by the sine of half the angle and w is the
// cosine of half the angle.
// NOTE(review): the axis is presumably expected to be unit length already
// (QuaternionRotationAxis normalizes before calling this) - confirm.
Vector<T, 4> QuaternionRotationNormal(float angle) requires(Len == 3) {
    float sinHalf = 0.0f;
    float cosHalf = 0.0f;
    // NOTE(review): XMScalarSinCos is not declared in the visible part of
    // this file; presumably it comes from an imported partition - confirm.
    XMScalarSinCos(&sinHalf, &cosHalf, angle * 0.5f);

    // w is set to 1 so that the component-wise product leaves cosHalf in w.
    Vector<T, 4> axisWithUnitW(this->x, this->y, this->z, 1);
    return axisWithUnitW * Vector<T, 4>(sinHalf, sinHalf, sinHalf, cosHalf);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// typedef
|
|
// typename std::conditional<(sizeof(T)* len > 32 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m512i,
|
|
// typename std::conditional<(sizeof(T)* len > 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m256i,
|
|
// typename std::conditional<(sizeof(T)* len <= 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t> || std::same_as<T, uint64_t> || std::same_as<T, uint32_t> || std::same_as<T, uint16_t> || std::same_as<T, uint8_t>)), __m128i,
|
|
// typename std::conditional<(len > 16 && std::same_as<T, __fp16>), __m512h,
|
|
// typename std::conditional<(len > 8 && std::same_as<T, __fp16>), __m256h,
|
|
// typename std::conditional<(len <= 8 && std::same_as<T, __fp16>), __m128h,
|
|
// typename std::conditional<(len > 8 && std::same_as<T, float>), __m512,
|
|
// typename std::conditional<(len > 4 && std::same_as<T, float>), __m256,
|
|
// typename std::conditional<(len <= 4 && std::same_as<T, float>), __m128,
|
|
// typename std::conditional<(len > 4 && std::same_as<T, double>), __m512d,
|
|
// typename std::conditional<(len > 2 && std::same_as<T, double>), __m256d, __m128d
|
|
// >::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type vector_type;
|
|
|
|
|
|
// public:
|
|
// template <typename Datatype, typename Vectortype>
|
|
// static consteval uint8_t GetVectorAlignedSize() {
|
|
// if constexpr(std::same_as<Datatype, int8_t> && std::same_as<Vectortype, __m512i>) {
|
|
// return 64;
|
|
// } else if constexpr(std::same_as<Vectortype, __m512h> || (std::same_as<T, int8_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<T, int16_t> && std::same_as<Vectortype, __m512i>)) {
|
|
// return 32;
|
|
// } else if constexpr(std::same_as<Vectortype, __m256h> || std::same_as<Vectortype, __m512> || (std::same_as<Datatype, int8_t> && std::same_as<Vectortype, __m128i>) || (std::same_as<Datatype, int16_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<T, int32_t> && std::same_as<Vectortype, __m512i>)) {
|
|
// return 16;
|
|
// } else if constexpr(std::same_as<Vectortype, __m128h> || std::same_as<Vectortype, __m256> || std::same_as<Vectortype, __m512d> || (std::same_as<Datatype, int16_t> && std::same_as<Vectortype, __m128i>) || (std::same_as<T, int32_t> && std::same_as<Vectortype, __m256i>) || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m512i>)) {
|
|
// return 8;
|
|
// } else if constexpr(std::same_as<Vectortype, __m128> || std::same_as<Vectortype, __m256d> || (std::same_as<Datatype, int32_t> && std::same_as<Vectortype, __m128>) || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m256i>)) {
|
|
// return 4;
|
|
// } else if constexpr(std::same_as<Vectortype, __m128d> || (std::same_as<Datatype, int64_t> && std::same_as<Vectortype, __m128>)) {
|
|
// return 2;
|
|
// } else{
|
|
// throw std::invalid_argument("");
|
|
// }
|
|
// }
|
|
// vector_type v;
|
|
|
|
// Vector() {};
|
|
// Vector(__m128h v) requires(std::same_as<vector_type, __m128h>) : v(v) { }
|
|
// Vector(__m128 v) requires(std::same_as<vector_type, __m128>) : v(v) { }
|
|
// Vector(__m128d v) requires(std::same_as<vector_type, __m128d>) : v(v) { }
|
|
// Vector(__m128i v) requires(std::same_as<vector_type, __m128i>) : v(v) { }
|
|
|
|
// Vector(__m256h v) requires(std::same_as<vector_type, __m256h>) : v(v) { }
|
|
// Vector(__m256 v) requires(std::same_as<vector_type, __m256>) : v(v) { }
|
|
// Vector(__m256d v) requires(std::same_as<vector_type, __m256d>) : v(v) { }
|
|
// Vector(__m256i v) requires(std::same_as<vector_type, __m256i>) : v(v) { }
|
|
|
|
// Vector(__m512h v) requires(std::same_as<vector_type, __m512h>) : v(v) { }
|
|
// Vector(__m512 v) requires(std::same_as<vector_type, __m512>) : v(v) { }
|
|
// Vector(__m512d v) requires(std::same_as<vector_type, __m512d>) : v(v) { }
|
|
// Vector(__m512i v) requires(std::same_as<vector_type, __m512i>) : v(v) { }
|
|
|
|
// template <uint32_t blen>
|
|
// void operator+=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// this->v = (*this+b).v;
|
|
// }
|
|
// template <uint32_t blen>
|
|
// void operator-=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// this->v = (*this-b).v;
|
|
// }
|
|
// template <uint32_t blen>
|
|
// void operator*=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// this->v = (*this*b).v;
|
|
// }
|
|
// template <uint32_t blen>
|
|
// void operator/=(Vector<T, blen> b) requires(Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// this->v = (*this/b).v;
|
|
// }
|
|
|
|
// std::string ToString() const {
|
|
// std::ostringstream ss;
|
|
// ss << "{ ";
|
|
// T store[GetVectorAlignedSize<T, vector_type>()];
|
|
// Store(store);
|
|
// for(uint8_t i = 0; i < len; i++) {
|
|
// ss << std::format("{}", store[i]);
|
|
// if(i+1 < len) {
|
|
// ss << ", ";
|
|
// }
|
|
// }
|
|
// ss << " }";
|
|
// return std::string(ss.str());
|
|
// }
|
|
// #pragma region 128
|
|
// Vector(
|
|
// const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
|
|
// const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0
|
|
// ) requires(std::same_as<T, __fp16> && std::same_as<vector_type, __m128h>) {
|
|
// __fp16 temp[]{ x0,y0,z0,w0,x1,y1,z1,w1,};
|
|
// v = _mm_load_ph(temp);
|
|
// }
|
|
|
|
// Vector(float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0) requires(std::same_as<T, float>&& std::same_as<vector_type, __m128>) {
|
|
// v = _mm_set_ps(w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(double x0 = 0, double y0 = 0) requires(std::same_as<T, double>&& std::same_as<vector_type, __m128d>) {
|
|
// v = _mm_set_pd(y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
|
|
// int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
|
|
// int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
|
|
// int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0
|
|
// ) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi8(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
|
|
// int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0
|
|
// ) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi16(w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi32(w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(int64_t x0 = 0, int64_t y0 = 0) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi64x(y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// uint8_t x0 = 0, uint8_t y0 = 0, uint8_t z0 = 0, uint8_t w0 = 0,
|
|
// uint8_t x1 = 0, uint8_t y1 = 0, uint8_t z1 = 0, uint8_t w1 = 0,
|
|
// uint8_t x2 = 0, uint8_t y2 = 0, uint8_t z2 = 0, uint8_t w2 = 0,
|
|
// uint8_t x3 = 0, uint8_t y3 = 0, uint8_t z3 = 0, uint8_t w3 = 0
|
|
// ) requires(std::same_as<T, uint8_t> && std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi8(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// uint16_t x0 = 0, uint16_t y0 = 0, uint16_t z0 = 0, uint16_t w0 = 0,
|
|
// uint16_t x1 = 0, uint16_t y1 = 0, uint16_t z1 = 0, uint16_t w1 = 0
|
|
// ) requires(std::same_as<T, uint16_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi16(w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(uint32_t x0 = 0, uint32_t y0 = 0, uint32_t z0 = 0, uint32_t w0 = 0) requires(std::same_as<T, uint32_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi32(w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(uint64_t x0 = 0, uint64_t y0 = 0) requires(std::same_as<T, uint64_t>&& std::same_as<vector_type, __m128i>) {
|
|
// v = _mm_set_epi64x(y0, x0);
|
|
// }
|
|
|
|
// static Vector<T, len> Zero() requires(std::same_as<vector_type, __m128>) {
|
|
// return Vector<T, len>(_mm_setzero_ps());
|
|
// }
|
|
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m128h>) {
|
|
// _mm_storeu_ph(reinterpret_cast<void*>(data), reinterpret_cast<__m128h>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m128>) {
|
|
// _mm_storeu_ps(reinterpret_cast<float*>(data), reinterpret_cast<__m128>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m128d>) {
|
|
// _mm_storeu_pd(data, reinterpret_cast<__m128d>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i>) {
|
|
// _mm_storeu_epi8(reinterpret_cast<void*>(data), reinterpret_cast<__m128i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m128i>) {
|
|
// _mm_storeu_epi16(reinterpret_cast<void*>(data), reinterpret_cast<__m128i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i>) {
|
|
// _mm_storeu_epi32(reinterpret_cast<void*>(data), reinterpret_cast<__m128i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m128i>) {
|
|
// _mm_storeu_epi64(reinterpret_cast<void*>(data), reinterpret_cast<__m128i>(v));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_mul_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m128h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_div_ph(reinterpret_cast<__m128h>(v), reinterpret_cast<__m128h>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_mul_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m128> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_div_ps(reinterpret_cast<__m128>(v), reinterpret_cast<__m128>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_mul_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m128d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_div_pd(reinterpret_cast<__m128d>(v), reinterpret_cast<__m128d>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_epi8(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_epi8(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi8(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi8(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_epi16(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_epi16(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi16(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi16(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_mul_epi32(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, i132> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi32(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_add_epi64(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m128i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm_sub_epi64(reinterpret_cast<__m128i>(v), reinterpret_cast<__m128i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi64(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi64(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> AddMask() {
|
|
|
|
// }
|
|
// #pragma endregion
|
|
// #pragma region 256
|
|
// Vector(
|
|
// const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
|
|
// const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0,
|
|
// const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0,
|
|
// const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0
|
|
// ) requires(std::same_as<T, __fp16>&& std::same_as<vector_type, __m256h>) {
|
|
// __fp16 temp[]{ w0,z0,y0,x0,w1,z1,y1,x1,w2,z2,y2,x2,w3,z3,y3,x3 };
|
|
// v = _mm256_load_ph(temp);
|
|
// }
|
|
|
|
// Vector(
|
|
// float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
|
|
// float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0
|
|
// ) requires(std::same_as<T, float>&& std::same_as<vector_type, __m256>) {
|
|
// v = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0) requires(std::same_as<T, double>&& std::same_as<vector_type, __m256d>) {
|
|
// v = _mm256_set_pd(w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
|
|
// int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
|
|
// int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
|
|
// int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0,
|
|
// int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0,
|
|
// int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0,
|
|
// int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0,
|
|
// int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0
|
|
// ) requires(std::same_as<T, int8_t>&& std::same_as<vector_type, __m256i>) {
|
|
// v = _mm256_set_epi8(w7, z7, y7, x7, w6, z6, y6, x6, w5, z5, y5, x5, w4, z4, y4, x4, w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
|
|
// int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0,
|
|
// int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0,
|
|
// int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0
|
|
// ) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m256i>) {
|
|
// v = _mm256_set_epi16(w3, z3, y3, x3, w2, z2, y2, x2, w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(
|
|
// int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0,
|
|
// int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0
|
|
// ) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m256i>) {
|
|
// v = _mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0);
|
|
// }
|
|
|
|
// Vector(int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m256>) {
|
|
// v = _mm256_set_epi64x(w0, z0, y0, x0);
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_mul_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_div_ph(reinterpret_cast<__m256h>(v), reinterpret_cast<__m256h>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_mul_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_div_ps(reinterpret_cast<__m256>(v), reinterpret_cast<__m256>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_mul_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m256d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_div_pd(reinterpret_cast<__m256d>(v), reinterpret_cast<__m256d>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_epi8(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_epi8(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi8(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi8(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_epi16(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_epi16(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi16(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi16(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_mul_epi32(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi32(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_add_epi64(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm256_sub_epi64(reinterpret_cast<__m256i>(v), reinterpret_cast<__m256i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi64(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi64(v, bv));
|
|
// // }
|
|
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m256h>) {
|
|
// _mm256_storeu_ph(reinterpret_cast<void*>(data), reinterpret_cast<__m256h>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m256>) {
|
|
// _mm256_storeu_ps(data, reinterpret_cast<__m256>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m256d>) {
|
|
// _mm256_storeu_pd(data, reinterpret_cast<__m256d>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m256i>) {
|
|
// _mm256_storeu_epi8(reinterpret_cast<void*>(data), reinterpret_cast<__m256i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m256i>) {
|
|
// _mm256_storeu_epi16(reinterpret_cast<void*>(data), reinterpret_cast<__m256i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m256i>) {
|
|
// _mm256_storeu_epi32(reinterpret_cast<void*>(data), reinterpret_cast<__m256i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m256i>) {
|
|
// _mm256_storeu_epi64(reinterpret_cast<void*>(data), reinterpret_cast<__m256i>(v));
|
|
// }
|
|
// #pragma endregion
|
|
// #pragma region 512
|
|
// Vector(
|
|
// const __fp16& x0 = 0, const __fp16& y0 = 0, const __fp16& z0 = 0, const __fp16& w0 = 0,
|
|
// const __fp16& x1 = 0, const __fp16& y1 = 0, const __fp16& z1 = 0, const __fp16& w1 = 0,
|
|
// const __fp16& x2 = 0, const __fp16& y2 = 0, const __fp16& z2 = 0, const __fp16& w2 = 0,
|
|
// const __fp16& x3 = 0, const __fp16& y3 = 0, const __fp16& z3 = 0, const __fp16& w3 = 0,
|
|
// const __fp16& x4 = 0, const __fp16& y4 = 0, const __fp16& z4 = 0, const __fp16& w4 = 0,
|
|
// const __fp16& x5 = 0, const __fp16& y5 = 0, const __fp16& z5 = 0, const __fp16& w5 = 0,
|
|
// const __fp16& x6 = 0, const __fp16& y6 = 0, const __fp16& z6 = 0, const __fp16& w6 = 0,
|
|
// const __fp16& x7 = 0, const __fp16& y7 = 0, const __fp16& z7 = 0, const __fp16& w7 = 0
|
|
// ) requires(std::same_as<T, __fp16>&& std::same_as<vector_type, __m512h>) {
|
|
// __fp16 temp[]{ w0,z0,y0,x0, w1,z1,y1,x1, w2,z2,y2,x2,w3, z3,y3,x3, w4,z4,y4,x4, w5,z5,y5,x5, w6,z6,y6,x6, w7,z7,y7,x7 };
|
|
// v = _mm512_load_ph(temp);
|
|
// }
|
|
|
|
// Vector(
|
|
// float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
|
|
// float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0,
|
|
// float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0,
|
|
// float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0
|
|
// ) requires(std::same_as<T, float>&& std::same_as<vector_type, __m512>) {
|
|
// v = _mm512_set_ps(
|
|
// w3, z3, y3, x3,
|
|
// w2, z2, y2, x2,
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// Vector(
|
|
// double x0 = 0, double y0 = 0, double z0 = 0, double w0 = 0,
|
|
// double x1 = 0, double y1 = 0, double z1 = 0, double w1 = 0
|
|
// ) requires(std::same_as<T, double>&& std::same_as<vector_type, __m512d>) {
|
|
// v = _mm512_set_pd(
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// Vector(
|
|
// int8_t x0 = 0, int8_t y0 = 0, int8_t z0 = 0, int8_t w0 = 0,
|
|
// int8_t x1 = 0, int8_t y1 = 0, int8_t z1 = 0, int8_t w1 = 0,
|
|
// int8_t x2 = 0, int8_t y2 = 0, int8_t z2 = 0, int8_t w2 = 0,
|
|
// int8_t x3 = 0, int8_t y3 = 0, int8_t z3 = 0, int8_t w3 = 0,
|
|
// int8_t x4 = 0, int8_t y4 = 0, int8_t z4 = 0, int8_t w4 = 0,
|
|
// int8_t x5 = 0, int8_t y5 = 0, int8_t z5 = 0, int8_t w5 = 0,
|
|
// int8_t x6 = 0, int8_t y6 = 0, int8_t z6 = 0, int8_t w6 = 0,
|
|
// int8_t x7 = 0, int8_t y7 = 0, int8_t z7 = 0, int8_t w7 = 0,
|
|
// int8_t x8 = 0, int8_t y8 = 0, int8_t z8 = 0, int8_t w8 = 0,
|
|
// int8_t x9 = 0, int8_t y9 = 0, int8_t z9 = 0, int8_t w9 = 0,
|
|
// int8_t x10 = 0, int8_t y10 = 0, int8_t z10 = 0, int8_t w10 = 0,
|
|
// int8_t x11 = 0, int8_t y11 = 0, int8_t z11 = 0, int8_t w11 = 0,
|
|
// int8_t x12 = 0, int8_t y12 = 0, int8_t z12 = 0, int8_t w12 = 0,
|
|
// int8_t x13 = 0, int8_t y13 = 0, int8_t z13 = 0, int8_t w13 = 0,
|
|
// int8_t x14 = 0, int8_t y14 = 0, int8_t z14 = 0, int8_t w14 = 0,
|
|
// int8_t x15 = 0, int8_t y15 = 0, int8_t z15 = 0, int8_t w15 = 0
|
|
// ) requires(std::same_as<T, int8_t>&& std::same_as<vector_type, __m512i>) {
|
|
// v = _mm512_set_epi8(
|
|
// w15, z15, y15, x15,
|
|
// w14, z14, y14, x14,
|
|
// w13, z13, y13, x13,
|
|
// w12, z12, y12, x12,
|
|
// w11, z11, y11, x11,
|
|
// w10, z10, y10, x10,
|
|
// w9, z9, y9, x9,
|
|
// w8, z8, y8, x8,
|
|
// w7, z7, y7, x7,
|
|
// w6, z6, y6, x6,
|
|
// w5, z5, y5, x5,
|
|
// w4, z4, y4, x4,
|
|
// w3, z3, y3, x3,
|
|
// w2, z2, y2, x2,
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// Vector(
|
|
// int16_t x0 = 0, int16_t y0 = 0, int16_t z0 = 0, int16_t w0 = 0,
|
|
// int16_t x1 = 0, int16_t y1 = 0, int16_t z1 = 0, int16_t w1 = 0,
|
|
// int16_t x2 = 0, int16_t y2 = 0, int16_t z2 = 0, int16_t w2 = 0,
|
|
// int16_t x3 = 0, int16_t y3 = 0, int16_t z3 = 0, int16_t w3 = 0,
|
|
// int16_t x4 = 0, int16_t y4 = 0, int16_t z4 = 0, int16_t w4 = 0,
|
|
// int16_t x5 = 0, int16_t y5 = 0, int16_t z5 = 0, int16_t w5 = 0,
|
|
// int16_t x6 = 0, int16_t y6 = 0, int16_t z6 = 0, int16_t w6 = 0,
|
|
// int16_t x7 = 0, int16_t y7 = 0, int16_t z7 = 0, int16_t w7 = 0
|
|
// ) requires(std::same_as<T, int16_t>&& std::same_as<vector_type, __m512i>) {
|
|
// v = _mm512_set_epi16(
|
|
// w7, z7, y7, x7,
|
|
// w6, z6, y6, x6,
|
|
// w5, z5, y5, x5,
|
|
// w4, z4, y4, x4,
|
|
// w3, z3, y3, x3,
|
|
// w2, z2, y2, x2,
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// Vector(
|
|
// int32_t x0 = 0, int32_t y0 = 0, int32_t z0 = 0, int32_t w0 = 0,
|
|
// int32_t x1 = 0, int32_t y1 = 0, int32_t z1 = 0, int32_t w1 = 0,
|
|
// int32_t x2 = 0, int32_t y2 = 0, int32_t z2 = 0, int32_t w2 = 0,
|
|
// int32_t x3 = 0, int32_t y3 = 0, int32_t z3 = 0, int32_t w3 = 0
|
|
// ) requires(std::same_as<T, int32_t>&& std::same_as<vector_type, __m512i>) {
|
|
// v = _mm512_set_epi32(
|
|
// w3, z3, y3, x3,
|
|
// w2, z2, y2, x2,
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// Vector(
|
|
// int64_t x0 = 0, int64_t y0 = 0, int64_t z0 = 0, int64_t w0 = 0,
|
|
// int64_t x1 = 0, int64_t y1 = 0, int64_t z1 = 0, int64_t w1 = 0
|
|
// ) requires(std::same_as<T, int64_t>&& std::same_as<vector_type, __m512i>) {
|
|
// v = _mm512_set_epi64(
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0
|
|
// );
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_mul_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512h> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_div_ph(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_mul_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_div_ps(reinterpret_cast<__m512>(v), reinterpret_cast<__m512>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_mul_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<vector_type, __m512d> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_div_pd(reinterpret_cast<__m512d>(v), reinterpret_cast<__m512d>(b.v)));
|
|
// }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_epi8(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_epi8(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi8(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi8(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_epi16(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_epi16(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi16(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi16(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_mul_epi32(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi32(v, bv));
|
|
// // }
|
|
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator+(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_add_epi64(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// template <uint32_t blen>
|
|
// Vector<T, len> operator-(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i> && Vector<T, blen>::GetVectorAlignedSize() == GetVectorAlignedSize()) {
|
|
// return Vector<T, len>(_mm512_sub_epi64(reinterpret_cast<__m512i>(v), reinterpret_cast<__m512i>(b.v)));
|
|
// }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator*(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_mul_epi64(v, bv));
|
|
// // }
|
|
// // template <uint32_t blen>
|
|
// // Vector<T, len> operator/(Vector<T, blen> b) requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// // __m512i v = this->v;
|
|
// // __m512i bv = b.v;
|
|
// // return Vector<T, len>(_mm512_div_epi64(v, bv));
|
|
// // }
|
|
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m512h>) {
|
|
// _mm512_storeu_ph(reinterpret_cast<void*>(data), reinterpret_cast<__m512h>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m512>) {
|
|
// _mm512_storeu_ps(reinterpret_cast<void*>(data), reinterpret_cast<__m512>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<vector_type, __m512d>) {
|
|
// _mm512_storeu_pd(data, reinterpret_cast<__m512d>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int8_t> && std::same_as<vector_type, __m512i>) {
|
|
// _mm512_storeu_epi8(reinterpret_cast<void*>(data), reinterpret_cast<__m512i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int16_t> && std::same_as<vector_type, __m512i>) {
|
|
// _mm512_storeu_epi16(reinterpret_cast<void*>(data), reinterpret_cast<__m512i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int32_t> && std::same_as<vector_type, __m512i>) {
|
|
// _mm512_storeu_epi32(reinterpret_cast<void*>(data), reinterpret_cast<__m512i>(v));
|
|
// }
|
|
// void Store(T* data) const requires(std::same_as<T, int64_t> && std::same_as<vector_type, __m512i>) {
|
|
// _mm512_storeu_epi64(reinterpret_cast<void*>(data), reinterpret_cast<__m512i>(v));
|
|
// }
|
|
// #pragma endregion
|
|
// };
|
|
|
|
// export template <typename T, uint32_t len, uint32_t vectorLenght>
|
|
// class VectorVector : public Vector<T, len*vectorLenght> {
|
|
// public:
|
|
// VectorVector(__m128h v0, __m128h v1) requires(std::same_as<T, __fp16> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) {
|
|
// this->v = _mm256_castps128_ps256(v0);
|
|
// this->v = _mm256_insertf128_ps(this->v,v1,1);
|
|
// }
|
|
// VectorVector(__m128 v0, __m128 v1, __m128 v2, __m128 v3) requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) {
|
|
// this->v = _mm512_castps256_ps512(_mm256_castps128_ps256(v0));
|
|
// this->v = _mm512_insertfloatx4(this->v, v1, 1);
|
|
// this->v = _mm512_insertfloatx4(this->v, v2, 2);
|
|
// this->v = _mm512_insertfloatx4(this->v, v3, 3);
|
|
// }
|
|
// VectorVector(__m512 v) : Vector<T, len*vectorLenght>(v) { //requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize()) : Vector<T, len*vectorLenght>(v)
|
|
|
|
// }
|
|
// VectorVector(
|
|
// float x0 = 0, float y0 = 0, float z0 = 0,
|
|
// float x1 = 0, float y1 = 0, float z1 = 0,
|
|
// float x2 = 0, float y2 = 0, float z2 = 0,
|
|
// float x3 = 0, float y3 = 0, float z3 = 0,
|
|
// float x4 = 0, float y4 = 0, float z4 = 0,
|
|
// float x5 = 0
|
|
// ) requires(std::same_as<T, float> && vectorLenght*Vector<T, len>::GetVectorAlignedSize() == Vector<T, len*vectorLenght>::GetVectorAlignedSize() && len == 3) :
|
|
// Vector<T, len*vectorLenght>(
|
|
// x0,y0,z0,
|
|
// x1,y1,z1,
|
|
// x2,y2,z2,
|
|
// x3,y3,z3,
|
|
// x4,y4,z4,
|
|
// x5)
|
|
// {}
|
|
|
|
// VectorVector(
|
|
// float x0 = 0, float y0 = 0, float z0 = 0, float w0 = 0,
|
|
// float x1 = 0, float y1 = 0, float z1 = 0, float w1 = 0,
|
|
// float x2 = 0, float y2 = 0, float z2 = 0, float w2 = 0,
|
|
// float x3 = 0, float y3 = 0, float z3 = 0, float w3 = 0
|
|
// ) :
|
|
// Vector<T, len*vectorLenght>(
|
|
// w3, z3, y3, x3,
|
|
// w2, z2, y2, x2,
|
|
// w1, z1, y1, x1,
|
|
// w0, z0, y0, x0)
|
|
// {}
|
|
};
|
|
}
|
|
|
|
|
|
/// std::format support for three-component float vectors.
///
/// The vector is first rendered as the text "{x, y, z}"; that text is then
/// handed to the inherited std::string formatter. parse() is not overridden
/// here, so the format spec is parsed by the std::string formatter and is
/// applied to the rendered text as a whole.
template <>
struct std::formatter<Crafter::Vector<float, 3>> : std::formatter<std::string> {
    /// @param obj Vector whose x, y and z members are printed.
    /// @param ctx Format context that receives the output.
    /// @return The result of the std::string formatter's format() for the rendered text.
    auto format(const Crafter::Vector<float, 3>& obj, std::format_context& ctx) const {
        // "{{" and "}}" are escaped braces: the output is wrapped in literal { }.
        const std::string rendered = std::format("{{{}, {}, {}}}", obj.x, obj.y, obj.z);
        return std::formatter<std::string>::format(rendered, ctx);
    }
};