278 lines
No EOL
12 KiB
C++
Executable file
278 lines
No EOL
12 KiB
C++
Executable file
module;
|
|
|
|
#include <concepts>
#include <cstdint>
#include <format>
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>

#include <immintrin.h>
|
|
|
|
export module Crafter.Math:Matrix;
|
|
|
|
import :BasicTypes;
|
|
import :Vector;
|
|
import :Misc;
|
|
|
|
namespace Crafter {
|
|
export template <typename T, uint32_t collumSize, uint32_t rowSize, uint32_t repeats>
|
|
class Matrix {
|
|
public:
|
|
typedef
|
|
typename std::conditional<(sizeof(T)* collumSize*repeats > 32 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m512i,
|
|
typename std::conditional<(sizeof(T)* collumSize*repeats > 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m256i,
|
|
typename std::conditional<(sizeof(T)* collumSize*repeats <= 16 && (std::same_as<T, int64_t> || std::same_as<T, int32_t> || std::same_as<T, int16_t> || std::same_as<T, int8_t>)), __m128i,
|
|
typename std::conditional<(collumSize*repeats > 16 && std::same_as<T, __fp16>), __m512h,
|
|
typename std::conditional<(collumSize*repeats > 8 && std::same_as<T, __fp16>), __m256h,
|
|
typename std::conditional<(collumSize*repeats <= 8 && std::same_as<T, __fp16>), __m128h,
|
|
typename std::conditional<(collumSize*repeats > 8 && std::same_as<T, float>), __m512,
|
|
typename std::conditional<(collumSize*repeats > 4 && std::same_as<T, float>), __m256,
|
|
typename std::conditional<(collumSize*repeats <= 4 && std::same_as<T, float>), __m128,
|
|
typename std::conditional<(collumSize*repeats > 4 && std::same_as<T, double>), __m512d,
|
|
typename std::conditional<(collumSize*repeats > 2 && std::same_as<T, double>), __m256d, __m128d
|
|
>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type>::type collum_type;
|
|
|
|
collum_type c[rowSize];
|
|
|
|
Matrix() {
|
|
|
|
}
|
|
|
|
Matrix(__m128 c0, __m128 c1, __m128 c2, __m128 c3) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
|
|
c[0] = c0;
|
|
c[1] = c1;
|
|
c[2] = c2;
|
|
c[3] = c3;
|
|
}
|
|
|
|
Matrix(
|
|
float x0, float y0, float z0, float w0,
|
|
float x1, float y1, float z1, float w1,
|
|
float x2, float y2, float z2, float w2,
|
|
float x3, float y3, float z3, float w3
|
|
) requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
|
|
c[0] = _mm_set_ps(x3, x2, x1, x0);
|
|
c[1] = _mm_set_ps(y3, y2, y1, y0);
|
|
c[2] = _mm_set_ps(z3, z2, z1, z0);
|
|
c[3] = _mm_set_ps(w3, w2, w1, w0);
|
|
}
|
|
|
|
Vector<T, rowSize> operator*(Vector<T, 4> b) const requires(collumSize == 4 && rowSize == 4 && repeats == 1 && std::same_as<T, float>) {
|
|
__m128 result = _mm_mul_ps(reinterpret_cast<__m128>(c[0]), reinterpret_cast<__m128>(b.v));
|
|
result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[1]), reinterpret_cast<__m128>(b.v), result);
|
|
result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[2]), reinterpret_cast<__m128>(b.v), result);
|
|
result = _mm_fmadd_ps(reinterpret_cast<__m128>(c[3]), reinterpret_cast<__m128>(b.v), result);
|
|
return Vector<T, 4>(result);
|
|
}
|
|
|
|
|
|
// static Matrix<T, collums, rowSize, vectorSize> Scaling(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
|
|
// return Matrix<T, collums, rowSize, vectorSize>(
|
|
// _mm_set_ps(0, 0, 0, x),
|
|
// _mm_set_ps(0, 0, y, 0),
|
|
// _mm_set_ps(0, z, 0, 0),
|
|
// _mm_set_ps(1, 0, 0, 0)
|
|
// );
|
|
// }
|
|
|
|
// static Matrix<T, collums, rowSize, vectorSize> Translation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
|
|
// return Matrix<T, collums, rowSize, vectorSize>(
|
|
// _mm_set_ps(0, 0, 0, 1),
|
|
// _mm_set_ps(0, 0, 1, 0),
|
|
// _mm_set_ps(0, 1, 0, 0),
|
|
// _mm_set_ps(1, z, y, x)
|
|
// );
|
|
// }
|
|
|
|
// // static Matrix<T, collums, rowSize, vectorSize> Rotation(float x, float y, float z) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
|
|
// // return Matrix<T, collums, rowSize, vectorSize>(
|
|
// // _mm_set_ps(0, 0, 0, 1),
|
|
// // _mm_set_ps(0, 0, 1, 0),
|
|
// // _mm_set_ps(0, 1, 0, 0),
|
|
// // _mm_set_ps(1, z, y, x)
|
|
// // );
|
|
// // }
|
|
|
|
// static Matrix<T, collums, rowSize, vectorSize> Idenity() requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
|
|
// return Matrix<T, collums, rowSize, vectorSize>(
|
|
// _mm_set_ps(0, 0, 0, 1),
|
|
// _mm_set_ps(0, 0, 1, 0),
|
|
// _mm_set_ps(0, 1, 0, 0),
|
|
// _mm_set_ps(1, 0, 0, 0)
|
|
// );
|
|
// }
|
|
|
|
// static Matrix<T, collums, rowSize, vectorSize> Projection(float FovAngleY, float AspectRatio, float NearZ, float FarZ) requires(collums == 4 && (rowSize == 3 || rowSize == 4) && vectorSize == 1 && std::same_as<T, float>) {
|
|
// float SinFov;
|
|
// float CosFov;
|
|
// XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
|
|
// float fRange = FarZ / (NearZ - FarZ);
|
|
// // Note: This is recorded on the stack
|
|
// float Height = CosFov / SinFov;
|
|
// __m128 rMem = {
|
|
// Height / AspectRatio,
|
|
// Height,
|
|
// fRange,
|
|
// fRange * NearZ
|
|
// };
|
|
// // Copy from memory to SSE register
|
|
// __m128 vValues = rMem;
|
|
// __m128 vTemp = _mm_setzero_ps();
|
|
// // Copy x only
|
|
// vTemp = _mm_move_ss(vTemp, vValues);
|
|
// // Height / AspectRatio,0,0,0
|
|
// Matrix<T, collums, rowSize, vectorSize> M;
|
|
// M.r[0] = vTemp;
|
|
// // 0,Height,0,0
|
|
// vTemp = vValues;
|
|
// vTemp = _mm_and_ps(vTemp, g_XMMaskY.v);
|
|
// M.r[1] = vTemp;
|
|
// // x=fRange,y=-fRange * NearZ,0,-1.0f
|
|
// vTemp = _mm_setzero_ps();
|
|
// vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3.v, _MM_SHUFFLE(3, 2, 3, 2));
|
|
// // 0,0,fRange,-1.0f
|
|
// vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
|
|
// M.r[2] = vTemp;
|
|
// // 0,0,fRange * NearZ,0.0f
|
|
// vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
|
|
// M.r[3] = vTemp;
|
|
// return M;
|
|
// }
|
|
|
|
// template <uint32_t vectorRowSize>
|
|
// Vector<T, rowSize> operator*(Vector<T, vectorRowSize> b) const requires(collums == 4 && vectorRowSize == 4 && rowSize >= 4 && vectorSize == 1 && std::same_as<T, float>) {
|
|
|
|
|
|
// //std::cout << Vector<T, rowSize>(allX).ToString() << std::endl;
|
|
|
|
// // __m128 result = _mm_permute_ps(b.v, 0b00000000);
|
|
// // result = _mm_fmadd_ps(result, r[0], r[3]);
|
|
|
|
// // __m128 allY = _mm_permute_ps(b.v, 0b10101010);
|
|
// // result = _mm_fmadd_ps(allY, r[1], result);
|
|
|
|
// // __m128 allZ = _mm_permute_ps(b.v, 0b01010101);
|
|
// // return Vector<T, rowSize>(_mm_fmadd_ps(allZ, r[2], result));
|
|
// return Vector<T, vectorRowSize>(1, 2, 3, 4);
|
|
// }
|
|
|
|
|
|
// Matrix<T, collums, rowSize, vectorSize> operator*(Matrix<T, collums, rowSize, vectorSize> b) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as<T, float>) {
|
|
// Matrix<T, collums, rowSize, vectorSize> result;
|
|
// result.r[0] = _mm_permute_ps(b.r[0], 0b00000000);
|
|
// result.r[1] = _mm_fmadd_ps(_mm_permute_ps(b.r[1], 0b00000000), reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0]));
|
|
// result.r[1] = _mm_permute_ps(b.r[1], 0b00000000);
|
|
// result.r[2] = _mm_permute_ps(b.r[2], 0b00000000);
|
|
// result.r[3] = _mm_permute_ps(b.r[3], 0b00000000);
|
|
|
|
// // result.r[0] = _mm_fmadd_ps(allY, reinterpret_cast<__m128>(r[1]), reinterpret_cast<__m128>(result.r[0]));
|
|
// // result.r[0] = _mm_fmadd_ps(allZ, reinterpret_cast<__m128>(r[2]), reinterpret_cast<__m128>(result.r[0]));
|
|
// // result.r[0] = _mm_fmadd_ps(allW, reinterpret_cast<__m128>(r[3]), reinterpret_cast<__m128>(result.r[0]));
|
|
|
|
// Float4x4 store;
|
|
// result.Store(&store);
|
|
|
|
// std::cout << std::format("{}", store) << std::endl;
|
|
|
|
// return result;
|
|
// }
|
|
|
|
// void Store(Float4x4* store) const requires(collums == 4 && rowSize == 4 && vectorSize == 1 && std::same_as<T, float>) {
|
|
// _mm_storeu_ps(store->r1, reinterpret_cast<__m128>(r[0]));
|
|
// _mm_storeu_ps(store->r2, reinterpret_cast<__m128>(r[1]));
|
|
// _mm_storeu_ps(store->r3, reinterpret_cast<__m128>(r[2]));
|
|
// _mm_storeu_ps(store->r4, reinterpret_cast<__m128>(r[3]));
|
|
// }
|
|
|
|
|
|
// // VectorVector<T, 4, 4> operator*(VectorVector<T, 4, 4> b) requires(collums == 4 && rowSize == 4 && vectorSize == 4 && std::same_as<T, float>) {
|
|
// // __m512 result = _mm512_permute_ps(b.v, 0b11111111);
|
|
// // result = _mm512_fmadd_ps(result, reinterpret_cast<__m512>(r[0]), reinterpret_cast<__m512>(r[3]));
|
|
|
|
// // __m512 allY = _mm512_permute_ps(b.v, 0b10101010);
|
|
// // result = _mm512_fmadd_ps(allY, reinterpret_cast<__m512>(r[1]), result);
|
|
|
|
// // __m512 allZ = _mm512_permute_ps(b.v, 0b01010101);
|
|
// // return VectorVector<T, 4, 4>(_mm512_fmadd_ps(allZ, reinterpret_cast<__m512>(r[2]), result));
|
|
// // }
|
|
|
|
// // m4x4float Transpose() const {
|
|
// // // x.x,x.y,y.x,y.y
|
|
// // __m128 vTemp1 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(1, 0, 1, 0));
|
|
// // // x.z,x.w,y.z,y.w
|
|
// // __m128 vTemp3 = _mm_shuffle_ps(r[0], r[1], _MM_SHUFFLE(3, 2, 3, 2));
|
|
// // // z.x,z.y,w.x,w.y
|
|
// // __m128 vTemp2 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(1, 0, 1, 0));
|
|
// // // z.z,z.w,w.z,w.w
|
|
// // __m128 vTemp4 = _mm_shuffle_ps(r[2], r[3], _MM_SHUFFLE(3, 2, 3, 2));
|
|
|
|
// // return m4x4float(
|
|
// // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)),
|
|
// // _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)),
|
|
// // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)),
|
|
// // _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1))
|
|
// // );
|
|
// // }
|
|
// // m4x4float operator*(m4x4float b) const {
|
|
// // __m256 t0 = _mm256_castps128_ps256(r[0]);
|
|
// // t0 = _mm256_insertf128_ps(t0, r[1], 1);
|
|
// // __m256 t1 = _mm256_castps128_ps256(r[2]);
|
|
// // t1 = _mm256_insertf128_ps(t1, r[3], 1);
|
|
|
|
// // __m256 u0 = _mm256_castps128_ps256(b.r[0]);
|
|
// // u0 = _mm256_insertf128_ps(u0, b.r[1], 1);
|
|
// // __m256 u1 = _mm256_castps128_ps256(b.r[2]);
|
|
// // u1 = _mm256_insertf128_ps(u1, b.r[3], 1);
|
|
|
|
// // __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0));
|
|
// // __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0));
|
|
// // __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00);
|
|
// // __m256 c0 = _mm256_mul_ps(a0, b0);
|
|
// // __m256 c1 = _mm256_mul_ps(a1, b0);
|
|
|
|
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1));
|
|
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1));
|
|
// // b0 = _mm256_permute2f128_ps(u0, u0, 0x11);
|
|
// // __m256 c2 = _mm256_fmadd_ps(a0, b0, c0);
|
|
// // __m256 c3 = _mm256_fmadd_ps(a1, b0, c1);
|
|
|
|
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2));
|
|
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2));
|
|
// // __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00);
|
|
// // __m256 c4 = _mm256_mul_ps(a0, b1);
|
|
// // __m256 c5 = _mm256_mul_ps(a1, b1);
|
|
|
|
// // a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3));
|
|
// // a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3));
|
|
// // b1 = _mm256_permute2f128_ps(u1, u1, 0x11);
|
|
// // __m256 c6 = _mm256_fmadd_ps(a0, b1, c4);
|
|
// // __m256 c7 = _mm256_fmadd_ps(a1, b1, c5);
|
|
|
|
// // t0 = _mm256_add_ps(c2, c6);
|
|
// // t1 = _mm256_add_ps(c3, c7);
|
|
|
|
// // return m4x4float(
|
|
// // _mm256_castps256_ps128(t0),
|
|
// // _mm256_extractf128_ps(t0, 1),
|
|
// // _mm256_castps256_ps128(t1),
|
|
// // _mm256_extractf128_ps(t1, 1)
|
|
// // );
|
|
// // }
|
|
|
|
|
|
void Store(Crafter::Float4x4& store) const {
|
|
_mm_storeu_ps(store.c1, c[0]);
|
|
_mm_storeu_ps(store.c2, c[1]);
|
|
_mm_storeu_ps(store.c3, c[2]);
|
|
_mm_storeu_ps(store.c4, c[3]);
|
|
}
|
|
};
|
|
}
|
|
|
|
/// std::format support for 4x4 single-precision matrices.
///
/// The matrix is first written out to a plain Crafter::Float4x4 via Store(),
/// that value is rendered with its own "{}" formatter, and the resulting text
/// is emitted through the inherited std::string formatter.
template <>
struct std::formatter<Crafter::Matrix<float, 4, 4, 1>> : std::formatter<std::string> {
    auto format(const Crafter::Matrix<float, 4, 4, 1>& obj, format_context& ctx) const {
        // Copy the SIMD registers into an ordinary struct that can be formatted.
        Crafter::Float4x4 unpacked;
        obj.Store(unpacked);

        const std::string rendered = std::format("{}", unpacked);
        return std::formatter<std::string>::format(rendered, ctx);
    }
};