more F16 math

This commit is contained in:
Jorijn van der Graaf 2026-03-19 05:53:17 +01:00
commit f1fbbe0faf
3 changed files with 82 additions and 42 deletions

View file

@@ -30,44 +30,38 @@ int main() {
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end-start) << std::endl;
// std::println("{}", vfC);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0, 100);
Vector<_Float16, 1326, 32> vA;
// for(std::uint32_t i = 0; i < 2; i++) {
// vA.v[i] = i;
// }
// for(std::uint32_t i = 2; i < 4; i++) {
// vA.v[i] = i-2;
// }
// for(std::uint32_t i = 4; i < 6; i++) {
// vA.v[i] = i-4;
// }
// for(std::uint32_t i = 6; i < 8; i++) {
// vA.v[i] = i-6;
// }
for(std::uint32_t i = 0; i < 8; i++) {
vA.v[i] = i;
Vector<_Float16, 32, 32> vA;
for(std::uint32_t i = 0; i < 32; i++) {
vA.v[i] = dist(gen);
}
for(std::uint32_t i = 8; i < 16; i++) {
vA.v[i] = i-8;
}
for(std::uint32_t i = 16; i < 24; i++) {
vA.v[i] = i-16;
}
for(std::uint32_t i = 24; i < 32; i++) {
}
VectorF16<8, 1, 4> vfA(&vA);
std::tuple<VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>> dot = VectorF16<8, 1, 4>::Normalize(vfA, vfA, vfA, vfA, vfA, vfA, vfA, vfA);
std::println("{}", std::get<0>(dot));
Vector<float, 8, 8> vB;
for(std::uint32_t i = 0; i < 8; i++) {
vB.v[i] = i;
}
vB.Normalize();
std::string log;
for(std::uint32_t i = 0; i < 8; i++) {
log += std::format("{} ", (float)vB.v[i]);
std::chrono::duration<double> totalVector(0);
std::tuple<VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>> vfA {VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA)};
for(std::uint32_t i = 0; i < 1000000; i++) {
auto start = std::chrono::high_resolution_clock::now();
vfA = VectorF16<4, 2, 4>::Normalize(std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
auto end = std::chrono::high_resolution_clock::now();
totalVector += end-start;
}
std::println("{{{}}}", log);
std::chrono::duration<double> totalScalar(0);
Vector<_Float16, 4, 4> vB;
for(std::uint32_t i = 0; i < 4; i++) {
vB.v[i] = dist(gen);
}
for(std::uint32_t i = 0; i < 1000000; i++) {
auto start2 = std::chrono::high_resolution_clock::now();
vB.Normalize();
auto end2 = std::chrono::high_resolution_clock::now();
totalScalar += end2-start2;
}
std::println("{} {} {} {}", std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
std::println("{}", vB);
std::println("Vector: {}, Scalar: {}", std::chrono::duration_cast<std::chrono::milliseconds>(totalVector), std::chrono::duration_cast<std::chrono::milliseconds>(totalScalar*8));
}