more F16 math

This commit is contained in:
Jorijn van der Graaf 2026-03-22 03:51:09 +01:00
commit 1544e92391
2 changed files with 306 additions and 135 deletions

View file

@@ -30,38 +30,38 @@ int main() {
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end-start) << std::endl;
// std::println("{}", vfC);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0, 100);
// std::random_device rd;
// std::mt19937 gen(rd());
// std::uniform_real_distribution<float> dist(0, 100);
Vector<_Float16, 32, 32> vA;
for(std::uint32_t i = 0; i < 32; i++) {
vA.v[i] = dist(gen);
}
// Vector<_Float16, 32, 32> vA;
// for(std::uint32_t i = 0; i < 32; i++) {
// vA.v[i] = dist(gen);
// }
std::string log;
std::chrono::duration<double> totalVector(0);
std::tuple<VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>> vfA {VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA)};
for(std::uint32_t i = 0; i < 1000000; i++) {
auto start = std::chrono::high_resolution_clock::now();
vfA = VectorF16<4, 2, 4>::Normalize(std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
auto end = std::chrono::high_resolution_clock::now();
totalVector += end-start;
}
// std::string log;
// std::chrono::duration<double> totalVector(0);
// std::tuple<VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>> vfA {VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA)};
// for(std::uint32_t i = 0; i < 1000000; i++) {
// auto start = std::chrono::high_resolution_clock::now();
// vfA = VectorF16<4, 2, 4>::Normalize(std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
// auto end = std::chrono::high_resolution_clock::now();
// totalVector += end-start;
// }
std::chrono::duration<double> totalScalar(0);
Vector<_Float16, 4, 4> vB;
for(std::uint32_t i = 0; i < 4; i++) {
vB.v[i] = dist(gen);
}
for(std::uint32_t i = 0; i < 1000000; i++) {
auto start2 = std::chrono::high_resolution_clock::now();
vB.Normalize();
auto end2 = std::chrono::high_resolution_clock::now();
totalScalar += end2-start2;
}
// std::chrono::duration<double> totalScalar(0);
// Vector<_Float16, 4, 4> vB;
// for(std::uint32_t i = 0; i < 4; i++) {
// vB.v[i] = dist(gen);
// }
// for(std::uint32_t i = 0; i < 1000000; i++) {
// auto start2 = std::chrono::high_resolution_clock::now();
// vB.Normalize();
// auto end2 = std::chrono::high_resolution_clock::now();
// totalScalar += end2-start2;
// }
std::println("{} {} {} {}", std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
std::println("{}", vB);
std::println("Vector: {}, Scalar: {}", std::chrono::duration_cast<std::chrono::milliseconds>(totalVector), std::chrono::duration_cast<std::chrono::milliseconds>(totalScalar*8));
// std::println("{} {} {} {}", std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
// std::println("{}", vB);
// std::println("Vector: {}, Scalar: {}", std::chrono::duration_cast<std::chrono::milliseconds>(totalVector), std::chrono::duration_cast<std::chrono::milliseconds>(totalScalar*8));
}