more F16 math
This commit is contained in:
parent
c54ff6228c
commit
f1fbbe0faf
3 changed files with 82 additions and 42 deletions
|
|
@@ -30,44 +30,38 @@ int main() {
|
|||
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end-start) << std::endl;
|
||||
// std::println("{}", vfC);
|
||||
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 gen(rd());
|
||||
std::uniform_real_distribution<float> dist(0, 100);
|
||||
|
||||
Vector<_Float16, 1326, 32> vA;
|
||||
// for(std::uint32_t i = 0; i < 2; i++) {
|
||||
// vA.v[i] = i;
|
||||
// }
|
||||
// for(std::uint32_t i = 2; i < 4; i++) {
|
||||
// vA.v[i] = i-2;
|
||||
// }
|
||||
// for(std::uint32_t i = 4; i < 6; i++) {
|
||||
// vA.v[i] = i-4;
|
||||
// }
|
||||
// for(std::uint32_t i = 6; i < 8; i++) {
|
||||
// vA.v[i] = i-6;
|
||||
// }
|
||||
for(std::uint32_t i = 0; i < 8; i++) {
|
||||
vA.v[i] = i;
|
||||
Vector<_Float16, 32, 32> vA;
|
||||
for(std::uint32_t i = 0; i < 32; i++) {
|
||||
vA.v[i] = dist(gen);
|
||||
}
|
||||
for(std::uint32_t i = 8; i < 16; i++) {
|
||||
vA.v[i] = i-8;
|
||||
}
|
||||
for(std::uint32_t i = 16; i < 24; i++) {
|
||||
vA.v[i] = i-16;
|
||||
}
|
||||
for(std::uint32_t i = 24; i < 32; i++) {
|
||||
}
|
||||
VectorF16<8, 1, 4> vfA(&vA);
|
||||
std::tuple<VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>, VectorF16<8, 1, 4>> dot = VectorF16<8, 1, 4>::Normalize(vfA, vfA, vfA, vfA, vfA, vfA, vfA, vfA);
|
||||
std::println("{}", std::get<0>(dot));
|
||||
|
||||
Vector<float, 8, 8> vB;
|
||||
for(std::uint32_t i = 0; i < 8; i++) {
|
||||
vB.v[i] = i;
|
||||
}
|
||||
vB.Normalize();
|
||||
std::string log;
|
||||
for(std::uint32_t i = 0; i < 8; i++) {
|
||||
log += std::format("{} ", (float)vB.v[i]);
|
||||
std::chrono::duration<double> totalVector(0);
|
||||
std::tuple<VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>, VectorF16<4, 2, 4>> vfA {VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA), VectorF16<4, 2, 4>(&vA)};
|
||||
for(std::uint32_t i = 0; i < 1000000; i++) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
vfA = VectorF16<4, 2, 4>::Normalize(std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
totalVector += end-start;
|
||||
}
|
||||
std::println("{{{}}}", log);
|
||||
|
||||
std::chrono::duration<double> totalScalar(0);
|
||||
Vector<_Float16, 4, 4> vB;
|
||||
for(std::uint32_t i = 0; i < 4; i++) {
|
||||
vB.v[i] = dist(gen);
|
||||
}
|
||||
for(std::uint32_t i = 0; i < 1000000; i++) {
|
||||
auto start2 = std::chrono::high_resolution_clock::now();
|
||||
vB.Normalize();
|
||||
auto end2 = std::chrono::high_resolution_clock::now();
|
||||
totalScalar += end2-start2;
|
||||
}
|
||||
|
||||
std::println("{} {} {} {}", std::get<0>(vfA), std::get<1>(vfA), std::get<2>(vfA), std::get<3>(vfA));
|
||||
std::println("{}", vB);
|
||||
std::println("Vector: {}, Scalar: {}", std::chrono::duration_cast<std::chrono::milliseconds>(totalVector), std::chrono::duration_cast<std::chrono::milliseconds>(totalScalar*8));
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue