F16 rendering

This commit is contained in:
Jorijn van der Graaf 2026-04-01 18:43:18 +02:00
commit 477b7dd087
5 changed files with 120 additions and 79 deletions

View file

@ -66,15 +66,20 @@ export namespace Crafter {
dirty.right = std::min(std::uint16_t(element->scaled.position.x+element->scaled.size.x), dirty.right);
dirty.bottom = std::min(std::uint16_t(element->scaled.position.y+element->scaled.size.y), dirty.bottom);
if(dirty.right <= dirty.left || dirty.bottom <= dirty.top) {
continue;
}
const Vector<T, 4, 4>* src_buffer = element->buffer.data();
std::int32_t src_width = element->scaled.size.x;
std::int32_t src_height = element->scaled.size.y;
std::uint16_t src_width = element->scaled.size.x;
std::uint16_t src_height = element->scaled.size.y;
switch (element->opaque) {
case OpaqueType::FullyOpaque: {
for (std::int32_t y = dirty.top; y < dirty.bottom; y++) {
std::int32_t src_y = y - element->scaled.position.y;
std::memcpy(&this->buffer[frame][y * this->sizeX], &src_buffer[src_y * src_width], dirty.right-dirty.left*sizeof(Vector<T, Channels, Alignment>));
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
std::uint16_t src_y = y - element->scaled.position.y;
std::uint16_t src_x = dirty.left - element->scaled.position.x;
std::memcpy(&this->buffer[frame][y * this->sizeX + dirty.left], &src_buffer[src_y * src_width + src_x], (dirty.right - dirty.left) * sizeof(Vector<T, Channels, Alignment>));
}
break;
}
@ -83,20 +88,20 @@ export namespace Crafter {
if constexpr(std::same_as<T, _Float16>) {
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
std::uint16_t src_y = y - element->scaled.position.y;
std::uint32_t pixel_width = dirty.right - dirty.left;
std::uint16_t pixel_width = dirty.right - dirty.left;
constexpr std::uint32_t simd_width = VectorF16<1, 1>::MaxElement / 4;
std::uint32_t rows = pixel_width / simd_width;
for (std::uint32_t x = 0; x < rows; x++) {
std::uint16_t px = dirty.left + x * simd_width;
std::uint16_t src_x = px - element->scaled.position.x;
std::uint16_t dst_x = px;
VectorF16<4, simd_width> src(&src_buffer[src_y * src_width + src_x].v[0]);
VectorF16<4, simd_width> dst(&buffer[frame][y * this->sizeX + dst_x].v[0]);
VectorF16<4, simd_width> dst(&buffer[frame][y * this->sizeX + px].v[0]);
VectorF16<4, simd_width> oneMinusSrcA = VectorF16<4, simd_width>(1) - src.Shuffle<{{3, 3, 3, 3}}>();
VectorF16<4, simd_width> result = VectorF16<4, simd_width>::MulitplyAdd(dst, oneMinusSrcA, src);
result.Store(buffer[frame][y * this->sizeX + dst_x]);
result.Store(&buffer[frame][y * this->sizeX + px].v[0]);
}
std::uint32_t remainder = pixel_width - (rows * simd_width);
@ -108,7 +113,7 @@ export namespace Crafter {
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + px];
_Float16 oneMinusSrcA = (_Float16)1.0f - src[3];
_Float16 oneMinusSrcA = (_Float16)1.0f - src.a;
buffer[frame][y * this->sizeX + px] = Vector<T, Channels, Alignment>(
src.r + dst.r * oneMinusSrcA,
@ -119,9 +124,10 @@ export namespace Crafter {
}
}
} else {
for (std::int32_t y = dirty.top; y < dirty.bottom; y++) {
std::int32_t src_y = y - element->scaled.position.y;
std::memcpy(&this->buffer[frame][y * this->sizeX], &src_buffer[src_y * src_width], dirty.right-dirty.left*sizeof(Vector<T, Channels, Alignment>));
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
std::uint16_t src_y = y - element->scaled.position.y;
std::uint16_t src_x = dirty.left - element->scaled.position.x;
std::memcpy(&this->buffer[frame][y * this->sizeX + dirty.left], &src_buffer[src_y * src_width + src_x], (dirty.right - dirty.left) * sizeof(Vector<T, Channels, Alignment>));
}
}
break;