vector rendering

This commit is contained in:
Jorijn van der Graaf 2026-03-31 15:22:55 +02:00
commit c895c266fb
5 changed files with 53 additions and 72 deletions

View file

@ -46,11 +46,11 @@ export namespace Crafter {
struct Rendertarget : RendertargetBase<Frames> {
Vector<T, Channels, Alignment>* buffer[Frames];
Rendertarget() = default;
Rendertarget(std::int16_t sizeX, std::int16_t sizeY) : RendertargetBase<Frames>(sizeX, sizeY) {
Rendertarget(std::uint16_t sizeX, std::uint16_t sizeY) : RendertargetBase<Frames>(sizeX, sizeY) {
}
void RenderElement(Transform2D* elementTransform, std::uint8_t frame, std::vector<ClipRect>&& dirtyRects) {
RenderingElement2DBase<Frames>* element = dynamic_cast<RenderingElement2DBase<Frames>*>(elementTransform);
RenderingElement2DBase<T, Frames>* element = dynamic_cast<RenderingElement2DBase<T, Frames>*>(elementTransform);
if(element) {
#ifdef CRAFTER_TIMING
auto start = std::chrono::high_resolution_clock::now();
@ -63,10 +63,10 @@ export namespace Crafter {
for(ClipRect dirty : dirtyRects) {
dirty.left = std::max(element->scaled.position.x, dirty.left);
dirty.top = std::max(element->scaled.position.y, dirty.top);
dirty.right = std::min(element->scaled.position.x+element->scaled.size.x, dirty.right);
dirty.bottom = std::min(element->scaled.position.y+element->scaled.size.y, dirty.bottom);
dirty.right = std::min(std::uint16_t(element->scaled.position.x+element->scaled.size.x), dirty.right);
dirty.bottom = std::min(std::uint16_t(element->scaled.position.y+element->scaled.size.y), dirty.bottom);
const Vector<std::uint8_t, 4>* src_buffer = element->buffer.data();
const Vector<T, 4, 4>* src_buffer = element->buffer.data();
std::int32_t src_width = element->scaled.size.x;
std::int32_t src_height = element->scaled.size.y;
@ -83,65 +83,46 @@ export namespace Crafter {
if constexpr(std::same_as<T, _Float16>) {
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
std::uint16_t src_y = y - element->scaled.position.y;
std::uint16_t rowSize = dirty.right - dirty.left;
constexpr std::uint8_t elementsPerVector = VectorF16L<1, 1, 1>::MaxSize/VectorF16L<1, 1, 1>::MaxElement;
while(rowSize > 0) {
if(rowSize < elementsPerVector) {
for(; rowSize > 0; rowSize--) {
std::uint16_t src_x = rowSize - element->scaled.position.x;
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + rowSize];
std::uint32_t pixel_width = dirty.right - dirty.left;
constexpr std::uint32_t simd_width = VectorF16<1, 1>::MaxElement / 4;
std::uint32_t rows = pixel_width / simd_width;
_Float16 outA = src.a + dst.a * (1.0f - src.a);
this->buffer[frame][y * this->sizeX + rowSize] = Vector<T, Channels, Alignment>(
static_cast<T>((src.r + dst.r * (1.0f - src.a)) / outA),
static_cast<T>((src.g + dst.g * (1.0f - src.a)) / outA),
static_cast<T>((src.b + dst.b * (1.0f - src.a)) / outA),
static_cast<T>(outA * 255)
);
}
break;
} else {
std::uint16_t src_x = rowSize - element->scaled.position.x;
using VectorType = VectorF16L<4, VectorF16L<1, 1, 1>::MaxElement / 4, VectorF16L<1, 1, 1>::MaxSize/((VectorF16L<1, 1, 1>::MaxElement / 4)*4)>;
VectorType src(src_buffer[src_y * src_width + src_x].v);
VectorType dst(buffer[frame][y * this->sizeX + rowSize].v);
for (std::uint32_t x = 0; x < rows; x++) {
std::uint16_t px = dirty.left + x * simd_width;
std::uint16_t src_x = px - element->scaled.position.x;
std::uint16_t dst_x = px;
VectorType srcA = src.ShufflePacked<3,3,3,3>();
VectorType dstA = dst.ShufflePacked<3,3,3,3>();
VectorType srcANeg = -srcA;
VectorType outA = VectorType::MulitplyAdd(dstA, srcANeg, srcA);
VectorType result = src + dst * srcANeg / outA;
result = VectorType::BlendPacked<0,0,0,1>(dst, outA);
result.Store(buffer[frame][y * this->sizeX + rowSize].v);
rowSize -= elementsPerVector;
}
VectorF16<4, simd_width> src(&src_buffer[src_y * src_width + src_x].v[0]);
VectorF16<4, simd_width> dst(&buffer[frame][y * this->sizeX + dst_x].v[0]);
VectorF16<4, simd_width> oneMinusSrcA = VectorF16<4, simd_width>(1) - src.Shuffle<{{3, 3, 3, 3}}>();
VectorF16<4, simd_width> result = VectorF16<4, simd_width>::MulitplyAdd(dst, oneMinusSrcA, src);
result.Store(buffer[frame][y * this->sizeX + dst_x]);
}
}
} else {
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
std::uint16_t src_y = y - element->scaled.position.y;
for (std::uint16_t x = dirty.left; x < dirty.right; x++) {
std::uint16_t src_x = x - element->scaled.position.x;
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + x];
float srcA = src.a / 255.0f;
float dstA = dst.a / 255.0f;
float outA = srcA + dstA * (1.0f - srcA);
this->buffer[frame][y * this->sizeX + x] = Vector<T, Channels, Alignment>(
static_cast<T>((src.r + dst.r * (1.0f - srcA)) / outA),
static_cast<T>((src.g + dst.g * (1.0f - srcA)) / outA),
static_cast<T>((src.b + dst.b * (1.0f - srcA)) / outA),
static_cast<T>(outA * 255)
std::uint32_t remainder = pixel_width - (rows * simd_width);
std::uint16_t remainder_start = dirty.left + rows * simd_width;
for (std::uint8_t x = 0; x < remainder; x++) {
std::uint16_t px = remainder_start + x;
std::uint16_t src_x = px - element->scaled.position.x;
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + px];
_Float16 oneMinusSrcA = (_Float16)1.0f - src[3];
buffer[frame][y * this->sizeX + px] = Vector<T, Channels, Alignment>(
src.r + dst.r * oneMinusSrcA,
src.g + dst.g * oneMinusSrcA,
src.b + dst.b * oneMinusSrcA,
src.a + dst.a * oneMinusSrcA
);
}
}
} else {
for (std::int32_t y = dirty.top; y < dirty.bottom; y++) {
std::int32_t src_y = y - element->scaled.position.y;
std::memcpy(&this->buffer[frame][y * this->sizeX], &src_buffer[src_y * src_width], dirty.right-dirty.left*sizeof(Vector<T, Channels, Alignment>));
}
}
break;
}
@ -158,15 +139,15 @@ export namespace Crafter {
}
void AddOldRects(Transform2D* elementTransform, std::uint8_t frame, std::vector<ClipRect>& clipRects) {
RenderingElement2DBase<Frames>* element = dynamic_cast<RenderingElement2DBase<Frames>*>(elementTransform);
RenderingElement2DBase<T, Frames>* element = dynamic_cast<RenderingElement2DBase<T, Frames>*>(elementTransform);
if(element) {
if(element->scaled.position.x != element->oldScale[frame].position.x || element->scaled.position.y != element->oldScale[frame].position.y || element->scaled.size.x != element->oldScale[frame].size.x || element->scaled.size.y != element->oldScale[frame].size.y || element->redraw[frame]) {
clipRects.emplace_back(std::max(element->scaled.position.x, std::int32_t(0)), std::min(element->scaled.position.x + element->scaled.size.x, this->sizeX), std::max(element->scaled.position.y, std::int32_t(0)), std::min(element->scaled.position.y + element->scaled.size.y, this->sizeY));
clipRects.emplace_back(std::max(element->oldScale[frame].position.x, std::int32_t(0)), std::min(element->oldScale[frame].position.x + element->oldScale[frame].size.x, this->sizeX), std::max(element->oldScale[frame].position.y, std::int32_t(0)), std::min(element->oldScale[frame].position.y + element->oldScale[frame].size.y, this->sizeY));
clipRects.emplace_back(std::max(element->scaled.position.x, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.x + element->scaled.size.x), this->sizeX), std::max(element->scaled.position.y, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.y + element->scaled.size.y), this->sizeY));
clipRects.emplace_back(std::max(element->oldScale[frame].position.x, std::uint16_t(0)), std::min(std::uint16_t(element->oldScale[frame].position.x + element->oldScale[frame].size.x), this->sizeX), std::max(element->oldScale[frame].position.y, std::uint16_t(0)), std::min(std::uint16_t(element->oldScale[frame].position.y + element->oldScale[frame].size.y), this->sizeY));
element->oldScale[frame] = element->scaled;
element->redraw[frame] = false;
} else if(element->redraw[frame]) {
clipRects.emplace_back(std::max(element->scaled.position.x, std::int32_t(0)), std::min(element->scaled.position.x + element->scaled.size.x, this->sizeX), std::max(element->scaled.position.y, std::int32_t(0)), std::min(element->scaled.position.y + element->scaled.size.y, this->sizeY));
clipRects.emplace_back(std::max(element->scaled.position.x, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.x + element->scaled.size.x), this->sizeX), std::max(element->scaled.position.y, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.y + element->scaled.size.y), this->sizeY));
element->oldScale[frame] = element->scaled;
element->redraw[frame] = false;
}