vector rendering
This commit is contained in:
commit
c895c266fb
5 changed files with 53 additions and 72 deletions
|
|
@ -46,11 +46,11 @@ export namespace Crafter {
|
|||
struct Rendertarget : RendertargetBase<Frames> {
|
||||
Vector<T, Channels, Alignment>* buffer[Frames];
|
||||
Rendertarget() = default;
|
||||
Rendertarget(std::int16_t sizeX, std::int16_t sizeY) : RendertargetBase<Frames>(sizeX, sizeY) {
|
||||
Rendertarget(std::uint16_t sizeX, std::uint16_t sizeY) : RendertargetBase<Frames>(sizeX, sizeY) {
|
||||
|
||||
}
|
||||
void RenderElement(Transform2D* elementTransform, std::uint8_t frame, std::vector<ClipRect>&& dirtyRects) {
|
||||
RenderingElement2DBase<Frames>* element = dynamic_cast<RenderingElement2DBase<Frames>*>(elementTransform);
|
||||
RenderingElement2DBase<T, Frames>* element = dynamic_cast<RenderingElement2DBase<T, Frames>*>(elementTransform);
|
||||
if(element) {
|
||||
#ifdef CRAFTER_TIMING
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -63,10 +63,10 @@ export namespace Crafter {
|
|||
for(ClipRect dirty : dirtyRects) {
|
||||
dirty.left = std::max(element->scaled.position.x, dirty.left);
|
||||
dirty.top = std::max(element->scaled.position.y, dirty.top);
|
||||
dirty.right = std::min(element->scaled.position.x+element->scaled.size.x, dirty.right);
|
||||
dirty.bottom = std::min(element->scaled.position.y+element->scaled.size.y, dirty.bottom);
|
||||
dirty.right = std::min(std::uint16_t(element->scaled.position.x+element->scaled.size.x), dirty.right);
|
||||
dirty.bottom = std::min(std::uint16_t(element->scaled.position.y+element->scaled.size.y), dirty.bottom);
|
||||
|
||||
const Vector<std::uint8_t, 4>* src_buffer = element->buffer.data();
|
||||
const Vector<T, 4, 4>* src_buffer = element->buffer.data();
|
||||
std::int32_t src_width = element->scaled.size.x;
|
||||
std::int32_t src_height = element->scaled.size.y;
|
||||
|
||||
|
|
@ -83,65 +83,46 @@ export namespace Crafter {
|
|||
if constexpr(std::same_as<T, _Float16>) {
|
||||
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
|
||||
std::uint16_t src_y = y - element->scaled.position.y;
|
||||
std::uint16_t rowSize = dirty.right - dirty.left;
|
||||
constexpr std::uint8_t elementsPerVector = VectorF16L<1, 1, 1>::MaxSize/VectorF16L<1, 1, 1>::MaxElement;
|
||||
while(rowSize > 0) {
|
||||
if(rowSize < elementsPerVector) {
|
||||
for(; rowSize > 0; rowSize--) {
|
||||
std::uint16_t src_x = rowSize - element->scaled.position.x;
|
||||
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
|
||||
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + rowSize];
|
||||
std::uint32_t pixel_width = dirty.right - dirty.left;
|
||||
constexpr std::uint32_t simd_width = VectorF16<1, 1>::MaxElement / 4;
|
||||
std::uint32_t rows = pixel_width / simd_width;
|
||||
|
||||
_Float16 outA = src.a + dst.a * (1.0f - src.a);
|
||||
this->buffer[frame][y * this->sizeX + rowSize] = Vector<T, Channels, Alignment>(
|
||||
static_cast<T>((src.r + dst.r * (1.0f - src.a)) / outA),
|
||||
static_cast<T>((src.g + dst.g * (1.0f - src.a)) / outA),
|
||||
static_cast<T>((src.b + dst.b * (1.0f - src.a)) / outA),
|
||||
static_cast<T>(outA * 255)
|
||||
);
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
std::uint16_t src_x = rowSize - element->scaled.position.x;
|
||||
using VectorType = VectorF16L<4, VectorF16L<1, 1, 1>::MaxElement / 4, VectorF16L<1, 1, 1>::MaxSize/((VectorF16L<1, 1, 1>::MaxElement / 4)*4)>;
|
||||
VectorType src(src_buffer[src_y * src_width + src_x].v);
|
||||
VectorType dst(buffer[frame][y * this->sizeX + rowSize].v);
|
||||
for (std::uint32_t x = 0; x < rows; x++) {
|
||||
std::uint16_t px = dirty.left + x * simd_width;
|
||||
std::uint16_t src_x = px - element->scaled.position.x;
|
||||
std::uint16_t dst_x = px;
|
||||
|
||||
VectorType srcA = src.ShufflePacked<3,3,3,3>();
|
||||
VectorType dstA = dst.ShufflePacked<3,3,3,3>();
|
||||
|
||||
VectorType srcANeg = -srcA;
|
||||
|
||||
VectorType outA = VectorType::MulitplyAdd(dstA, srcANeg, srcA);
|
||||
|
||||
VectorType result = src + dst * srcANeg / outA;
|
||||
result = VectorType::BlendPacked<0,0,0,1>(dst, outA);
|
||||
result.Store(buffer[frame][y * this->sizeX + rowSize].v);
|
||||
|
||||
rowSize -= elementsPerVector;
|
||||
}
|
||||
VectorF16<4, simd_width> src(&src_buffer[src_y * src_width + src_x].v[0]);
|
||||
VectorF16<4, simd_width> dst(&buffer[frame][y * this->sizeX + dst_x].v[0]);
|
||||
VectorF16<4, simd_width> oneMinusSrcA = VectorF16<4, simd_width>(1) - src.Shuffle<{{3, 3, 3, 3}}>();
|
||||
VectorF16<4, simd_width> result = VectorF16<4, simd_width>::MulitplyAdd(dst, oneMinusSrcA, src);
|
||||
result.Store(buffer[frame][y * this->sizeX + dst_x]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (std::uint16_t y = dirty.top; y < dirty.bottom; y++) {
|
||||
std::uint16_t src_y = y - element->scaled.position.y;
|
||||
for (std::uint16_t x = dirty.left; x < dirty.right; x++) {
|
||||
std::uint16_t src_x = x - element->scaled.position.x;
|
||||
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
|
||||
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + x];
|
||||
|
||||
float srcA = src.a / 255.0f;
|
||||
float dstA = dst.a / 255.0f;
|
||||
|
||||
float outA = srcA + dstA * (1.0f - srcA);
|
||||
this->buffer[frame][y * this->sizeX + x] = Vector<T, Channels, Alignment>(
|
||||
static_cast<T>((src.r + dst.r * (1.0f - srcA)) / outA),
|
||||
static_cast<T>((src.g + dst.g * (1.0f - srcA)) / outA),
|
||||
static_cast<T>((src.b + dst.b * (1.0f - srcA)) / outA),
|
||||
static_cast<T>(outA * 255)
|
||||
std::uint32_t remainder = pixel_width - (rows * simd_width);
|
||||
std::uint16_t remainder_start = dirty.left + rows * simd_width;
|
||||
|
||||
for (std::uint8_t x = 0; x < remainder; x++) {
|
||||
std::uint16_t px = remainder_start + x;
|
||||
std::uint16_t src_x = px - element->scaled.position.x;
|
||||
|
||||
Vector<T, Channels, Alignment> src = src_buffer[src_y * src_width + src_x];
|
||||
Vector<T, Channels, Alignment> dst = buffer[frame][y * this->sizeX + px];
|
||||
_Float16 oneMinusSrcA = (_Float16)1.0f - src[3];
|
||||
|
||||
buffer[frame][y * this->sizeX + px] = Vector<T, Channels, Alignment>(
|
||||
src.r + dst.r * oneMinusSrcA,
|
||||
src.g + dst.g * oneMinusSrcA,
|
||||
src.b + dst.b * oneMinusSrcA,
|
||||
src.a + dst.a * oneMinusSrcA
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (std::int32_t y = dirty.top; y < dirty.bottom; y++) {
|
||||
std::int32_t src_y = y - element->scaled.position.y;
|
||||
std::memcpy(&this->buffer[frame][y * this->sizeX], &src_buffer[src_y * src_width], dirty.right-dirty.left*sizeof(Vector<T, Channels, Alignment>));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -158,15 +139,15 @@ export namespace Crafter {
|
|||
}
|
||||
|
||||
void AddOldRects(Transform2D* elementTransform, std::uint8_t frame, std::vector<ClipRect>& clipRects) {
|
||||
RenderingElement2DBase<Frames>* element = dynamic_cast<RenderingElement2DBase<Frames>*>(elementTransform);
|
||||
RenderingElement2DBase<T, Frames>* element = dynamic_cast<RenderingElement2DBase<T, Frames>*>(elementTransform);
|
||||
if(element) {
|
||||
if(element->scaled.position.x != element->oldScale[frame].position.x || element->scaled.position.y != element->oldScale[frame].position.y || element->scaled.size.x != element->oldScale[frame].size.x || element->scaled.size.y != element->oldScale[frame].size.y || element->redraw[frame]) {
|
||||
clipRects.emplace_back(std::max(element->scaled.position.x, std::int32_t(0)), std::min(element->scaled.position.x + element->scaled.size.x, this->sizeX), std::max(element->scaled.position.y, std::int32_t(0)), std::min(element->scaled.position.y + element->scaled.size.y, this->sizeY));
|
||||
clipRects.emplace_back(std::max(element->oldScale[frame].position.x, std::int32_t(0)), std::min(element->oldScale[frame].position.x + element->oldScale[frame].size.x, this->sizeX), std::max(element->oldScale[frame].position.y, std::int32_t(0)), std::min(element->oldScale[frame].position.y + element->oldScale[frame].size.y, this->sizeY));
|
||||
clipRects.emplace_back(std::max(element->scaled.position.x, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.x + element->scaled.size.x), this->sizeX), std::max(element->scaled.position.y, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.y + element->scaled.size.y), this->sizeY));
|
||||
clipRects.emplace_back(std::max(element->oldScale[frame].position.x, std::uint16_t(0)), std::min(std::uint16_t(element->oldScale[frame].position.x + element->oldScale[frame].size.x), this->sizeX), std::max(element->oldScale[frame].position.y, std::uint16_t(0)), std::min(std::uint16_t(element->oldScale[frame].position.y + element->oldScale[frame].size.y), this->sizeY));
|
||||
element->oldScale[frame] = element->scaled;
|
||||
element->redraw[frame] = false;
|
||||
} else if(element->redraw[frame]) {
|
||||
clipRects.emplace_back(std::max(element->scaled.position.x, std::int32_t(0)), std::min(element->scaled.position.x + element->scaled.size.x, this->sizeX), std::max(element->scaled.position.y, std::int32_t(0)), std::min(element->scaled.position.y + element->scaled.size.y, this->sizeY));
|
||||
clipRects.emplace_back(std::max(element->scaled.position.x, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.x + element->scaled.size.x), this->sizeX), std::max(element->scaled.position.y, std::uint16_t(0)), std::min(std::uint16_t(element->scaled.position.y + element->scaled.size.y), this->sizeY));
|
||||
element->oldScale[frame] = element->scaled;
|
||||
element->redraw[frame] = false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue