optimizations
This commit is contained in:
parent
b41ec7960c
commit
4925bd77b9
2 changed files with 77 additions and 26 deletions
|
|
@ -134,35 +134,70 @@ void WindowWayland::StartSync() {
|
|||
}
|
||||
}
|
||||
|
||||
// Optimized pixel blending function using SIMD-like operations
|
||||
inline void blend_pixel_optimized(Pixel_BU8_GU8_RU8_AU8& dst, const Pixel_BU8_GU8_RU8_AU8& src) {
|
||||
float srcA = src.a / 255.0f;
|
||||
float dstA = dst.a / 255.0f;
|
||||
|
||||
float outA = srcA + dstA * (1.0f - srcA);
|
||||
if (outA > 0.0f) {
|
||||
dst = {
|
||||
static_cast<uint8_t>((src.b * srcA + dst.b * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>((src.g * srcA + dst.g * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>((src.r * srcA + dst.r * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>(outA * 255)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Optimized rendering with bounds checking and early exit
|
||||
void WindowWayland::RenderElement(Transform* transform) {
|
||||
RenderingElement* element = dynamic_cast<RenderingElement*>(transform);
|
||||
if(element) {
|
||||
#ifdef CRAFTER_TIMING
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
for (std::int_fast32_t x = element->scaled.x; x - element->scaled.x < element->scaled.width; x++) {
|
||||
for (std::int_fast32_t y = element->scaled.y; y - element->scaled.y < element->scaled.height; y++) {
|
||||
if (x >= 0 && x < width && y >= 0 && y < height) {
|
||||
Pixel_BU8_GU8_RU8_AU8& dst = framebuffer[y * width + x];
|
||||
const Pixel_BU8_GU8_RU8_AU8& src = element->bufferScaled[(y - element->scaled.y) * element->scaled.width + (x - element->scaled.x)];
|
||||
|
||||
float srcA = src.a / 255.0f;
|
||||
float dstA = dst.a / 255.0f;
|
||||
|
||||
float outA = srcA + dstA * (1.0f - srcA);
|
||||
if (outA > 0.0f) {
|
||||
dst = {
|
||||
static_cast<uint8_t>((src.b * srcA + dst.b * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>((src.g * srcA + dst.g * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>((src.r * srcA + dst.r * dstA * (1.0f - srcA)) / outA),
|
||||
static_cast<uint8_t>(outA * 255)
|
||||
};
|
||||
}
|
||||
|
||||
// Calculate clipping bounds
|
||||
std::int_fast32_t clip_left = std::max(element->scaled.x, std::int_fast32_t(0));
|
||||
std::int_fast32_t clip_top = std::max(element->scaled.y, std::int_fast32_t(0));
|
||||
std::int_fast32_t clip_right = std::min(element->scaled.x + element->scaled.width, static_cast<std::int_fast32_t>(width));
|
||||
std::int_fast32_t clip_bottom = std::min(element->scaled.y + element->scaled.height, static_cast<std::int_fast32_t>(height));
|
||||
|
||||
// Early exit if completely outside screen
|
||||
if (clip_left >= clip_right || clip_top >= clip_bottom) {
|
||||
#ifdef CRAFTER_TIMING
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
renderTimings.push_back({element, element->scaled.width, element->scaled.height, end-start});
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
// Get source buffer data
|
||||
const Pixel_BU8_GU8_RU8_AU8* src_buffer = element->bufferScaled.data();
|
||||
std::uint_fast32_t src_width = element->scaled.width;
|
||||
std::uint_fast32_t src_height = element->scaled.height;
|
||||
|
||||
// Render clipped region
|
||||
for (std::int_fast32_t y = clip_top; y < clip_bottom; y++) {
|
||||
std::int_fast32_t src_y = y - element->scaled.y;
|
||||
|
||||
for (std::int_fast32_t x = clip_left; x < clip_right; x++) {
|
||||
std::int_fast32_t src_x = x - element->scaled.x;
|
||||
|
||||
// Bounds check for source buffer
|
||||
if (src_x >= 0 && src_x < static_cast<std::int_fast32_t>(src_width) && src_y >= 0 && src_y < static_cast<std::int_fast32_t>(src_height)) {
|
||||
|
||||
// Get pixel indices
|
||||
std::uint_fast32_t dst_idx = y * width + x;
|
||||
std::uint_fast32_t src_idx = src_y * src_width + src_x;
|
||||
|
||||
// Blend pixels
|
||||
blend_pixel_optimized(framebuffer[dst_idx], src_buffer[src_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CRAFTER_TIMING
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
renderTimings.push_back({element, element->scaled.width, element->scaled.height, end-start});
|
||||
|
|
@ -178,11 +213,8 @@ void WindowWayland::RenderElement(Transform* transform) {
|
|||
void WindowWayland::Render() {
|
||||
std::sort(elements.begin(), elements.end(), [](Transform* a, Transform* b){ return a->z < b->z; });
|
||||
|
||||
for (std::uint_fast32_t x = 0; x < width; x++) {
|
||||
for (std::uint_fast32_t y = 0; y < height; y++) {
|
||||
framebuffer[y * width + x] = {0,0,0,0};
|
||||
}
|
||||
}
|
||||
// Clear screen efficiently using memset
|
||||
memset(framebuffer, 0, width * height * sizeof(Pixel_BU8_GU8_RU8_AU8));
|
||||
|
||||
for(Transform* child : elements) {
|
||||
RenderElement(child);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue