// Crafter.Graphics UI shader contract — shared by every standard UI compute
// shader and intended to be #included by user-authored shaders that want to
// dispatch alongside them. Layouts here are FROZEN: only additive changes
// are allowed (using the reserved `flags` bits or `_pad`).

#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_descriptor_heap : enable
#extension GL_EXT_nonuniform_qualifier : enable
#extension GL_EXT_buffer_reference : enable

// ─── bindless heap declarations ─────────────────────────────────────────
// The same heap slot can be read as either uiImages[] (storage image) or
// uiTextures[] (sampled image) depending on which descriptor was written
// at that slot. Samplers live in a separate sampler heap.
layout(descriptor_heap) uniform image2D   uiImages[];
layout(descriptor_heap) uniform texture2D uiTextures[];
layout(descriptor_heap) uniform sampler   uiSamplers[];

// ─── push-constant header ───────────────────────────────────────────────
// Every UI dispatch's push-constant struct begins with this. User shaders
// MUST embed it as the first member so UIRenderer::FillHeader works.
struct UIDispatchHeader {
    uint  outImage;     // heap slot of the swapchain image (this frame)
    uint  itemBuffer;   // heap slot of the item SSBO
    uvec2 surfaceSize;  // window pixel size
    vec4  clipRectPx;   // (xy, wh) — every standard shader honors this
    uint  itemCount;
    uint  frameIdx;
    uint  flags;        // user-defined feature bits
    uint  _pad;         // reserved — keep zeroed
};

// ─── standard item structs ──────────────────────────────────────────────
// These match the C++ Crafter::QuadItem / CircleItem / ImageItem / GlyphItem
// byte-for-byte under std430.
// Rounded, optionally outlined rectangle.
struct QuadItem {
    vec4 rect;     // (x, y, w, h) in pixels
    vec4 color;    // filled body RGBA (premultiplied alpha not assumed)
    vec4 corners;  // per-corner radius in px (TL, TR, BR, BL); 0 = sharp
    vec4 outline;  // (thickness, R, G, B); thickness > 0 paints an outline of given color
};

// Circle, either filled or outlined.
struct CircleItem {
    vec4 centerRadius;  // (cx, cy, radius, _)
    vec4 color;
    vec4 outline;       // .x = thickness (0 = filled), .yzw = outline RGB
};

// Textured rectangle sampled from a heap texture.
struct ImageItem {
    vec4  rect;   // (x, y, w, h)
    vec4  uv;     // (u0, v0, u1, v1) into the source texture
    vec4  tint;   // multiplied with the sampled color
    uvec4 slots;  // (textureHeapSlot, samplerHeapSlot, _, _)
};

// Single glyph drawn from the SDF font atlas.
struct GlyphItem {
    vec4 rect;   // (x, y, w, h) on screen
    vec4 uv;     // (u0, v0, u1, v1) into the SDF font atlas
    vec4 color;  // glyph color (alpha modulated by SDF)
};

// ─── SSBO heap views ────────────────────────────────────────────────────
// One declaration per item type; each shader uses the one matching its
// dispatch. Indexed by hdr.itemBuffer.
layout(descriptor_heap, std430) readonly buffer UIQuadBuf {
    QuadItem items[];
} uiQuadHeap[];

layout(descriptor_heap, std430) readonly buffer UICircleBuf {
    CircleItem items[];
} uiCircleHeap[];

layout(descriptor_heap, std430) readonly buffer UIImageBuf {
    ImageItem items[];
} uiImageHeap[];

layout(descriptor_heap, std430) readonly buffer UIGlyphBuf {
    GlyphItem items[];
} uiGlyphHeap[];

// ──── Driver workaround: per-member SSBO load ────────────────────────────
// `UIItem it = itemHeap[idx].items[i]` emits an OpLoad of a composite type
// from a descriptor-heap'd SSBO, which crashes the GPU on the NVIDIA
// VK_EXT_descriptor_heap path (verified with a 1-float struct repro).
// Reading individual members works (each becomes OpAccessChain + scalar
// OpLoad). LoadItem reassembles the struct member-by-member into a local;
// the rest of the shader then operates on a regular local var.
// Per-type item loaders. Each one copies item `i` out of the SSBO found at
// heap slot `heap`, one member at a time, and returns it as a local struct.
// Do NOT collapse these into a whole-struct assignment: the member-wise
// reads are deliberate.

// Loads ImageItem `i` from the image-item buffer at heap slot `heap`.
ImageItem LoadImageItem(uint heap, uint i) {
    ImageItem it;
    it.rect  = uiImageHeap[heap].items[i].rect;
    it.uv    = uiImageHeap[heap].items[i].uv;
    it.tint  = uiImageHeap[heap].items[i].tint;
    it.slots = uiImageHeap[heap].items[i].slots;
    return it;
}

// Loads GlyphItem `i` from the glyph-item buffer at heap slot `heap`.
GlyphItem LoadGlyphItem(uint heap, uint i) {
    GlyphItem it;
    it.rect  = uiGlyphHeap[heap].items[i].rect;
    it.uv    = uiGlyphHeap[heap].items[i].uv;
    it.color = uiGlyphHeap[heap].items[i].color;
    return it;
}

// DEPRECATED: misspelled name of LoadGlyphItem, kept so shaders that already
// call it keep compiling. New code should call LoadGlyphItem.
GlyphItem LoadGlpyhtem(uint heap, uint i) {
    return LoadGlyphItem(heap, i);
}

// Loads CircleItem `i` from the circle-item buffer at heap slot `heap`.
CircleItem LoadCircleItem(uint heap, uint i) {
    CircleItem it;
    it.centerRadius = uiCircleHeap[heap].items[i].centerRadius;
    it.color        = uiCircleHeap[heap].items[i].color;
    it.outline      = uiCircleHeap[heap].items[i].outline;
    return it;
}

// Loads QuadItem `i` from the quad-item buffer at heap slot `heap`.
QuadItem LoadQuadItem(uint heap, uint i) {
    QuadItem it;
    it.rect    = uiQuadHeap[heap].items[i].rect;
    it.color   = uiQuadHeap[heap].items[i].color;
    it.corners = uiQuadHeap[heap].items[i].corners;
    it.outline = uiQuadHeap[heap].items[i].outline;
    return it;
}

// ─── pixel-tile dispatch model ─────────────────────────────────────────
// Standard shaders dispatch one workgroup per 8×8 screen tile. Each thread
// owns ONE pixel and iterates ALL items in order, accumulating the result
// in a local register, then stores once at the end. This guarantees correct
// z-order within a single dispatch (no inter-workgroup race on imageLoad/
// imageStore) and gives the user simple semantics: "items render in array
// order, later items overdraw earlier ones".
//
// Caller dispatches `(ceil(W/8), ceil(H/8), 1)` — no need to know the max
// item size.

// Returns the screen pixel and validates against the surface and clip rect.
// Resolves the pixel owned by this invocation and tests it against the
// surface bounds and the header's clip rect.
//   hdr      — dispatch header; surfaceSize and clipRectPx (xy = origin,
//              zw = size) are read.
//   screenPx — receives gl_GlobalInvocationID.xy as an ivec2. It is written
//              on every path so the caller never observes an undefined
//              value, but it is only a drawable pixel when the function
//              returns true.
// Returns true when the pixel is inside the surface and inside the clip
// rect (min edge inclusive, max edge exclusive); false otherwise.
bool uiResolveScreenPixel(UIDispatchHeader hdr, out ivec2 screenPx) {
    uvec2 px = gl_GlobalInvocationID.xy;

    // An `out` parameter that is not written is undefined after the call,
    // so assign it before any early return.
    screenPx = ivec2(px);

    // Outside the surface (the dispatch is rounded up to whole tiles).
    if (px.x >= hdr.surfaceSize.x || px.y >= hdr.surfaceSize.y) return false;

    // Outside the clip rectangle.
    vec2 p       = vec2(px);
    vec2 clipMin = hdr.clipRectPx.xy;
    vec2 clipMax = hdr.clipRectPx.xy + hdr.clipRectPx.zw;
    if (any(lessThan(p, clipMin)))         return false;
    if (any(greaterThanEqual(p, clipMax))) return false;

    return true;
}

// "src over dst" blend for a local accumulator. Inputs are straight
// (non-premultiplied) alpha; src.a is clamped to [0, 1].
//   RGB   = lerp(dst.rgb, src.rgb, src.a)
//   alpha = src.a + dst.a * (1 - src.a)
// NOTE: the RGB term is not renormalized by the output alpha, so it equals
// the exact straight-alpha "over" result only when dst is opaque
// (dst.a == 1). For a translucent dst the color is weighted as if dst were
// opaque.
vec4 uiBlendOver(vec4 dst, vec4 src) {
    float a      = clamp(src.a, 0.0, 1.0);
    vec3  outRGB = mix(dst.rgb, src.rgb, a);
    float outA   = a + dst.a * (1.0 - a);
    return vec4(outRGB, outA);
}

// SDF for a rounded rect with per-corner radius.
//   p        — point relative to the rect's center
//   halfSize — rect half-extents
//   r        — per-corner radius (TL, TR, BR, BL)
// Returns the signed distance to the shape (negative inside).
float uiSdRoundRect(vec2 p, vec2 halfSize, vec4 r) {
    // Pick the radius for the quadrant p is in.
    // Right half  → (BR, TR); left half → (BL, TL).
    vec2 side = (p.x > 0.0) ? r.zy : r.wx;
    // p.y > 0 selects the bottom corner (BR/BL), otherwise the top (TR/TL).
    float radius = (p.y > 0.0) ? side.x : side.y;

    // Box SDF on the rect shrunk by `radius`, then grown back by `radius`.
    vec2 q = abs(p) - halfSize + radius;
    return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - radius;
}