#version 460
// UI compositing compute shader. Each invocation handles one pixel: it loads
// the current value of the output image, walks the draw-item list in order
// (see main), and writes the blended result back to the same image.

// Used by the `layout(descriptor_heap)` resource declarations.
#extension GL_EXT_descriptor_heap : enable
// Used by `nonuniformEXT(...)` when indexing the texture heap in ShadeImage.
#extension GL_EXT_nonuniform_qualifier : enable
// Used by the `scalar` layout qualifier on the UIItem storage buffer.
#extension GL_EXT_scalar_block_layout : enable
// The `image2D` heap view is declared without a format qualifier and is read
// with imageLoad — presumably what this extension is enabled for.
#extension GL_EXT_shader_image_load_formatted : enable
// NOTE(review): no 16-bit types appear in the visible code — confirm whether
// this extension is still required.
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable

// 16 x 16 invocations per workgroup; main() maps gl_GlobalInvocationID.xy to
// a pixel coordinate.
layout(local_size_x = 16, local_size_y = 16) in;

// ──── Item types — must match UI::ItemType in UIDrawList.cppm ────────────
// Values compared against UIItem.itype. Any value not handled explicitly in
// main() (including 4, which has no constant here) shades as transparent.
const uint TYPE_RECT       = 0u;
const uint TYPE_ROUND_RECT = 1u;
const uint TYPE_GLYPH      = 2u;
const uint TYPE_IMAGE      = 3u;
const uint TYPE_CLIP_PUSH  = 5u;
const uint TYPE_CLIP_POP   = 6u;

// Number of entries in the per-invocation clip-rect stack used by main().
#define MAX_CLIP_DEPTH 8

// ──── Draw item — must match UI::UIItem layout (88 bytes, scalar) ────────
// Offsets below assume the tightly packed `scalar` block layout that the
// item buffer is declared with.
struct UIItem {
    uint  itype;           // offset  0: one of the TYPE_* constants
    uint  flags;           // offset  4: not read by the visible shader code
    vec2  posPx;           // offset  8: rect origin (minimum corner), in pixels
    vec2  sizePx;          // offset 16: rect extent, in pixels
    vec4  color;           // offset 24: colour, scaled by coverage and blended
                           //            as premultiplied alpha in main()
    vec4  colorB;          // offset 40: not read by the visible shader code
    vec4  uvRect;          // offset 56: xy = UV origin, zw = UV extent
    uint  imageIdx;        // offset 72: added to pc.bindlessBaseHeapIdx to pick
                           //            the texture slot for TYPE_IMAGE
    uint  cornerRadiusPx;  // offset 76: corner radius for TYPE_ROUND_RECT
    vec2  reserved;        // offset 80: not read by the visible shader code
};

// ──── Bindless heap views — VK_EXT_descriptor_heap untyped model ─────────
// Each `layout(descriptor_heap)` declaration is a typed view over the same
// resource heap; indexing is in slot units (image-descriptor units for
// image2D, buffer-descriptor units for buffers, etc.). The application
// passes the absolute heap slot indices via push constants.
// Storage-buffer view: one UIItem array per heap slot, tightly packed (scalar).
layout(descriptor_heap, scalar) readonly buffer UIItemBuf {
    UIItem items[];
} itemHeap[];

// Storage-image, sampled-image and sampler views over the heap.
layout(descriptor_heap) uniform image2D   images[];
layout(descriptor_heap) uniform texture2D textures[];
layout(descriptor_heap) uniform sampler   samplers[];

// ──── Push constants ─────────────────────────────────────────────────────
// NOTE(review): this block carries no `scalar` qualifier, so under the
// default push-constant layout rules `surfaceSize` (vec2) is expected to be
// 8-byte aligned, i.e. at offset 8 rather than 4 — confirm the host-side
// struct has matching padding.
layout(push_constant) uniform PC {
    uint  itemCount;             // number of draw items to walk
    vec2  surfaceSize;           // output size in pixels
    float scale;                 // not read by the visible shader code
    uint  outImageHeapIdx;       // storage-image slot of the current swapchain view
    uint  itemBufHeapIdx;        // SSBO slot of the current frame's items
    uint  atlasTextureHeapIdx;   // sampled-image slot of the SDF atlas
    uint  bindlessBaseHeapIdx;   // base sampled-image slot for user images
    uint  linearSamplerHeapIdx;  // sampler-heap slot
} pc;

// ──── Driver workaround: per-member SSBO load ────────────────────────────
// A whole-struct read (`UIItem it = itemHeap[idx].items[i]`) emits an OpLoad
// of a composite type from a descriptor-heap'd SSBO, which crashes the GPU on
// the NVIDIA VK_EXT_descriptor_heap path (verified with a 1-float struct
// repro). Member-wise reads are fine: each one becomes an OpAccessChain plus
// a scalar/vector OpLoad. LoadItem therefore copies the item field by field
// into a local, and the rest of the shader only ever touches that local.
//
// UI_ITEM_FIELD expands to exactly the per-member access chain, so the
// generated code is the same as spelling every access out by hand. Do not
// replace it with a whole-struct copy.
#define UI_ITEM_FIELD(member) itemHeap[pc.itemBufHeapIdx].items[i].member

// Returns a local copy of draw item `i` from the current frame's item buffer.
UIItem LoadItem(uint i)
{
    UIItem loaded;

    loaded.itype          = UI_ITEM_FIELD(itype);
    loaded.flags          = UI_ITEM_FIELD(flags);
    loaded.posPx          = UI_ITEM_FIELD(posPx);
    loaded.sizePx         = UI_ITEM_FIELD(sizePx);
    loaded.color          = UI_ITEM_FIELD(color);
    loaded.colorB         = UI_ITEM_FIELD(colorB);
    loaded.uvRect         = UI_ITEM_FIELD(uvRect);
    loaded.imageIdx       = UI_ITEM_FIELD(imageIdx);
    loaded.cornerRadiusPx = UI_ITEM_FIELD(cornerRadiusPx);
    loaded.reserved       = UI_ITEM_FIELD(reserved);

    return loaded;
}

#undef UI_ITEM_FIELD

// ──── Shading helpers ────────────────────────────────────────────────────
// In-bounds sharp rectangle.

// Returns true when the pixel centre `fp` lies outside the half-open
// rectangle [pos, pos + size). Shared by the rect, glyph, image and clip
// tests so they all agree on edge ownership.
bool OutsideRect(vec2 fp, vec2 pos, vec2 size)
{
    return any(lessThan(fp, pos)) || any(greaterThanEqual(fp, pos + size));
}

// Solid axis-aligned rectangle with hard edges.
// Returns it.color inside the rect and transparent black outside it.
vec4 ShadeRect(UIItem it, vec2 fp)
{
    if (OutsideRect(fp, it.posPx, it.sizePx))
        return vec4(0.0);
    return it.color;
}

// Signed distance to a rounded rectangle.
//   p        — sample position relative to the rect centre
//   halfSize — half extents of the rect
//   r        — corner radius (expected to be <= min(halfSize))
// Negative inside, zero on the edge, positive outside.
float sdRoundRect(vec2 p, vec2 halfSize, float r)
{
    vec2 q = abs(p) - halfSize + vec2(r);
    return length(max(q, vec2(0.0))) + min(max(q.x, q.y), 0.0) - r;
}

// Rounded rectangle with a 1-pixel anti-aliased edge.
// Returns it.color scaled by the coverage derived from the signed distance.
vec4 ShadeRoundRect(UIItem it, vec2 fp)
{
    vec2 halfSize = it.sizePx * 0.5;
    vec2 centre   = it.posPx + halfSize;

    // The SDF is only well-formed while the radius fits inside the rect, so
    // clamp oversized radii to the shorter half-extent (yielding a pill
    // shape) instead of producing a degenerate outline.
    float maxRadius = max(min(halfSize.x, halfSize.y), 0.0);
    float r         = min(float(it.cornerRadiusPx), maxRadius);

    float d = sdRoundRect(fp - centre, halfSize, r);

    // 1-pixel AA band around the edge.
    float a = clamp(0.5 - d, 0.0, 1.0);
    return it.color * a;
}

// SDF glyph quad: samples the atlas at the UV mapped from the pixel's
// position inside the quad and converts the distance value to coverage.
// Returns transparent black outside the quad.
vec4 ShadeGlyph(UIItem it, vec2 fp)
{
    // This early-out also guarantees sizePx > 0 on both axes before the
    // division below.
    if (OutsideRect(fp, it.posPx, it.sizePx))
        return vec4(0.0);

    vec2 localUV = (fp - it.posPx) / it.sizePx;
    vec2 atlasUV = it.uvRect.xy + localUV * it.uvRect.zw;

    // Inline sampler2D construction — GLSL doesn't allow sampler2D as a
    // local variable, only as a function argument or uniform.
    // Compute shaders have no implicit derivatives, so request the base
    // level explicitly rather than relying on implicit-LOD sampling.
    float dist = textureLod(
        sampler2D(textures[pc.atlasTextureHeapIdx],
                  samplers[pc.linearSamplerHeapIdx]),
        atlasUV,
        0.0
    ).r;

    // SDF threshold (stored on-edge value = 128/255 ≈ 0.502). A small
    // sample-units band gives ~1 screen pixel of AA at typical sizes.
    float aa = 0.05;
    float a  = smoothstep(0.5 - aa, 0.5 + aa, dist);
    return it.color * a;
}

// Textured quad: samples the user image selected by it.imageIdx (relative to
// pc.bindlessBaseHeapIdx) and modulates it by it.color.
// Returns transparent black outside the quad.
vec4 ShadeImage(UIItem it, vec2 fp)
{
    // This early-out also guarantees sizePx > 0 on both axes before the
    // division below.
    if (OutsideRect(fp, it.posPx, it.sizePx))
        return vec4(0.0);

    vec2 localUV  = (fp - it.posPx) / it.sizePx;
    vec2 sourceUV = it.uvRect.xy + localUV * it.uvRect.zw;

    uint slot = pc.bindlessBaseHeapIdx + it.imageIdx;

    // Explicit base-level fetch: compute shaders have no implicit derivatives.
    return textureLod(
        sampler2D(textures[nonuniformEXT(slot)],
                  samplers[pc.linearSamplerHeapIdx]),
        sourceUV,
        0.0
    ) * it.color;
}

// ──── Main ───────────────────────────────────────────────────────────────
// One invocation per pixel. Walks the draw list front to back in submission
// order, maintaining a clip-rect stack, and blends every covering item over
// the existing contents of the output image.
void main()
{
    ivec2 ip = ivec2(gl_GlobalInvocationID.xy);
    // Workgroups are 16x16, so edge groups may overhang the surface.
    if (any(greaterThanEqual(ip, ivec2(pc.surfaceSize))))
        return;

    vec2 fp = vec2(ip) + 0.5; // pixel centre

    // Composite over what's already in the swapchain (3D output, clear, …).
    vec4 dst = imageLoad(images[pc.outImageHeapIdx], ip);

    // Clip stack — each entry is an effective rect in (x, y, w, h), already
    // intersected with every enclosing clip. Entry 0 is the whole surface.
    vec4 clipStack[MAX_CLIP_DEPTH];
    int  clipTop = 0;
    // Number of pushes that did not fit in clipStack. Their matching pops
    // must not remove a real entry, otherwise every later item would be
    // clipped against the wrong rectangle.
    uint clipOverflow = 0u;
    clipStack[0] = vec4(0.0, 0.0, pc.surfaceSize);

    for (uint i = 0u; i < pc.itemCount; ++i) {
        UIItem it = LoadItem(i);

        if (it.itype == TYPE_CLIP_PUSH) {
            if (clipTop + 1 >= MAX_CLIP_DEPTH) {
                // Stack full: ignore this clip (items inside it stay clipped
                // by the deepest stored rect) but remember the push so that
                // push/pop pairing stays balanced.
                ++clipOverflow;
                continue;
            }
            vec4 outer = clipStack[clipTop];
            vec2 lo    = max(outer.xy, it.posPx);
            vec2 hi    = min(outer.xy + outer.zw, it.posPx + it.sizePx);
            ++clipTop;
            // Clamp to zero size when the rects do not overlap.
            clipStack[clipTop] = vec4(lo, max(hi - lo, vec2(0.0)));
            continue;
        }

        if (it.itype == TYPE_CLIP_POP) {
            if (clipOverflow > 0u)
                --clipOverflow;                  // undo an ignored push
            else
                clipTop = max(clipTop - 1, 0);   // never pop the surface rect
            continue;
        }

        // Skip if pixel is outside the current clip rect.
        vec4 clip = clipStack[clipTop];
        if (OutsideRect(fp, clip.xy, clip.zw))
            continue;

        vec4 src;
        switch (it.itype) {
            case TYPE_RECT:       src = ShadeRect     (it, fp); break;
            case TYPE_ROUND_RECT: src = ShadeRoundRect(it, fp); break;
            case TYPE_GLYPH:      src = ShadeGlyph    (it, fp); break;
            case TYPE_IMAGE:      src = ShadeImage    (it, fp); break;
            default:              src = vec4(0.0);              break;
        }

        // Premultiplied "OVER": dst = src + dst * (1 - src.a)
        dst = src + dst * (1.0 - src.a);
    }

    imageStore(images[pc.outImageHeapIdx], ip, dst);
}