From 825da78f7fec24b5a92926353907ddb7a4083522 Mon Sep 17 00:00:00 2001 From: Jorijn van der Graaf Date: Tue, 5 May 2026 00:02:04 +0200 Subject: [PATCH] descriptor heap leak fix --- implementations/Crafter.Graphics-UI.cpp | 12 +- ...Crafter.Graphics-DescriptorHeapVulkan.cppm | 208 +++++++++++++++++- interfaces/Crafter.Graphics-UI.cppm | 41 ++-- 3 files changed, 232 insertions(+), 29 deletions(-) diff --git a/implementations/Crafter.Graphics-UI.cpp b/implementations/Crafter.Graphics-UI.cpp index 28c96a6..b54abf0 100644 --- a/implementations/Crafter.Graphics-UI.cpp +++ b/implementations/Crafter.Graphics-UI.cpp @@ -51,7 +51,7 @@ void UIRenderer::Initialize(Window& window, DescriptorHeapVulkan& heap, VkComman // Allocate one image slot for the swapchain output. Each per-frame heap // copy will hold ITS frame's image at this slot. auto outRange = heap_->AllocateImageSlots(1); - outImageSlot_ = outRange.firstElement; + outImageSlot_ = ImageSlot{heap_, outRange.firstElement}; WriteSwapchainDescriptors(); @@ -59,7 +59,7 @@ void UIRenderer::Initialize(Window& window, DescriptorHeapVulkan& heap, VkComman // already before reaching here, so atlas->image is live). if (fontAtlas != nullptr) { auto atlasImg = heap_->AllocateImageSlots(1); - fontAtlasImageSlot_ = atlasImg.firstElement; + fontAtlasImageSlot_ = ImageSlot{heap_, atlasImg.firstElement}; fontAtlasSamplerSlot_ = RegisterLinearClampSampler(); WriteFontAtlasDescriptor(); } @@ -165,7 +165,7 @@ void UIRenderer::DispatchText(VkCommandBuffer cmd, std::uint32_t bufferSlot, std::uint32_t itemCount, std::array clipRectPx) { if (itemCount == 0) return; - if (fontAtlasImageSlot_ == 0xFFFF) { + if (!fontAtlasImageSlot_) { throw std::runtime_error("UIRenderer::DispatchText: no FontAtlas registered (set fontAtlas before Initialize)"); } // Flush any glyphs that ShapeText calls (during this onBuild) just @@ -314,7 +314,7 @@ void UIRenderer::WriteBufferDescriptor(std::uint16_t slot, VkDeviceAddress addre for (auto& h : heap_->resourceHeap) h.FlushDevice(); } -std::uint16_t UIRenderer::RegisterSampler(const VkSamplerCreateInfo& info) { +SamplerSlot UIRenderer::RegisterSampler(const VkSamplerCreateInfo& info) { auto range = heap_->AllocateSamplerSlots(1); std::array infos{}; std::array destinations{}; @@ -329,10 +329,10 @@ std::uint16_t UIRenderer::RegisterSampler(const VkSamplerCreateInfo& info) { Device::device, Window::numFrames, infos.data(), destinations.data() ); for (auto& h : heap_->samplerHeap) h.FlushDevice(); - return range.firstElement; + return SamplerSlot{heap_, range.firstElement}; } -std::uint16_t UIRenderer::RegisterLinearClampSampler() { +SamplerSlot UIRenderer::RegisterLinearClampSampler() { VkSamplerCreateInfo s { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = VK_FILTER_LINEAR, diff --git a/interfaces/Crafter.Graphics-DescriptorHeapVulkan.cppm b/interfaces/Crafter.Graphics-DescriptorHeapVulkan.cppm index d1a4476..ee73a13 100644 --- a/interfaces/Crafter.Graphics-DescriptorHeapVulkan.cppm +++ b/interfaces/Crafter.Graphics-DescriptorHeapVulkan.cppm @@ -44,6 +44,14 @@ export namespace Crafter { std::uint16_t bufferNext = 0; std::uint16_t samplerNext = 0; + // Per-pool freelists of recyclable slot indices (raw, pre-bufferStartElement + // for the buffer pool). Populated by Free*Slots, consumed by Allocate*Slots + // before falling through to the bump pointer. Single-slot allocations only — + // multi-slot allocations need contiguity and bypass the freelist. + std::vector imageFreelist; + std::vector bufferFreelist; + std::vector samplerFreelist; + void Initialize(std::uint16_t images, std::uint16_t buffers, std::uint16_t samplers) { std::uint32_t descriptorRegion = images * Device::descriptorHeapProperties.imageDescriptorSize + buffers * Device::descriptorHeapProperties.bufferDescriptorSize; std::uint32_t alignedDescriptorRegion = (descriptorRegion + Device::descriptorHeapProperties.imageDescriptorAlignment - 1) & ~(Device::descriptorHeapProperties.imageDescriptorAlignment - 1); @@ -62,6 +70,15 @@ export namespace Crafter { imageNext = 0; bufferNext = 0; samplerNext = 0; + // Reserve so Free*Slots' push_back is noexcept (slot handle dtors + // call Free*Slots and must not throw). At most `capacity` slots + // can ever be free at one time. + imageFreelist.clear(); + imageFreelist.reserve(images); + bufferFreelist.clear(); + bufferFreelist.reserve(buffers); + samplerFreelist.clear(); + samplerFreelist.reserve(samplers); for(std::uint8_t i = 0; i < Window::numFrames; i++) { resourceHeap[i].Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_DESCRIPTOR_HEAP_BIT_EXT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, resourceSize); @@ -70,8 +87,14 @@ export namespace Crafter { } ImageSlotRange AllocateImageSlots(std::uint16_t count) { + if (count == 1 && !imageFreelist.empty()) { + std::uint16_t s = imageFreelist.back(); + imageFreelist.pop_back(); + return {s, 1}; + } if (imageNext + count > imageCapacity) { - throw std::runtime_error(std::format("DescriptorHeapVulkan: out of image slots ({} requested, {} remaining of {})", count, imageCapacity - imageNext, imageCapacity)); + std::uint16_t remaining = static_cast((imageCapacity - imageNext) + imageFreelist.size()); + throw std::runtime_error(std::format("DescriptorHeapVulkan: out of image slots ({} requested, {} remaining of {})", count, remaining, imageCapacity)); } ImageSlotRange r{imageNext, count}; imageNext += count; @@ -79,8 +102,14 @@ export namespace Crafter { } BufferSlotRange AllocateBufferSlots(std::uint16_t count) { + if (count == 1 && !bufferFreelist.empty()) { + std::uint16_t s = bufferFreelist.back(); + bufferFreelist.pop_back(); + return {s, 1}; + } if (bufferNext + count > bufferCapacity) { - throw std::runtime_error(std::format("DescriptorHeapVulkan: out of buffer slots ({} requested, {} remaining of {})", count, bufferCapacity - bufferNext, bufferCapacity)); + std::uint16_t remaining = static_cast((bufferCapacity - bufferNext) + bufferFreelist.size()); + throw std::runtime_error(std::format("DescriptorHeapVulkan: out of buffer slots ({} requested, {} remaining of {})", count, remaining, bufferCapacity)); } BufferSlotRange r{bufferNext, count}; bufferNext += count; @@ -88,14 +117,45 @@ export namespace Crafter { } SamplerSlotRange AllocateSamplerSlots(std::uint16_t count) { + if (count == 1 && !samplerFreelist.empty()) { + std::uint16_t s = samplerFreelist.back(); + samplerFreelist.pop_back(); + return {s, 1}; + } if (samplerNext + count > samplerCapacity) { - throw std::runtime_error(std::format("DescriptorHeapVulkan: out of sampler slots ({} requested, {} remaining of {})", count, samplerCapacity - samplerNext, samplerCapacity)); + std::uint16_t remaining = static_cast((samplerCapacity - samplerNext) + samplerFreelist.size()); + throw std::runtime_error(std::format("DescriptorHeapVulkan: out of sampler slots ({} requested, {} remaining of {})", count, remaining, samplerCapacity)); } SamplerSlotRange r{samplerNext, count}; samplerNext += count; return r; } + // Return slots to the per-pool freelist. The descriptors at these slots + // are NOT zeroed; the next allocation that reuses a slot overwrites it + // via WriteSampledImageDescriptor / WriteBufferDescriptor / sampler write. + // Caller MUST ensure no in-flight GPU frame still references the freed + // slots — typically vkDeviceWaitIdle (or a scene-switch barrier) before + // freeing. Multi-slot ranges are decomposed into their individual slots. + // noexcept: capacity is reserved at Initialize so push_back never reallocates. + void FreeImageSlots(ImageSlotRange r) noexcept { + for (std::uint16_t i = 0; i < r.count; ++i) { + imageFreelist.push_back(static_cast(r.firstElement + i)); + } + } + + void FreeBufferSlots(BufferSlotRange r) noexcept { + for (std::uint16_t i = 0; i < r.count; ++i) { + bufferFreelist.push_back(static_cast(r.firstElement + i)); + } + } + + void FreeSamplerSlots(SamplerSlotRange r) noexcept { + for (std::uint16_t i = 0; i < r.count; ++i) { + samplerFreelist.push_back(static_cast(r.firstElement + i)); + } + } + std::uint32_t ImageByteOffset(std::uint16_t firstElement) const { return firstElement * Device::descriptorHeapProperties.imageDescriptorSize; } @@ -124,4 +184,146 @@ export namespace Crafter { return bufferStartElement; } }; + + // ─── RAII slot handles ────────────────────────────────────────────── + // + // Move-only owners of a single bindless slot. Destructor returns the slot + // to the heap's freelist (noexcept — Free*Slots can't allocate because + // Initialize reserved the freelist). Implicitly convertible to std::uint16_t + // for use with FillHeader / WriteFooDescriptor / etc.; for buffer slots + // this conversion folds in the heap's bufferStartElement so callers always + // get the absolute heap index that GLSL `descriptor_heap` expects. + // + // Lifetime: the handle MUST be destroyed (or moved-from then destroyed) + // before the DescriptorHeapVulkan it references. The handle MUST NOT be + // destroyed while a frame in-flight on the GPU still references its slot — + // free at scene-switch boundaries (vkDeviceWaitIdle), not mid-frame. + // + // An empty (default-constructed or moved-from) handle is a no-op on + // destruction and converts to the sentinel 0xFFFF. + + struct ImageSlot { + ImageSlot() = default; + ImageSlot(DescriptorHeapVulkan* heap, std::uint16_t raw) noexcept + : heap_(heap), raw_(raw) {} + + ImageSlot(const ImageSlot&) = delete; + ImageSlot& operator=(const ImageSlot&) = delete; + + ImageSlot(ImageSlot&& o) noexcept : heap_(o.heap_), raw_(o.raw_) { + o.heap_ = nullptr; + } + ImageSlot& operator=(ImageSlot&& o) noexcept { + if (this != &o) { + Reset(); + heap_ = o.heap_; + raw_ = o.raw_; + o.heap_ = nullptr; + } + return *this; + } + + ~ImageSlot() noexcept { Reset(); } + + void Reset() noexcept { + if (heap_) { + heap_->FreeImageSlots({raw_, 1}); + heap_ = nullptr; + } + } + + operator std::uint16_t() const noexcept { + return heap_ ? raw_ : std::uint16_t{0xFFFF}; + } + explicit operator bool() const noexcept { return heap_ != nullptr; } + + private: + DescriptorHeapVulkan* heap_ = nullptr; + std::uint16_t raw_ = 0; + }; + + struct BufferSlot { + BufferSlot() = default; + BufferSlot(DescriptorHeapVulkan* heap, std::uint16_t raw) noexcept + : heap_(heap), raw_(raw) {} + + BufferSlot(const BufferSlot&) = delete; + BufferSlot& operator=(const BufferSlot&) = delete; + + BufferSlot(BufferSlot&& o) noexcept : heap_(o.heap_), raw_(o.raw_) { + o.heap_ = nullptr; + } + BufferSlot& operator=(BufferSlot&& o) noexcept { + if (this != &o) { + Reset(); + heap_ = o.heap_; + raw_ = o.raw_; + o.heap_ = nullptr; + } + return *this; + } + + ~BufferSlot() noexcept { Reset(); } + + void Reset() noexcept { + if (heap_) { + heap_->FreeBufferSlots({raw_, 1}); + heap_ = nullptr; + } + } + + // Absolute heap index = bufferStartElement + raw. GLSL descriptor_heap + // SSBOs are indexed in buffer-descriptor units from heap byte 0; the + // raw bump-allocator index is relative to the buffer region's start. + operator std::uint16_t() const noexcept { + return heap_ + ? static_cast(heap_->bufferStartElement + raw_) + : std::uint16_t{0xFFFF}; + } + explicit operator bool() const noexcept { return heap_ != nullptr; } + + private: + DescriptorHeapVulkan* heap_ = nullptr; + std::uint16_t raw_ = 0; + }; + + struct SamplerSlot { + SamplerSlot() = default; + SamplerSlot(DescriptorHeapVulkan* heap, std::uint16_t raw) noexcept + : heap_(heap), raw_(raw) {} + + SamplerSlot(const SamplerSlot&) = delete; + SamplerSlot& operator=(const SamplerSlot&) = delete; + + SamplerSlot(SamplerSlot&& o) noexcept : heap_(o.heap_), raw_(o.raw_) { + o.heap_ = nullptr; + } + SamplerSlot& operator=(SamplerSlot&& o) noexcept { + if (this != &o) { + Reset(); + heap_ = o.heap_; + raw_ = o.raw_; + o.heap_ = nullptr; + } + return *this; + } + + ~SamplerSlot() noexcept { Reset(); } + + void Reset() noexcept { + if (heap_) { + heap_->FreeSamplerSlots({raw_, 1}); + heap_ = nullptr; + } + } + + operator std::uint16_t() const noexcept { + return heap_ ? raw_ : std::uint16_t{0xFFFF}; + } + explicit operator bool() const noexcept { return heap_ != nullptr; } + + private: + DescriptorHeapVulkan* heap_ = nullptr; + std::uint16_t raw_ = 0; + }; } \ No newline at end of file diff --git a/interfaces/Crafter.Graphics-UI.cppm b/interfaces/Crafter.Graphics-UI.cppm index 9c12c69..a733e24 100644 --- a/interfaces/Crafter.Graphics-UI.cppm +++ b/interfaces/Crafter.Graphics-UI.cppm @@ -208,24 +208,27 @@ export namespace Crafter { // Allocates a heap slot for the buffer and writes its descriptor into // every per-frame heap. The user's mapped buffer is shared across // frames — fine because Window::Render currently waits idle before - // submitting the next frame. Returns the slot index for use in headers. + // submitting the next frame. Returns a move-only BufferSlot handle + // whose destructor returns the slot to the heap. Implicitly converts + // to the absolute heap index when passed to FillHeader / Dispatch*. template - std::uint16_t RegisterBuffer(VulkanBuffer& buffer); + BufferSlot RegisterBuffer(VulkanBuffer& buffer); // Same for an ImageVulkan-managed sampled image (e.g. a user texture). // Caller specifies the layout the image will be sampled in (typically // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL). template - std::uint16_t RegisterImage(ImageVulkan& image, VkFormat format, - VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + ImageSlot RegisterImage(ImageVulkan& image, VkFormat format, + VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // Allocates a sampler slot and writes a VkSamplerCreateInfo into // every per-frame sampler heap. v1 takes the create-info inline. - std::uint16_t RegisterSampler(const VkSamplerCreateInfo& info); + SamplerSlot RegisterSampler(const VkSamplerCreateInfo& info); - // Convenience: a linear-filter, clamp-to-edge sampler. Returns the - // slot. Useful for the FontAtlas and most plain image sampling. - std::uint16_t RegisterLinearClampSampler(); + // Convenience: a linear-filter, clamp-to-edge sampler. Returns a + // SamplerSlot handle. Useful for the FontAtlas and most plain image + // sampling. + SamplerSlot RegisterLinearClampSampler(); // Shapes a UTF-8 string into glyph quads at (x, y) baseline. Calls // FontAtlas::Ensure for each codepoint (rasterising on first use), @@ -252,14 +255,14 @@ export namespace Crafter { // heap, that slot points at THAT frame's swapchain image. So the // shader's `uiImages[hdr.outImage]` is always the current frame's // swapchain image regardless of which heap is bound. - std::uint16_t outImageSlot_ = 0; + ImageSlot outImageSlot_; // Stable VkImageViewCreateInfos for the descriptor heap to ingest. // These must outlive the write call. VkImageViewCreateInfo atlasViewCreateInfo_{}; - std::uint16_t fontAtlasImageSlot_ = 0xFFFF; - std::uint16_t fontAtlasSamplerSlot_ = 0xFFFF; + ImageSlot fontAtlasImageSlot_; + SamplerSlot fontAtlasSamplerSlot_; bool firstDispatchThisFrame_ = true; @@ -279,19 +282,17 @@ export namespace Crafter { // ─── template-method implementations ──────────────────────────────── template - std::uint16_t UIRenderer::RegisterBuffer(VulkanBuffer& buffer) { + BufferSlot UIRenderer::RegisterBuffer(VulkanBuffer& buffer) { auto range = heap_->AllocateBufferSlots(1); WriteBufferDescriptor(range.firstElement, buffer.address, buffer.size); - // GLSL `descriptor_heap` indexes buffer-typed views in buffer-descriptor - // units from heap byte 0; the actual buffer region starts past the - // image region at `bufferStartElement`. Return the absolute index so - // the user just hands it to FillHeader without thinking about it. - return static_cast(heap_->bufferStartElement + range.firstElement); + // BufferSlot's operator uint16_t() folds in heap_->bufferStartElement, + // so callers receive the absolute heap index when they convert. + return BufferSlot{heap_, range.firstElement}; } template - std::uint16_t UIRenderer::RegisterImage(ImageVulkan& image, VkFormat format, - VkImageLayout layout) { + ImageSlot UIRenderer::RegisterImage(ImageVulkan& image, VkFormat format, + VkImageLayout layout) { auto range = heap_->AllocateImageSlots(1); // Build a stable view-create-info that lives as long as the heap reads @@ -318,6 +319,6 @@ export namespace Crafter { }, }; WriteSampledImageDescriptor(range.firstElement, info, layout); - return range.firstElement; + return ImageSlot{heap_, range.firstElement}; } }