Two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):
1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
shader that traces a secondary ray (shadow ray — a very common
pattern) recursed past the pipeline limit (UB → device fault).
PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
(default 1, clamped to the device's maxRayRecursionDepth).
2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
that reads an accelerationStructureEXT from the heap — including
compute shaders — to read the TLAS device address from a push
constant, but only RTPass pushed that address. A compute shader that
ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.
WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
{patched, tlasPushOffset} instead of writing the clobber-prone global
Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
ComputeShader tracks its own offset and pushes the caller-supplied
TLAS address in Dispatch (new defaulted tlasAddress parameter),
mirroring RTPass::Record.
The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
80 lines
3.7 KiB
C++
80 lines
3.7 KiB
C++
/*
|
|
Crafter®.Graphics
|
|
Copyright (C) 2026 Catcrafts®
|
|
catcrafts.net
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License version 3.0 as published by the Free Software Foundation;
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
module;
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
|
#include "vulkan/vulkan.h"
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
|
export module Crafter.Graphics:ComputeShader;
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
|
import std;
|
|
import :Device;
|
|
|
|
export namespace Crafter {
|
|
// Tier 1: thin compute-pipeline wrapper. Owns one VkPipeline created with
|
|
// VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT (no pipeline layout — the
|
|
// bindless heap supplies all bindings, push constants travel via
|
|
// vkCmdPushDataEXT). Use this to dispatch the four standard UI shaders
|
|
// and any user-authored compute shader that follows the ui-shared.glsl
|
|
// contract.
|
|
class ComputeShader {
|
|
public:
|
|
VkPipeline pipeline = VK_NULL_HANDLE;
|
|
|
|
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): set by
|
|
// Load when this shader ray-queries the TLAS through the descriptor
|
|
// heap and was rewritten to read its device address from a push
|
|
// constant. `workaroundTlasPushOffset` is the byte offset of that member
|
|
// (after the caller's own push payload, or 0 if the shader had none).
|
|
// Tracked per-shader — a global is clobbered by whichever shader was
|
|
// patched last. Both inert (false/0) on every other driver.
|
|
bool workaroundNeedsTlas = false;
|
|
std::uint32_t workaroundTlasPushOffset = 0;
|
|
|
|
ComputeShader() = default;
|
|
ComputeShader(const ComputeShader&) = delete;
|
|
ComputeShader& operator=(const ComputeShader&) = delete;
|
|
ComputeShader(ComputeShader&& other) noexcept;
|
|
ComputeShader& operator=(ComputeShader&& other) noexcept;
|
|
~ComputeShader();
|
|
|
|
// Loads a SPIR-V compute shader from disk and creates a pipeline that
|
|
// uses the bindless descriptor-heap binding model.
|
|
void Load(const std::filesystem::path& spvPath);
|
|
|
|
// Bind, push constants (if any), dispatch. Caller computes group counts
|
|
// and is responsible for any inter-dispatch barriers (UIRenderer::Dispatch
|
|
// wraps this with the standard write-after-write barrier).
|
|
//
|
|
// tlasAddress is the NVIDIA descriptor-heap AS-read workaround hook
|
|
// (issue #15 / #7): a shader that ray-queries the TLAS through the
|
|
// descriptor heap is rewritten to read its device address from a push
|
|
// constant, so the caller must supply the active frame's TLAS address
|
|
// (RenderingElement3D::tlases[frameIdx].address) here. It is pushed at
|
|
// the shader's workaroundTlasPushOffset only when the shader was
|
|
// rewritten (workaroundNeedsTlas) — ignored otherwise and on every
|
|
// other driver, so shaders that don't touch an AS pass nothing.
|
|
void Dispatch(VkCommandBuffer cmd,
|
|
const void* push, std::uint32_t pushBytes,
|
|
std::uint32_t gx,
|
|
std::uint32_t gy = 1,
|
|
std::uint32_t gz = 1,
|
|
VkDeviceAddress tlasAddress = 0) const;
|
|
};
|
|
}
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|