fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21)
This fixes two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):
1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
shader that traces a secondary ray (e.g. a shadow ray — a very common
pattern) recursed past the pipeline limit (undefined behavior → device fault).
PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
(default 1, clamped to the device's maxRayRecursionDepth).
2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
that reads an accelerationStructureEXT from the heap — including
compute shaders — to read the TLAS device address from a push
constant, but only RTPass pushed that address. A compute shader that
ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.
WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
{patched, tlasPushOffset} instead of writing the clobber-prone global
Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
ComputeShader tracks its own offset and pushes the caller-supplied
TLAS address in Dispatch (new defaulted tlasAddress parameter),
mirroring RTPass::Record.
The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
2790bbd576
commit
1c310762a7
8 changed files with 248 additions and 75 deletions
|
|
@ -26,7 +26,10 @@ import std;
|
|||
|
||||
using namespace Crafter;
|
||||
|
||||
ComputeShader::ComputeShader(ComputeShader&& other) noexcept : pipeline(other.pipeline) {
|
||||
ComputeShader::ComputeShader(ComputeShader&& other) noexcept
|
||||
: pipeline(other.pipeline),
|
||||
workaroundNeedsTlas(other.workaroundNeedsTlas),
|
||||
workaroundTlasPushOffset(other.workaroundTlasPushOffset) {
|
||||
other.pipeline = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
|
|
@ -36,6 +39,8 @@ ComputeShader& ComputeShader::operator=(ComputeShader&& other) noexcept {
|
|||
vkDestroyPipeline(Device::device, pipeline, nullptr);
|
||||
}
|
||||
pipeline = other.pipeline;
|
||||
workaroundNeedsTlas = other.workaroundNeedsTlas;
|
||||
workaroundTlasPushOffset = other.workaroundTlasPushOffset;
|
||||
other.pipeline = VK_NULL_HANDLE;
|
||||
}
|
||||
return *this;
|
||||
|
|
@ -51,6 +56,13 @@ ComputeShader::~ComputeShader() {
|
|||
void ComputeShader::Load(const std::filesystem::path& spvPath) {
|
||||
VulkanShader shader(spvPath, "main", VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
|
||||
|
||||
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): remember
|
||||
// whether VulkanShader rewrote a heap acceleration-structure read in this
|
||||
// module, and where it expects the TLAS address pushed, so Dispatch can
|
||||
// feed it the per-frame TLAS. Per-shader, not a global — see ComputeShader.
|
||||
workaroundNeedsTlas = shader.patchedAS;
|
||||
workaroundTlasPushOffset = shader.tlasPushOffset;
|
||||
|
||||
// Spec: with VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT, layout MUST be
|
||||
// VK_NULL_HANDLE — bindings come from the bound descriptor heap and push
|
||||
// constants are pushed via vkCmdPushDataEXT instead of vkCmdPushConstants.
|
||||
|
|
@ -77,7 +89,8 @@ void ComputeShader::Dispatch(VkCommandBuffer cmd,
|
|||
const void* push, std::uint32_t pushBytes,
|
||||
std::uint32_t gx,
|
||||
std::uint32_t gy,
|
||||
std::uint32_t gz) const {
|
||||
std::uint32_t gz,
|
||||
VkDeviceAddress tlasAddress) const {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
if (push != nullptr && pushBytes > 0) {
|
||||
VkPushDataInfoEXT pushInfo {
|
||||
|
|
@ -87,5 +100,18 @@ void ComputeShader::Dispatch(VkCommandBuffer cmd,
|
|||
};
|
||||
Device::vkCmdPushDataEXT(cmd, &pushInfo);
|
||||
}
|
||||
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): if this shader
|
||||
// ray-queries the TLAS through the heap it was rewritten to read the TLAS
|
||||
// device address from a push constant; push the caller-supplied address
|
||||
// where the rewrite reads it (after any user payload, or offset 0 if none).
|
||||
// Mirrors RTPass::Record for the RT pipeline. Inert on every other driver.
|
||||
if (Device::workaroundDescriptorHeapAS && workaroundNeedsTlas) {
|
||||
VkPushDataInfoEXT tlasPush {
|
||||
.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT,
|
||||
.offset = workaroundTlasPushOffset,
|
||||
.data = { .address = &tlasAddress, .size = sizeof(tlasAddress) },
|
||||
};
|
||||
Device::vkCmdPushDataEXT(cmd, &tlasPush);
|
||||
}
|
||||
vkCmdDispatch(cmd, gx, gy, gz);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue