fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21)
Two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):
1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
shader that traces a secondary ray (shadow ray — a very common
pattern) recursed past the pipeline limit (UB → device fault).
PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
(default 1, clamped to the device's maxRayRecursionDepth).
2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
that reads an accelerationStructureEXT from the heap — including
compute shaders — to read the TLAS device address from a push
constant, but only RTPass pushed that address. A compute shader that
ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.
WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
{patched, tlasPushOffset} instead of writing the clobber-prone global
Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
ComputeShader tracks its own offset and pushes the caller-supplied
TLAS address in Dispatch (new defaulted tlasAddress parameter),
mirroring RTPass::Record.
The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
2790bbd576
commit
1c310762a7
8 changed files with 248 additions and 75 deletions
|
|
@ -29,7 +29,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||
// them through the real Patch(), and asserts with spirv-val that the result is
|
||||
// valid and contains exactly one push-constant variable — both for shaders
|
||||
// that already have a push constant (merge path) and for those that don't
|
||||
// (synthesize path). It also checks the published TLAS push-constant offset.
|
||||
// (synthesize path). It also checks the returned TLAS push-constant offset.
|
||||
//
|
||||
// It additionally covers ray-querying *compute* shaders (issue #21): the
|
||||
// rewrite is stage-agnostic, and ComputeShader::Dispatch now pushes the TLAS
|
||||
// address at the per-shader offset Patch returns, so a compute shader that
|
||||
// reads an acceleration structure through the descriptor heap must be patched
|
||||
// and report a correct offset exactly like a raygen does.
|
||||
//
|
||||
// Delete this test together with the rest of the workaround once a fixed NVIDIA
|
||||
// driver ships.
|
||||
|
|
@ -89,7 +95,7 @@ struct Case {
|
|||
std::string_view glsl;
|
||||
bool readsAccelStruct; // whether Patch should rewrite anything
|
||||
bool hasExistingPushConst; // whether the source already declares a push block
|
||||
std::uint32_t expectedOffset; // expected Device::workaroundTlasPushOffset (only checked when readsAccelStruct)
|
||||
std::uint32_t expectedOffset; // expected PatchResult::tlasPushOffset (only checked when readsAccelStruct)
|
||||
};
|
||||
|
||||
// Shared raygen scaffolding: a heap AS + heap image, traced and stored to.
|
||||
|
|
@ -156,6 +162,110 @@ std::string BuildSource(const Case& c) {
|
|||
return s;
|
||||
}
|
||||
|
||||
// Compute counterpart of the raygen cases (issue #21): a shader that ray-queries
|
||||
// the heap TLAS via rayQueryEXT. Shares the offset math with the raygen merge
|
||||
// path, so we only need one merge case and one synthesize case to prove compute
|
||||
// stages are handled identically.
|
||||
struct ComputeCase {
|
||||
std::string_view name;
|
||||
std::string_view glsl; // optional push-constant declaration
|
||||
bool hasExistingPushConst;
|
||||
std::uint32_t expectedOffset; // expected PatchResult::tlasPushOffset
|
||||
};
|
||||
|
||||
const std::array<ComputeCase, 2> kComputeCases = {{
|
||||
// No push constant → fresh single-member block synthesized at offset 0.
|
||||
{ "compute-no-push", std::string_view{""}, false, 0 },
|
||||
// Existing block {uint f; @0}; ends at 4, TLAS rounds up to the next 8.
|
||||
{ "compute-merge-uint",
|
||||
std::string_view{"layout(push_constant) uniform PC { uint f; } pc;\n"}, true, 8 },
|
||||
}};
|
||||
|
||||
std::string BuildComputeSource(const ComputeCase& c) {
|
||||
std::string s =
|
||||
"#version 460\n"
|
||||
"#extension GL_EXT_ray_query : enable\n"
|
||||
"#extension GL_EXT_shader_image_load_formatted : enable\n"
|
||||
"#extension GL_EXT_descriptor_heap : enable\n"
|
||||
"#extension GL_EXT_nonuniform_qualifier : enable\n"
|
||||
"layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[];\n"
|
||||
"layout(descriptor_heap) uniform writeonly image2D image[];\n";
|
||||
s += c.glsl;
|
||||
s += "layout(local_size_x = 64) in;\n";
|
||||
s += "void main() {\n";
|
||||
s += " vec3 origin = vec3(0.0);\n";
|
||||
s += " vec3 dir = vec3(0.0, 0.0, 1.0);\n";
|
||||
s += " rayQueryEXT rq;\n";
|
||||
s += " rayQueryInitializeEXT(rq, topLevelAS[0], gl_RayFlagsNoneEXT, 0xFF, origin, 0.001, dir, 10000.0);\n";
|
||||
s += " while (rayQueryProceedEXT(rq)) {}\n";
|
||||
float pushRef = 0; (void)pushRef;
|
||||
std::string val = c.hasExistingPushConst ? "float(pc.f)" : "1.0";
|
||||
s += " imageStore(image[0], ivec2(gl_GlobalInvocationID.xy), vec4(" + val + "));\n";
|
||||
s += "}\n";
|
||||
return s;
|
||||
}
|
||||
|
||||
// Compile `source` for `stage`, run Patch(), and assert: spirv-val accepts it,
|
||||
// exactly one push-constant variable survives, and Patch reports patched/offset
|
||||
// matching expectations. Returns true on success.
|
||||
bool RunCase(const fs::path& dir, std::string_view name, std::string_view stage,
|
||||
const std::string& source, bool readsAccelStruct,
|
||||
std::uint32_t expectedOffset) {
|
||||
const fs::path glslPath = dir / (std::string(name) + "." + std::string(stage) + ".glsl");
|
||||
const fs::path spvPath = dir / (std::string(name) + ".spv");
|
||||
const fs::path patched = dir / (std::string(name) + ".patched.spv");
|
||||
|
||||
{ std::ofstream f(glslPath); f << source; }
|
||||
|
||||
std::string compile = "glslang --target-env vulkan1.4 -V -S " + std::string(stage)
|
||||
+ " \"" + glslPath.string() + "\" -o \"" + spvPath.string() + "\" > /dev/null";
|
||||
if (RunCommand(compile) != 0) {
|
||||
std::println(std::cerr, "[{}] glslang failed to compile the source shader", name);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::uint32_t> words = ReadSpirv(spvPath);
|
||||
if (words.size() < 5) {
|
||||
std::println(std::cerr, "[{}] could not read compiled SPIR-V", name);
|
||||
return false;
|
||||
}
|
||||
|
||||
WorkaroundNvidiaAS::PatchResult patch = WorkaroundNvidiaAS::Patch(words);
|
||||
WriteSpirv(patched, words);
|
||||
|
||||
// 1. The patched module must pass spirv-val under the engine's flags.
|
||||
std::string validate = "spirv-val \"" + patched.string()
|
||||
+ "\" --relax-block-layout --scalar-block-layout --target-env vulkan1.4";
|
||||
if (RunCommand(validate) != 0) {
|
||||
std::println(std::cerr, "[{}] spirv-val rejected the patched module", name);
|
||||
return false;
|
||||
}
|
||||
|
||||
// 2. Exactly one push-constant variable — the whole point of issue #18.
|
||||
int pcVars = CountPushConstantVariables(words);
|
||||
if (pcVars != 1) {
|
||||
std::println(std::cerr, "[{}] expected exactly 1 push-constant variable, found {}", name, pcVars);
|
||||
return false;
|
||||
}
|
||||
|
||||
// 3. Patch must report it rewrote the shader exactly when it reads an AS.
|
||||
if (patch.patched != readsAccelStruct) {
|
||||
std::println(std::cerr, "[{}] expected patched={}, got {}", name, readsAccelStruct, patch.patched);
|
||||
return false;
|
||||
}
|
||||
|
||||
// 4. The returned TLAS offset must match the expected layout end.
|
||||
if (readsAccelStruct && patch.tlasPushOffset != expectedOffset) {
|
||||
std::println(std::cerr, "[{}] expected TLAS push offset {}, got {}",
|
||||
name, expectedOffset, patch.tlasPushOffset);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::println(std::cout, "[{}] ok (push-constant vars: {}, tlas offset: {})",
|
||||
name, pcVars, readsAccelStruct ? patch.tlasPushOffset : 0u);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main() {
|
||||
|
|
@ -165,58 +275,14 @@ int main() {
|
|||
|
||||
int failures = 0;
|
||||
for (const Case& c : kCases) {
|
||||
const fs::path glslPath = dir / (std::string(c.name) + ".rgen.glsl");
|
||||
const fs::path spvPath = dir / (std::string(c.name) + ".spv");
|
||||
const fs::path patched = dir / (std::string(c.name) + ".patched.spv");
|
||||
|
||||
{ std::ofstream f(glslPath); f << BuildSource(c); }
|
||||
|
||||
std::string compile = "glslang --target-env vulkan1.4 -V -S rgen \""
|
||||
+ glslPath.string() + "\" -o \"" + spvPath.string() + "\" > /dev/null";
|
||||
if (RunCommand(compile) != 0) {
|
||||
std::println(std::cerr, "[{}] glslang failed to compile the source shader", c.name);
|
||||
if (!RunCase(dir, c.name, "rgen", BuildSource(c), c.readsAccelStruct, c.expectedOffset))
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<std::uint32_t> words = ReadSpirv(spvPath);
|
||||
if (words.size() < 5) {
|
||||
std::println(std::cerr, "[{}] could not read compiled SPIR-V", c.name);
|
||||
}
|
||||
// Ray-querying compute shaders (issue #21) — must be patched and report a
|
||||
// correct per-shader offset just like the raygen cases above.
|
||||
for (const ComputeCase& c : kComputeCases) {
|
||||
if (!RunCase(dir, c.name, "comp", BuildComputeSource(c), /*readsAccelStruct=*/true, c.expectedOffset))
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
|
||||
Device::workaroundTlasPushOffset = 0xDEADBEEFu; // poison so we know Patch set it
|
||||
WorkaroundNvidiaAS::Patch(words);
|
||||
WriteSpirv(patched, words);
|
||||
|
||||
// 1. The patched module must pass spirv-val under the engine's flags.
|
||||
std::string validate = "spirv-val \"" + patched.string()
|
||||
+ "\" --relax-block-layout --scalar-block-layout --target-env vulkan1.4";
|
||||
if (RunCommand(validate) != 0) {
|
||||
std::println(std::cerr, "[{}] spirv-val rejected the patched module", c.name);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2. Exactly one push-constant variable — the whole point of issue #18.
|
||||
int pcVars = CountPushConstantVariables(words);
|
||||
if (pcVars != 1) {
|
||||
std::println(std::cerr, "[{}] expected exactly 1 push-constant variable, found {}", c.name, pcVars);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3. The TLAS offset Patch published must match the expected layout end.
|
||||
if (c.readsAccelStruct && Device::workaroundTlasPushOffset != c.expectedOffset) {
|
||||
std::println(std::cerr, "[{}] expected TLAS push offset {}, got {}",
|
||||
c.name, c.expectedOffset, Device::workaroundTlasPushOffset);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::println(std::cout, "[{}] ok (push-constant vars: {}, tlas offset: {})",
|
||||
c.name, pcVars, c.readsAccelStruct ? Device::workaroundTlasPushOffset : 0u);
|
||||
}
|
||||
|
||||
if (failures != 0) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue