Two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):
1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
shader that traces a secondary ray (shadow ray — a very common
pattern) recursed past the pipeline limit (UB → device fault).
PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
(default 1, clamped to the device's maxRayRecursionDepth).
2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
that reads an accelerationStructureEXT from the heap — including
compute shaders — to read the TLAS device address from a push
constant, but only RTPass pushed that address. A compute shader that
ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.
WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
{patched, tlasPushOffset} instead of writing the clobber-prone global
Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
ComputeShader tracks its own offset and pushes the caller-supplied
TLAS address in Dispatch (new defaulted tlasAddress parameter),
mirroring RTPass::Record.
The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
121 lines
5.1 KiB
C++
121 lines
5.1 KiB
C++
/*
|
||
Crafter®.Graphics
|
||
Copyright (C) 2026 Catcrafts®
|
||
catcrafts.net
|
||
|
||
This library is free software; you can redistribute it and/or
|
||
modify it under the terms of the GNU Lesser General Public
|
||
License version 3.0 as published by the Free Software Foundation;
|
||
|
||
This library is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
Lesser General Public License for more details.
|
||
|
||
You should have received a copy of the GNU Lesser General Public
|
||
License along with this library; if not, write to the Free Software
|
||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
*/
|
||
module;
|
||
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
#include "vulkan/vulkan.h"
|
||
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
||
export module Crafter.Graphics:RTPass;
|
||
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
import std;
|
||
import :RenderPass;
|
||
import :Window;
|
||
import :Device;
|
||
import :PipelineRTVulkan;
|
||
import :RenderingElement3D;
|
||
|
||
export namespace Crafter {
|
||
struct RTPass : RenderPass {
|
||
PipelineRTVulkan* pipeline;
|
||
|
||
RTPass(PipelineRTVulkan* p) : pipeline(p) {}
|
||
|
||
void Record(VkCommandBuffer cmd, std::uint32_t frameIdx, Window& window) override {
|
||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline->pipeline);
|
||
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): feed
|
||
// the active frame's TLAS device address into the push-constant
|
||
// block that VulkanShader synthesizes, so the rewritten raygen can
|
||
// reach the acceleration structure by address instead of through
|
||
// the faulting heap descriptor. Inert on every other driver.
|
||
if (Device::workaroundDescriptorHeapAS && pipeline->workaroundNeedsTlas) {
|
||
VkDeviceAddress tlasAddr = RenderingElement3D::tlases[frameIdx].address;
|
||
VkPushDataInfoEXT pushInfo {
|
||
.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT,
|
||
// Where the rewritten raygen reads the TLAS address: 0 when
|
||
// VulkanShader synthesized a fresh block, or the offset of
|
||
// the member it appended to the shader's existing block.
|
||
// Tracked per-pipeline (copied from the shader table) so a
|
||
// later-loaded shader can't clobber it.
|
||
.offset = pipeline->workaroundTlasPushOffset,
|
||
.data = { .address = &tlasAddr, .size = sizeof(tlasAddr) },
|
||
};
|
||
Device::vkCmdPushDataEXT(cmd, &pushInfo);
|
||
}
|
||
Device::vkCmdTraceRaysKHR(cmd,
|
||
&pipeline->raygenRegion,
|
||
&pipeline->missRegion,
|
||
&pipeline->hitRegion,
|
||
&pipeline->callableRegion,
|
||
window.width, window.height, 1);
|
||
}
|
||
};
|
||
}
|
||
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
||
|
||
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
import std;
|
||
import :RenderPass;
|
||
import :Window;
|
||
import :WebGPU;
|
||
import :PipelineRTWebGPU;
|
||
import :RenderingElement3D;
|
||
|
||
export namespace Crafter {
|
||
// DOM-mode RT pass — dispatches the megakernel pipeline at frame Record
|
||
// time. Picks up the current TLAS for the frame and the application's
|
||
// raygen-side push data (typically empty in v1; pass via window.passes
|
||
// wiring if needed later).
|
||
struct RTPass : RenderPass {
|
||
PipelineRTWebGPU* pipeline;
|
||
// Optional per-dispatch push data forwarded after the standard
|
||
// RTDispatchHeader. Null means "no extra data".
|
||
const void* pushPtr = nullptr;
|
||
std::uint32_t pushBytes = 0;
|
||
// Resolved WebGPU resource handles for each user binding the
|
||
// pipeline was loaded with, in declaration order. The example
|
||
// owns the storage (typically a small std::array of u32). Null /
|
||
// 0 means "no user bindings".
|
||
const void* handlesPtr = nullptr;
|
||
std::uint32_t handlesCount = 0;
|
||
// Wavefront bounce budget: number of (TRACE; SHADE) iterations.
|
||
// 1 = primary rays only; 2 = primary + one continuation/shadow
|
||
// bounce; etc. The library unrolls GENERATE; (PREP; TRACE; SHADE)
|
||
// ×maxDepth; RESOLVE.
|
||
std::uint32_t maxDepth = 1;
|
||
|
||
RTPass(PipelineRTWebGPU* p) : pipeline(p) {}
|
||
|
||
void Record(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t frameIdx, Window& window) override {
|
||
const std::uint32_t gx = (window.width + 7u) / 8u;
|
||
const std::uint32_t gy = (window.height + 7u) / 8u;
|
||
auto& tlas = RenderingElement3D::tlases[frameIdx];
|
||
WebGPU::wgpuDispatchRT(
|
||
pipeline->pipelineHandle,
|
||
pushPtr,
|
||
static_cast<std::int32_t>(pushBytes),
|
||
tlas.buffer.handle,
|
||
static_cast<std::int32_t>(tlas.builtInstanceCount),
|
||
static_cast<std::int32_t>(gx),
|
||
static_cast<std::int32_t>(gy),
|
||
handlesPtr,
|
||
static_cast<std::int32_t>(handlesCount),
|
||
static_cast<std::int32_t>(maxDepth));
|
||
}
|
||
};
|
||
}
|
||
#endif // CRAFTER_GRAPHICS_WINDOW_DOM
|