Crafter.Graphics/interfaces/Crafter.Graphics-RTPass.cppm
catbot 1c310762a7 fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21)
Two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):

1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
   shader that traces a secondary ray (shadow ray — a very common
   pattern) recursed past the pipeline limit (UB → device fault).
   PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
   (default 1, clamped to the device's maxRayRecursionDepth).

2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
   that reads an accelerationStructureEXT from the heap — including
   compute shaders — to read the TLAS device address from a push
   constant, but only RTPass pushed that address. A compute shader that
   ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
   push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.

   WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
   {patched, tlasPushOffset} instead of writing the clobber-prone global
   Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
   ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
   ComputeShader tracks its own offset and pushes the caller-supplied
   TLAS address in Dispatch (new defaulted tlasAddress parameter),
   mirroring RTPass::Record.

The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00

121 lines
5.1 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#include "vulkan/vulkan.h"
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
export module Crafter.Graphics:RTPass;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :RenderPass;
import :Window;
import :Device;
import :PipelineRTVulkan;
import :RenderingElement3D;
export namespace Crafter {
struct RTPass : RenderPass {
PipelineRTVulkan* pipeline;
RTPass(PipelineRTVulkan* p) : pipeline(p) {}
void Record(VkCommandBuffer cmd, std::uint32_t frameIdx, Window& window) override {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline->pipeline);
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): feed
// the active frame's TLAS device address into the push-constant
// block that VulkanShader synthesizes, so the rewritten raygen can
// reach the acceleration structure by address instead of through
// the faulting heap descriptor. Inert on every other driver.
if (Device::workaroundDescriptorHeapAS && pipeline->workaroundNeedsTlas) {
VkDeviceAddress tlasAddr = RenderingElement3D::tlases[frameIdx].address;
VkPushDataInfoEXT pushInfo {
.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT,
// Where the rewritten raygen reads the TLAS address: 0 when
// VulkanShader synthesized a fresh block, or the offset of
// the member it appended to the shader's existing block.
// Tracked per-pipeline (copied from the shader table) so a
// later-loaded shader can't clobber it.
.offset = pipeline->workaroundTlasPushOffset,
.data = { .address = &tlasAddr, .size = sizeof(tlasAddr) },
};
Device::vkCmdPushDataEXT(cmd, &pushInfo);
}
Device::vkCmdTraceRaysKHR(cmd,
&pipeline->raygenRegion,
&pipeline->missRegion,
&pipeline->hitRegion,
&pipeline->callableRegion,
window.width, window.height, 1);
}
};
}
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :RenderPass;
import :Window;
import :WebGPU;
import :PipelineRTWebGPU;
import :RenderingElement3D;
export namespace Crafter {
// RT pass for DOM builds. Record dispatches the megakernel pipeline through
// WebGPU::wgpuDispatchRT, handing it the TLAS of the frame being recorded
// together with whatever push data and user-binding handles the application
// stored on this pass (push data is typically empty in v1; wire it through
// window.passes if it is needed later).
struct RTPass : RenderPass {
    // Pipeline whose pipelineHandle is dispatched.
    PipelineRTWebGPU* pipeline;

    // Extra per-dispatch push data, forwarded after the standard
    // RTDispatchHeader. Leave null / 0 when there is nothing to forward.
    const void* pushPtr = nullptr;
    std::uint32_t pushBytes = 0;

    // WebGPU resource handles, one per user binding the pipeline was loaded
    // with and in the same order as their declaration. Storage belongs to
    // the example (typically a small std::array of u32). Null / 0 means the
    // pipeline has no user bindings.
    const void* handlesPtr = nullptr;
    std::uint32_t handlesCount = 0;

    // Wavefront bounce budget, i.e. how many (TRACE; SHADE) iterations run.
    // 1 traces primary rays only, 2 adds one continuation/shadow bounce,
    // and so on. The library unrolls GENERATE; (PREP; TRACE; SHADE)
    // ×maxDepth; RESOLVE.
    std::uint32_t maxDepth = 1;

    RTPass(PipelineRTWebGPU* p) : pipeline(p) {}

    // Issues one RT dispatch for `frameIdx`. The command encoder argument is
    // unused; the dispatch goes through WebGPU::wgpuDispatchRT.
    void Record(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t frameIdx, Window& window) override {
        // Edge length of one dispatch tile in pixels.
        // NOTE(review): presumably matches the kernel's 8x8 workgroup — confirm.
        constexpr std::uint32_t kTileEdge = 8u;

        // Ceiling division so a partially covered tile at the right/bottom
        // edge of the window is still dispatched.
        const std::uint32_t groupsX = (window.width + (kTileEdge - 1u)) / kTileEdge;
        const std::uint32_t groupsY = (window.height + (kTileEdge - 1u)) / kTileEdge;

        auto& frameTlas = RenderingElement3D::tlases[frameIdx];

        // wgpuDispatchRT is called with 32-bit signed counts, so every
        // unsigned value is narrowed explicitly before the call.
        const auto pushByteCount = static_cast<std::int32_t>(pushBytes);
        const auto instanceCount = static_cast<std::int32_t>(frameTlas.builtInstanceCount);
        const auto groupCountX   = static_cast<std::int32_t>(groupsX);
        const auto groupCountY   = static_cast<std::int32_t>(groupsY);
        const auto handleCount   = static_cast<std::int32_t>(handlesCount);
        const auto bounceBudget  = static_cast<std::int32_t>(maxDepth);

        WebGPU::wgpuDispatchRT(
            pipeline->pipelineHandle,
            pushPtr,
            pushByteCount,
            frameTlas.buffer.handle,
            instanceCount,
            groupCountX,
            groupCountY,
            handlesPtr,
            handleCount,
            bounceBudget);
    }
};
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM