Crafter.Graphics/interfaces/Crafter.Graphics-RTPass.cppm

121 lines
5.1 KiB
Text
Raw Normal View History

2025-11-25 19:54:03 +01:00
/*
Crafter®.Graphics
2026-03-09 20:10:19 +01:00
Copyright (C) 2026 Catcrafts®
2025-11-25 19:54:03 +01:00
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
2026-05-01 23:35:37 +02:00
module;
2026-05-18 02:07:48 +02:00
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
2026-05-01 23:35:37 +02:00
#include "vulkan/vulkan.h"
2026-05-18 02:07:48 +02:00
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
2026-05-01 23:35:37 +02:00
export module Crafter.Graphics:RTPass;
2026-05-18 02:07:48 +02:00
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
2025-11-25 19:54:03 +01:00
import std;
2026-05-01 23:35:37 +02:00
import :RenderPass;
import :Window;
import :Device;
import :PipelineRTVulkan;
fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15) Reading an acceleration structure through VK_EXT_descriptor_heap aborts with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension driver fault isolated in #7 (engine setup is correct and validation-clean; images/buffers through the same heap work, and both traceRayEXT and inline rayQuery fault identically on the AS read). An acceleration structure can equally be reached by its device address via OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never touches the faulting heap path. glslang has no GLSL spelling for that conversion, so VulkanShader rewrites the compiled SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS device address from a synthesized push-constant block followed by the convert. RTPass pushes the active frame's TLAS address into that push constant. User GLSL and example code are unchanged; acceleration structures still bind into the heap normally. The workaround is gated on Device::workaroundDescriptorHeapAS (true only on the NVIDIA proprietary driver) and confined to one fenced block in Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64 feature toggle — delete those once a fixed NVIDIA driver ships and the heap AS read becomes the direct path again. Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090), validation-layer-clean, no device loss. The SPIR-V rewrite was independently validated with spirv-val on both the VulkanTriangle and Sponza raygen modules. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00
import :RenderingElement3D;
2025-11-25 19:54:03 +01:00
export namespace Crafter {
2026-05-01 23:35:37 +02:00
struct RTPass : RenderPass {
PipelineRTVulkan* pipeline;
2026-03-09 20:10:19 +01:00
2026-05-01 23:35:37 +02:00
RTPass(PipelineRTVulkan* p) : pipeline(p) {}
fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15) Reading an acceleration structure through VK_EXT_descriptor_heap aborts with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension driver fault isolated in #7 (engine setup is correct and validation-clean; images/buffers through the same heap work, and both traceRayEXT and inline rayQuery fault identically on the AS read). An acceleration structure can equally be reached by its device address via OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never touches the faulting heap path. glslang has no GLSL spelling for that conversion, so VulkanShader rewrites the compiled SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS device address from a synthesized push-constant block followed by the convert. RTPass pushes the active frame's TLAS address into that push constant. User GLSL and example code are unchanged; acceleration structures still bind into the heap normally. The workaround is gated on Device::workaroundDescriptorHeapAS (true only on the NVIDIA proprietary driver) and confined to one fenced block in Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64 feature toggle — delete those once a fixed NVIDIA driver ships and the heap AS read becomes the direct path again. Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090), validation-layer-clean, no device loss. The SPIR-V rewrite was independently validated with spirv-val on both the VulkanTriangle and Sponza raygen modules. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00
void Record(VkCommandBuffer cmd, std::uint32_t frameIdx, Window& window) override {
2026-05-01 23:35:37 +02:00
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline->pipeline);
fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15) Reading an acceleration structure through VK_EXT_descriptor_heap aborts with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension driver fault isolated in #7 (engine setup is correct and validation-clean; images/buffers through the same heap work, and both traceRayEXT and inline rayQuery fault identically on the AS read). An acceleration structure can equally be reached by its device address via OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never touches the faulting heap path. glslang has no GLSL spelling for that conversion, so VulkanShader rewrites the compiled SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS device address from a synthesized push-constant block followed by the convert. RTPass pushes the active frame's TLAS address into that push constant. User GLSL and example code are unchanged; acceleration structures still bind into the heap normally. The workaround is gated on Device::workaroundDescriptorHeapAS (true only on the NVIDIA proprietary driver) and confined to one fenced block in Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64 feature toggle — delete those once a fixed NVIDIA driver ships and the heap AS read becomes the direct path again. Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090), validation-layer-clean, no device loss. The SPIR-V rewrite was independently validated with spirv-val on both the VulkanTriangle and Sponza raygen modules. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): feed
// the active frame's TLAS device address into the push-constant
// block that VulkanShader synthesizes, so the rewritten raygen can
// reach the acceleration structure by address instead of through
// the faulting heap descriptor. Inert on every other driver.
fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21) Two gaps in the Vulkan RT path that fault the device on the NVIDIA proprietary driver with a non-trivial pipeline (simple VulkanTriangle never hit them): 1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit shader that traces a secondary ray (shadow ray — a very common pattern) recursed past the pipeline limit (UB → device fault). PipelineRTVulkan::Init now takes a maxRecursionDepth parameter (default 1, clamped to the device's maxRayRecursionDepth). 2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader that reads an accelerationStructureEXT from the heap — including compute shaders — to read the TLAS device address from a push constant, but only RTPass pushed that address. A compute shader that ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten push slot → garbage AS handle → VK_ERROR_DEVICE_LOST. WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult {patched, tlasPushOffset} instead of writing the clobber-prone global Device::workaroundTlasPushOffset (removed). VulkanShader stores it; ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and ComputeShader tracks its own offset and pushes the caller-supplied TLAS address in Dispatch (new defaulted tlasAddress parameter), mirroring RTPass::Record. The PushConstantRewrite regression test now asserts Patch's returned patched/offset and adds two ray-querying compute-shader cases, proving the rewrite is stage-agnostic and the per-shader offset is correct. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00
if (Device::workaroundDescriptorHeapAS && pipeline->workaroundNeedsTlas) {
fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15) Reading an acceleration structure through VK_EXT_descriptor_heap aborts with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension driver fault isolated in #7 (engine setup is correct and validation-clean; images/buffers through the same heap work, and both traceRayEXT and inline rayQuery fault identically on the AS read). An acceleration structure can equally be reached by its device address via OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never touches the faulting heap path. glslang has no GLSL spelling for that conversion, so VulkanShader rewrites the compiled SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS device address from a synthesized push-constant block followed by the convert. RTPass pushes the active frame's TLAS address into that push constant. User GLSL and example code are unchanged; acceleration structures still bind into the heap normally. The workaround is gated on Device::workaroundDescriptorHeapAS (true only on the NVIDIA proprietary driver) and confined to one fenced block in Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64 feature toggle — delete those once a fixed NVIDIA driver ships and the heap AS read becomes the direct path again. Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090), validation-layer-clean, no device loss. The SPIR-V rewrite was independently validated with spirv-val on both the VulkanTriangle and Sponza raygen modules. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00
VkDeviceAddress tlasAddr = RenderingElement3D::tlases[frameIdx].address;
VkPushDataInfoEXT pushInfo {
.sType = VK_STRUCTURE_TYPE_PUSH_DATA_INFO_EXT,
fix(vulkan-rt): merge TLAS push constant into existing block (#18) The NVIDIA descriptor-heap AS-read workaround (#15) rewrote heap acceleration-structure reads into a load of the TLAS device address from a push-constant block. It always *synthesized a new* push-constant block, so any ray-tracing shader that already declared one ended up with two — which SPIR-V forbids ("at most one push constant block statically used per entry point"), and vkCreateShaderModule's spirv-val check rejected: Entry point id '4' uses more than one PushConstant interface. WorkaroundNvidiaAS::Patch now detects an existing PushConstant variable and, when present, appends a single ulong member (the TLAS address) to that block instead of adding a second one, reading the address through the shader's own push-constant variable. The append offset is the end of the user's block, computed from the members' explicit Offset/ArrayStride/ MatrixStride decorations (correct under both scalar and std140 layout) and rounded up to 8. Shaders with no push constant of their own keep getting a freshly synthesized single-member block at offset 0, exactly as before. That offset is published via Device::workaroundTlasPushOffset and RTPass feeds it to vkCmdPushDataEXT so the address lands where the rewritten load reads it (0 for the synthesized case, preserving prior behaviour). Verified on the affected driver (NVIDIA 610.43.02, RTX 4090): VulkanTriangle ray-traces correctly and validation-clean both with and without a user-declared raygen push constant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 02:28:02 +00:00
// Where the rewritten raygen reads the TLAS address: 0 when
// VulkanShader synthesized a fresh block, or the offset of
// the member it appended to the shader's existing block.
fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21) Two gaps in the Vulkan RT path that fault the device on the NVIDIA proprietary driver with a non-trivial pipeline (simple VulkanTriangle never hit them): 1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit shader that traces a secondary ray (shadow ray — a very common pattern) recursed past the pipeline limit (UB → device fault). PipelineRTVulkan::Init now takes a maxRecursionDepth parameter (default 1, clamped to the device's maxRayRecursionDepth). 2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader that reads an accelerationStructureEXT from the heap — including compute shaders — to read the TLAS device address from a push constant, but only RTPass pushed that address. A compute shader that ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten push slot → garbage AS handle → VK_ERROR_DEVICE_LOST. WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult {patched, tlasPushOffset} instead of writing the clobber-prone global Device::workaroundTlasPushOffset (removed). VulkanShader stores it; ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and ComputeShader tracks its own offset and pushes the caller-supplied TLAS address in Dispatch (new defaulted tlasAddress parameter), mirroring RTPass::Record. The PushConstantRewrite regression test now asserts Patch's returned patched/offset and adds two ray-querying compute-shader cases, proving the rewrite is stage-agnostic and the per-shader offset is correct. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00
// Tracked per-pipeline (copied from the shader table) so a
// later-loaded shader can't clobber it.
.offset = pipeline->workaroundTlasPushOffset,
fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15) Reading an acceleration structure through VK_EXT_descriptor_heap aborts with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension driver fault isolated in #7 (engine setup is correct and validation-clean; images/buffers through the same heap work, and both traceRayEXT and inline rayQuery fault identically on the AS read). An acceleration structure can equally be reached by its device address via OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never touches the faulting heap path. glslang has no GLSL spelling for that conversion, so VulkanShader rewrites the compiled SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS device address from a synthesized push-constant block followed by the convert. RTPass pushes the active frame's TLAS address into that push constant. User GLSL and example code are unchanged; acceleration structures still bind into the heap normally. The workaround is gated on Device::workaroundDescriptorHeapAS (true only on the NVIDIA proprietary driver) and confined to one fenced block in Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64 feature toggle — delete those once a fixed NVIDIA driver ships and the heap AS read becomes the direct path again. Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090), validation-layer-clean, no device loss. The SPIR-V rewrite was independently validated with spirv-val on both the VulkanTriangle and Sponza raygen modules. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00
.data = { .address = &tlasAddr, .size = sizeof(tlasAddr) },
};
Device::vkCmdPushDataEXT(cmd, &pushInfo);
}
2026-05-01 23:35:37 +02:00
Device::vkCmdTraceRaysKHR(cmd,
&pipeline->raygenRegion,
&pipeline->missRegion,
&pipeline->hitRegion,
&pipeline->callableRegion,
window.width, window.height, 1);
}
2025-11-25 19:54:03 +01:00
};
2026-05-01 23:35:37 +02:00
}
2026-05-18 02:07:48 +02:00
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
2026-05-18 18:43:30 +02:00
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :RenderPass;
import :Window;
import :WebGPU;
import :PipelineRTWebGPU;
import :RenderingElement3D;
export namespace Crafter {
// DOM-mode RT pass — dispatches the megakernel pipeline at frame Record
// time. Picks up the current TLAS for the frame and the application's
// raygen-side push data (typically empty in v1; pass via window.passes
// wiring if needed later).
struct RTPass : RenderPass {
PipelineRTWebGPU* pipeline;
// Optional per-dispatch push data forwarded after the standard
// RTDispatchHeader. Null means "no extra data".
const void* pushPtr = nullptr;
std::uint32_t pushBytes = 0;
2026-05-19 00:27:09 +02:00
// Resolved WebGPU resource handles for each user binding the
// pipeline was loaded with, in declaration order. The example
// owns the storage (typically a small std::array of u32). Null /
// 0 means "no user bindings".
const void* handlesPtr = nullptr;
std::uint32_t handlesCount = 0;
// Wavefront bounce budget: number of (TRACE; SHADE) iterations.
// 1 = primary rays only; 2 = primary + one continuation/shadow
// bounce; etc. The library unrolls GENERATE; (PREP; TRACE; SHADE)
// ×maxDepth; RESOLVE.
std::uint32_t maxDepth = 1;
2026-05-18 18:43:30 +02:00
RTPass(PipelineRTWebGPU* p) : pipeline(p) {}
void Record(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t frameIdx, Window& window) override {
const std::uint32_t gx = (window.width + 7u) / 8u;
const std::uint32_t gy = (window.height + 7u) / 8u;
auto& tlas = RenderingElement3D::tlases[frameIdx];
WebGPU::wgpuDispatchRT(
pipeline->pipelineHandle,
pushPtr,
static_cast<std::int32_t>(pushBytes),
tlas.buffer.handle,
static_cast<std::int32_t>(tlas.builtInstanceCount),
static_cast<std::int32_t>(gx),
2026-05-19 00:27:09 +02:00
static_cast<std::int32_t>(gy),
handlesPtr,
static_cast<std::int32_t>(handlesCount),
static_cast<std::int32_t>(maxDepth));
2026-05-18 18:43:30 +02:00
}
};
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM