Crafter.Graphics/interfaces/Crafter.Graphics-PipelineRTVulkan.cppm

137 lines
6.9 KiB
Text
Raw Permalink Normal View History

2026-01-29 00:45:02 +01:00
/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
2026-05-18 02:07:48 +02:00
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
2026-03-02 23:53:13 +01:00
#include "vulkan/vulkan.h"
2026-05-18 02:07:48 +02:00
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
2026-01-29 00:45:02 +01:00
export module Crafter.Graphics:PipelineRTVulkan;
2026-05-18 02:07:48 +02:00
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
2026-01-29 00:45:02 +01:00
import std;
2026-03-09 20:10:19 +01:00
import :Device;
2026-01-29 01:31:17 +01:00
import :VulkanBuffer;
2026-01-31 21:08:42 +01:00
import :ShaderBindingTableVulkan;
2026-01-29 01:31:17 +01:00
import :Types;
2026-01-29 00:45:02 +01:00
export namespace Crafter {
2026-04-05 22:53:59 +02:00
struct PipelineRTVulkan {
2026-02-22 00:46:38 +01:00
VkPipeline pipeline;
std::vector<std::uint8_t> shaderHandles;
2026-04-10 22:26:15 +02:00
VulkanBuffer<std::uint8_t, true> sbtBuffer;
2026-02-22 00:46:38 +01:00
VkStridedDeviceAddressRegionKHR raygenRegion;
VkStridedDeviceAddressRegionKHR missRegion;
VkStridedDeviceAddressRegionKHR hitRegion;
VkStridedDeviceAddressRegionKHR callableRegion;
fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21) Two gaps in the Vulkan RT path that fault the device on the NVIDIA proprietary driver with a non-trivial pipeline (simple VulkanTriangle never hit them): 1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit shader that traces a secondary ray (shadow ray — a very common pattern) recursed past the pipeline limit (UB → device fault). PipelineRTVulkan::Init now takes a maxRecursionDepth parameter (default 1, clamped to the device's maxRayRecursionDepth). 2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader that reads an accelerationStructureEXT from the heap — including compute shaders — to read the TLAS device address from a push constant, but only RTPass pushed that address. A compute shader that ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten push slot → garbage AS handle → VK_ERROR_DEVICE_LOST. WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult {patched, tlasPushOffset} instead of writing the clobber-prone global Device::workaroundTlasPushOffset (removed). VulkanShader stores it; ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and ComputeShader tracks its own offset and pushes the caller-supplied TLAS address in Dispatch (new defaulted tlasAddress parameter), mirroring RTPass::Record. The PushConstantRewrite regression test now asserts Patch's returned patched/offset and adds two ray-querying compute-shader cases, proving the rewrite is stage-agnostic and the per-shader offset is correct. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): copied
// from the shader table at Init so RTPass can push the active TLAS
// device address into the patched shaders' push constant. Inert on
// every other driver.
bool workaroundNeedsTlas = false;
std::uint32_t workaroundTlasPushOffset = 0;
// maxRecursionDepth: the maximum ray-recursion depth the pipeline must
// support — i.e. the deepest chain of nested traceRayEXT calls. The
// raygen counts as depth 1, so a closest-hit shader that traces a shadow
// ray needs 2. Tracing beyond the value the pipeline was created with is
// undefined behaviour and faults the device, so a consumer with any
// recursion past the raygen must raise this. Defaults to 1 (raygen-only,
// matching the simple examples) and is clamped to the device's
// maxRayRecursionDepth.
void Init(VkCommandBuffer cmd, std::span<VkRayTracingShaderGroupCreateInfoKHR> raygenGroups, std::span<VkRayTracingShaderGroupCreateInfoKHR> missGroups, std::span<VkRayTracingShaderGroupCreateInfoKHR> hitGroups, ShaderBindingTableVulkan& shaderTable, std::uint32_t maxRecursionDepth = 1) {
workaroundNeedsTlas = shaderTable.workaroundNeedsTlas;
workaroundTlasPushOffset = shaderTable.workaroundTlasPushOffset;
2026-02-22 00:46:38 +01:00
std::vector<VkRayTracingShaderGroupCreateInfoKHR> groups;
groups.reserve(raygenGroups.size() + missGroups.size() + hitGroups.size());
groups.insert(groups.end(), raygenGroups.begin(), raygenGroups.end());
groups.insert(groups.end(), missGroups.begin(), missGroups.end());
groups.insert(groups.end(), hitGroups.begin(), hitGroups.end());
2026-04-05 22:53:59 +02:00
VkPipelineCreateFlags2CreateInfo flags2 = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO,
.flags = VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT
};
VkRayTracingPipelineCreateInfoKHR rtPipelineInfo {
2026-02-22 00:46:38 +01:00
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,
2026-04-05 22:53:59 +02:00
.pNext = &flags2,
.flags = 0,
2026-02-22 00:46:38 +01:00
.stageCount = static_cast<std::uint32_t>(shaderTable.shaderStages.size()),
.pStages = shaderTable.shaderStages.data(),
.groupCount = static_cast<std::uint32_t>(groups.size()),
.pGroups = groups.data(),
fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21) Two gaps in the Vulkan RT path that fault the device on the NVIDIA proprietary driver with a non-trivial pipeline (simple VulkanTriangle never hit them): 1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit shader that traces a secondary ray (shadow ray — a very common pattern) recursed past the pipeline limit (UB → device fault). PipelineRTVulkan::Init now takes a maxRecursionDepth parameter (default 1, clamped to the device's maxRayRecursionDepth). 2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader that reads an accelerationStructureEXT from the heap — including compute shaders — to read the TLAS device address from a push constant, but only RTPass pushed that address. A compute shader that ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten push slot → garbage AS handle → VK_ERROR_DEVICE_LOST. WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult {patched, tlasPushOffset} instead of writing the clobber-prone global Device::workaroundTlasPushOffset (removed). VulkanShader stores it; ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and ComputeShader tracks its own offset and pushes the caller-supplied TLAS address in Dispatch (new defaulted tlasAddress parameter), mirroring RTPass::Record. The PushConstantRewrite regression test now asserts Patch's returned patched/offset and adds two ray-querying compute-shader cases, proving the rewrite is stage-agnostic and the per-shader offset is correct. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00
.maxPipelineRayRecursionDepth = std::min(maxRecursionDepth, Device::rayTracingProperties.maxRayRecursionDepth),
2026-04-05 22:53:59 +02:00
.layout = VK_NULL_HANDLE
2026-02-22 00:46:38 +01:00
};
2026-03-09 20:10:19 +01:00
Device::CheckVkResult(Device::vkCreateRayTracingPipelinesKHR(Device::device, {}, {}, 1, &rtPipelineInfo, nullptr, &pipeline));
2026-02-22 00:46:38 +01:00
2026-03-09 20:10:19 +01:00
std::size_t dataSize = Device::rayTracingProperties.shaderGroupHandleSize * rtPipelineInfo.groupCount;
2026-02-22 00:46:38 +01:00
shaderHandles.resize(dataSize);
2026-03-09 20:10:19 +01:00
Device::CheckVkResult(Device::vkGetRayTracingShaderGroupHandlesKHR(Device::device, pipeline, 0, rtPipelineInfo.groupCount, dataSize, shaderHandles.data()));
2026-02-22 00:46:38 +01:00
2026-03-09 20:10:19 +01:00
std::uint32_t sbtStride = AlignUp(Device::rayTracingProperties.shaderGroupHandleSize, Device::rayTracingProperties.shaderGroupHandleAlignment);
2026-02-22 00:46:38 +01:00
raygenRegion.stride = sbtStride;
raygenRegion.deviceAddress = 0;
raygenRegion.size = raygenGroups.size() * sbtStride;
missRegion.stride = sbtStride;
2026-03-09 20:10:19 +01:00
missRegion.deviceAddress = AlignUp(raygenRegion.size, Device::rayTracingProperties.shaderGroupBaseAlignment);
2026-02-22 00:46:38 +01:00
missRegion.size = missGroups.size() * sbtStride;
hitRegion.stride = sbtStride;
2026-03-09 20:10:19 +01:00
hitRegion.deviceAddress = AlignUp(missRegion.deviceAddress + missRegion.size, Device::rayTracingProperties.shaderGroupBaseAlignment);
2026-02-22 00:46:38 +01:00
hitRegion.size = hitGroups.size() * sbtStride;
std::size_t bufferSize = hitRegion.deviceAddress + hitRegion.size;
sbtBuffer.Create(VK_BUFFER_USAGE_2_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, bufferSize);
std::uint8_t* offset = sbtBuffer.value;
std::uint8_t* handleOffset = shaderHandles.data();
2026-03-09 20:10:19 +01:00
std::memcpy(offset, handleOffset, raygenGroups.size() * Device::rayTracingProperties.shaderGroupHandleSize);
offset += AlignUp(raygenRegion.size, Device::rayTracingProperties.shaderGroupBaseAlignment);
handleOffset += raygenGroups.size() * Device::rayTracingProperties.shaderGroupHandleSize;
2026-02-22 00:46:38 +01:00
2026-03-09 20:10:19 +01:00
std::memcpy(offset, handleOffset, missGroups.size() * Device::rayTracingProperties.shaderGroupHandleSize);
offset += AlignUp(missRegion.size, Device::rayTracingProperties.shaderGroupBaseAlignment);
handleOffset += missGroups.size() * Device::rayTracingProperties.shaderGroupHandleSize;
2026-02-22 00:46:38 +01:00
2026-03-09 20:10:19 +01:00
std::memcpy(offset, handleOffset, hitGroups.size() * Device::rayTracingProperties.shaderGroupHandleSize);
2026-02-22 00:46:38 +01:00
sbtBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR);
raygenRegion.deviceAddress += sbtBuffer.address;
missRegion.deviceAddress += sbtBuffer.address;
hitRegion.deviceAddress += sbtBuffer.address;
callableRegion.deviceAddress = 0;
callableRegion.stride = 0;
callableRegion.size = 0;
}
2026-04-10 20:51:16 +02:00
~PipelineRTVulkan() {
vkDestroyPipeline(Device::device, pipeline, nullptr);
}
2026-02-22 00:46:38 +01:00
};
2026-05-18 02:07:48 +02:00
}
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM