Crafter.Graphics/interfaces/Crafter.Graphics-ShaderVulkan.cppm
catbot 950059c86e fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15)
Reading an acceleration structure through VK_EXT_descriptor_heap aborts
with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension
driver fault isolated in #7 (engine setup is correct and validation-clean;
images/buffers through the same heap work, and both traceRayEXT and inline
rayQuery fault identically on the AS read).

An acceleration structure can equally be reached by its device address via
OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never
touches the faulting heap path. glslang has no GLSL spelling for that
conversion, so VulkanShader rewrites the compiled SPIR-V at module-load
time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS
device address from a synthesized push-constant block followed by the
convert. RTPass pushes the active frame's TLAS address into that push
constant. User GLSL and example code are unchanged; acceleration structures
still bind into the heap normally.

The workaround is gated on Device::workaroundDescriptorHeapAS (true only on
the NVIDIA proprietary driver) and confined to one fenced block in
Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64
feature toggle — delete those once a fixed NVIDIA driver ships and the heap
AS read becomes the direct path again.

Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090),
validation-layer-clean, no device loss. The SPIR-V rewrite was independently
validated with spirv-val on both the VulkanTriangle and Sponza raygen
modules.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00

242 lines
11 KiB
C++

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#include "vulkan/vulkan.h"
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
export module Crafter.Graphics:ShaderVulkan;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :Device;
import :Types;
// ─── BEGIN NVIDIA descriptor-heap AS-read workaround (issue #15 / #7) ─────
// Remove this whole block (and its call below, Device::workaroundDescriptorHeapAS,
// and the RTPass push-data) once NVIDIA ships a driver that fixes the
// VK_EXT_descriptor_heap acceleration-structure read fault.
//
// On the affected driver, reading an `accelerationStructureEXT` out of the
// descriptor heap aborts the device. The build, the heap descriptor write and
// everything else are correct (proven in #7); only the in-shader heap AS read
// is broken — buffers/images through the same heap work. Acceleration
// structures can equally be addressed by their device address, and
// OpConvertUToAccelerationStructureKHR (which reads no descriptor) sidesteps
// the faulting path entirely.
//
// glslang has no GLSL spelling for that conversion, so we rewrite the compiled
// SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a
// load of the TLAS device address from a synthesized push-constant block
// followed by OpConvertUToAccelerationStructureKHR. RTPass pushes the active
// frame's TLAS address into that push constant. Shaders that never touch an
// acceleration structure (no OpTypeAccelerationStructureKHR) are left untouched.
namespace WorkaroundNvidiaAS {
    // SPIR-V numeric opcodes / enums used below (values as assigned by the
    // SPIR-V specification and the KHR ray-tracing / ray-query extensions).
    enum : std::uint32_t {
        OpEntryPoint = 15, OpCapability = 17,
        OpTypeVoid = 19, OpTypeInt = 21, OpTypeStruct = 30, OpTypePointer = 32,
        OpTypeForwardPointer = 39,
        OpConstant = 43, OpFunction = 54, OpVariable = 59, OpLoad = 61, OpAccessChain = 65,
        OpDecorate = 71, OpMemberDecorate = 72,
        OpConvertUToAccelerationStructureKHR = 4447,
        OpTypeAccelerationStructureKHR = 5341,
        CapabilityInt64 = 11,
        StorageClassPushConstant = 9,
        DecorationBlock = 2, DecorationOffset = 35,
        SpirvMagic = 0x07230203u,
        SpirvVersion14 = 0x00010400u,
    };

    // True for every opcode that belongs to the module's annotation section:
    // OpDecorate/OpMemberDecorate/OpDecorationGroup/OpGroupDecorate/
    // OpGroupMemberDecorate/OpDecorateId/OpDecorate(Member)String.
    inline bool IsAnnotation(std::uint32_t op) {
        switch (op) {
            case 71: case 72: case 73: case 74: case 75:
            case 332: case 5632: case 5633:
                return true;
            default:
                return false;
        }
    }

    // True for opcodes that can only appear in the type/constant/global
    // section. Used solely as a fallback anchor for the synthesized
    // decorations when a module carries no annotation at all.
    inline bool IsTypeDeclaration(std::uint32_t op) {
        return (op >= OpTypeVoid && op <= OpTypeForwardPointer)
            || op == OpTypeAccelerationStructureKHR;
    }

    // One SPIR-V instruction: word 0 is (wordCount << 16) | opcode, followed
    // by its operands.
    using Instr = std::vector<std::uint32_t>;

    // Rewrites `words` (a complete SPIR-V module) in place so that every
    // `OpLoad` whose result type is the acceleration-structure type is
    // replaced by: access-chain into a synthesized push-constant block
    // { uint64 } -> load of that uint64 -> OpConvertUToAccelerationStructureKHR.
    // The original result id is preserved, so all users of the load keep working.
    //
    // `words` is left untouched when it is not a recognizable SPIR-V module
    // (too short, wrong magic, malformed instruction lengths) or when it
    // declares no OpTypeAccelerationStructureKHR.
    inline void Patch(std::vector<std::uint32_t>& words) {
        constexpr std::size_t headerWords = 5;
        if (words.size() < headerWords || words[0] != SpirvMagic) return; // not a SPIR-V module we understand.

        // Split the instruction stream that follows the 5-word header.
        std::uint32_t bound = words[3];
        std::vector<Instr> instrs;
        for (std::size_t i = headerWords; i < words.size();) {
            const std::size_t len = words[i] >> 16;
            if (len == 0 || len > words.size() - i) return; // malformed — bail.
            const auto first = words.begin() + static_cast<std::ptrdiff_t>(i);
            instrs.emplace_back(first, first + static_cast<std::ptrdiff_t>(len));
            i += len;
        }

        // ── Scan for the AS type and for int types / constants / capability
        //    we can reuse. Operand reads are guarded by the instruction length
        //    so a truncated instruction cannot index out of bounds.
        std::uint32_t asTypeId = 0, ulongTypeId = 0, uintTypeId = 0, uintZeroId = 0;
        bool hasInt64Capability = false;
        for (const Instr& in : instrs) {
            switch (in[0] & 0xFFFFu) {
                case OpTypeAccelerationStructureKHR:
                    if (in.size() >= 2) asTypeId = in[1];
                    break;
                case OpTypeInt:
                    // Only unsigned (signedness == 0) types are reusable.
                    if (in.size() >= 4 && in[3] == 0) {
                        if (in[2] == 64) ulongTypeId = in[1];
                        else if (in[2] == 32) uintTypeId = in[1];
                    }
                    break;
                case OpConstant:
                    if (in.size() >= 4 && uintTypeId != 0 && in[1] == uintTypeId && in[3] == 0)
                        uintZeroId = in[2];
                    break;
                case OpCapability:
                    if (in.size() >= 2 && in[1] == CapabilityInt64) hasInt64Capability = true;
                    break;
                default:
                    break;
            }
        }
        if (asTypeId == 0) return; // shader never reads an acceleration structure.

        auto newId = [&] { return bound++; };
        // Builds an instruction from {opcode, operands...}, filling in the word count.
        auto mk = [](std::initializer_list<std::uint32_t> ops) {
            Instr in(ops);
            in[0] = static_cast<std::uint32_t>(in.size() << 16) | (in[0] & 0xFFFFu);
            return in;
        };

        // ── Synthesize the types/constants/push-constant we need, reusing any
        //    the module already defines (SPIR-V forbids duplicate type defs).
        std::vector<Instr> typeDefs;
        if (uintTypeId == 0) {
            uintTypeId = newId();
            typeDefs.push_back(mk({OpTypeInt, uintTypeId, 32, 0}));
        }
        if (uintZeroId == 0) {
            uintZeroId = newId();
            typeDefs.push_back(mk({OpConstant, uintTypeId, uintZeroId, 0}));
        }
        if (ulongTypeId == 0) {
            ulongTypeId = newId();
            typeDefs.push_back(mk({OpTypeInt, ulongTypeId, 64, 0}));
        }
        const std::uint32_t pcStructId = newId();
        const std::uint32_t ptrPushStructId = newId();
        const std::uint32_t ptrPushUlongId = newId();
        const std::uint32_t pcVarId = newId();
        typeDefs.push_back(mk({OpTypeStruct, pcStructId, ulongTypeId}));
        typeDefs.push_back(mk({OpTypePointer, ptrPushStructId, StorageClassPushConstant, pcStructId}));
        typeDefs.push_back(mk({OpTypePointer, ptrPushUlongId, StorageClassPushConstant, ulongTypeId}));
        typeDefs.push_back(mk({OpVariable, ptrPushStructId, pcVarId, StorageClassPushConstant}));

        const std::vector<Instr> decorations = {
            mk({OpMemberDecorate, pcStructId, 0, DecorationOffset, 0}),
            mk({OpDecorate, pcStructId, DecorationBlock}),
        };

        // ── Rewrite each `OpLoad %asType <ptr>` into address-load + convert.
        std::vector<Instr> rebuilt;
        rebuilt.reserve(instrs.size() + 8);
        for (const Instr& in : instrs) {
            const std::uint32_t op = in[0] & 0xFFFFu;
            if (op == OpLoad && in.size() >= 4 && in[1] == asTypeId) {
                const std::uint32_t resultId = in[2];
                const std::uint32_t chainId = newId();
                const std::uint32_t addrId = newId();
                rebuilt.push_back(mk({OpAccessChain, ptrPushUlongId, chainId, pcVarId, uintZeroId}));
                rebuilt.push_back(mk({OpLoad, ulongTypeId, addrId, chainId}));
                rebuilt.push_back(mk({OpConvertUToAccelerationStructureKHR, asTypeId, resultId, addrId}));
            } else {
                rebuilt.push_back(in);
            }
        }
        instrs.swap(rebuilt);

        // ── Locate the section anchors on the rewritten stream. Each position
        //    means "emit the new instructions immediately before this index".
        const std::size_t count = instrs.size();
        const bool needsInterfaceEntry = words[1] >= SpirvVersion14;
        std::size_t capabilityPos = 0;      // right after the last OpCapability.
        std::size_t afterLastAnnot = count; // right after the last annotation.
        std::size_t firstTypePos = count;   // first type declaration.
        std::size_t firstFuncPos = count;   // first OpFunction (end of the type section).
        bool sawAnnotation = false;
        for (std::size_t k = 0; k < count; ++k) {
            const std::uint32_t op = instrs[k][0] & 0xFFFFu;
            if (op == OpCapability) capabilityPos = k + 1;
            if (IsAnnotation(op)) { afterLastAnnot = k + 1; sawAnnotation = true; }
            if (firstTypePos == count && IsTypeDeclaration(op)) firstTypePos = k;
            if (firstFuncPos == count && op == OpFunction) firstFuncPos = k;

            // SPIR-V >= 1.4 requires every global variable an entry point uses
            // to appear in its interface list. Append the push-constant variable
            // to every entry point so multi-entry modules stay valid too.
            if (op == OpEntryPoint && needsInterfaceEntry) {
                instrs[k].push_back(pcVarId);
                instrs[k][0] = static_cast<std::uint32_t>(instrs[k].size() << 16) | OpEntryPoint;
            }
        }
        // With no annotation in the module, the decorations go directly in
        // front of the type section instead.
        const std::size_t decorationPos = sawAnnotation
            ? afterLastAnnot
            : (firstTypePos < firstFuncPos ? firstTypePos : firstFuncPos);

        // ── Reassemble in a single pass: header (with updated bound) followed
        //    by the instruction stream with the new instructions spliced in.
        std::vector<std::uint32_t> out(words.begin(), words.begin() + static_cast<std::ptrdiff_t>(headerWords));
        out.reserve(words.size() + 64);
        out[3] = bound;
        auto emit = [&out](const Instr& in) { out.insert(out.end(), in.begin(), in.end()); };
        for (std::size_t k = 0; k <= count; ++k) {
            // Int64 is required by the 64-bit address type; skip it when the
            // module already declares the capability.
            if (k == capabilityPos && !hasInt64Capability) emit(mk({OpCapability, CapabilityInt64}));
            if (k == decorationPos) {
                for (const Instr& d : decorations) emit(d);
            }
            if (k == firstFuncPos) {
                for (const Instr& t : typeDefs) emit(t);
            }
            if (k < count) emit(instrs[k]);
        }
        words.swap(out);
    }
}
// ─── END NVIDIA descriptor-heap AS-read workaround ────────────────────────
export namespace Crafter {
    // A shader stage backed by a VkShaderModule created from a SPIR-V file.
    class VulkanShader {
    public:
        std::vector<VkSpecializationMapEntry> specilizations;
        VkSpecializationInfo* specilizationInfo;
        VkShaderStageFlagBits stage;
        std::string entrypoint;
        VkShaderModule shader;

        // Reads the SPIR-V binary at `path`, applies the NVIDIA
        // acceleration-structure workaround when the device requests it, and
        // creates `shader` from the result.
        //
        // path              - file containing the compiled SPIR-V module.
        // entrypoint        - entry-point name stored in `entrypoint`.
        // stage             - pipeline stage stored in `stage`.
        // specilizationInfo - stored as-is in `specilizationInfo`; not dereferenced here.
        //
        // Throws std::runtime_error when the file cannot be opened, is empty,
        // is not a whole number of 32-bit words, or cannot be read completely.
        VulkanShader(const std::filesystem::path& path, std::string entrypoint, VkShaderStageFlagBits stage, VkSpecializationInfo* specilizationInfo)
            // Initializers follow member declaration order.
            : specilizationInfo(specilizationInfo), stage(stage), entrypoint(std::move(entrypoint)) {
            std::ifstream file(path, std::ios::binary);
            if (!file) {
                std::cerr << "Error: Could not open file " << path << std::endl;
                throw std::runtime_error("VulkanShader: could not open file " + path.string());
            }

            // Move to the end of the file to determine its size.
            file.seekg(0, std::ios::end);
            const std::streamsize size = file.tellg();
            file.seekg(0, std::ios::beg);

            // tellg() reports failure as -1, and a SPIR-V module is a sequence
            // of 32-bit words: reject anything else before sizing the buffer,
            // otherwise the read below could run past the end of the vector.
            constexpr std::streamsize wordSize = static_cast<std::streamsize>(sizeof(std::uint32_t));
            if (size <= 0 || size % wordSize != 0) {
                std::cerr << "Error: Could not read data from file" << std::endl;
                throw std::runtime_error("VulkanShader: not a valid SPIR-V file size: " + path.string());
            }

            std::vector<std::uint32_t> spirv(static_cast<std::size_t>(size / wordSize));
            // Read the data into the vector.
            if (!file.read(reinterpret_cast<char*>(spirv.data()), size)) {
                std::cerr << "Error: Could not read data from file" << std::endl;
                throw std::runtime_error("VulkanShader: could not read file " + path.string());
            }
            file.close();

            // NVIDIA descriptor-heap AS-read workaround (issue #15 / #7).
            // No-op on every other driver and on shaders that don't read an
            // acceleration structure. Remove with the rest of the workaround
            // once a fixed NVIDIA driver ships.
            if (Device::workaroundDescriptorHeapAS) {
                WorkaroundNvidiaAS::Patch(spirv);
            }

            VkShaderModuleCreateInfo module_info{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
            module_info.codeSize = spirv.size() * sizeof(std::uint32_t); // size in bytes.
            module_info.pCode = spirv.data();
            Device::CheckVkResult(vkCreateShaderModule(Device::device, &module_info, nullptr, &shader));
        }
    };
}
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM