/* Crafter®.Graphics Copyright (C) 2026 Catcrafts® catcrafts.net This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 3.0 as published by the Free Software Foundation; This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ module; #ifndef CRAFTER_GRAPHICS_WINDOW_DOM #include "vulkan/vulkan.h" #endif // !CRAFTER_GRAPHICS_WINDOW_DOM export module Crafter.Graphics:ShaderVulkan; #ifndef CRAFTER_GRAPHICS_WINDOW_DOM import std; import :Device; import :Types; // ─── BEGIN NVIDIA descriptor-heap AS-read workaround (issue #15 / #7) ───── // Remove this whole block (and its call below, Device::workaroundDescriptorHeapAS, // and the RTPass push-data) once NVIDIA ships a driver that fixes the // VK_EXT_descriptor_heap acceleration-structure read fault. // // On the affected driver, reading an `accelerationStructureEXT` out of the // descriptor heap aborts the device. The build, the heap descriptor write and // everything else are correct (proven in #7); only the in-shader heap AS read // is broken — buffers/images through the same heap work. Acceleration // structures can equally be addressed by their device address, and // OpConvertUToAccelerationStructureKHR (which reads no descriptor) sidesteps // the faulting path entirely. // // glslang has no GLSL spelling for that conversion, so we rewrite the compiled // SPIR-V at module-load time: every `OpLoad %accelStruct ` becomes a // load of the TLAS device address from a synthesized push-constant block // followed by OpConvertUToAccelerationStructureKHR. RTPass pushes the active // frame's TLAS address into that push constant. Shaders that never touch an // acceleration structure (no OpTypeAccelerationStructureKHR) are left untouched. namespace WorkaroundNvidiaAS { // SPIR-V numeric opcodes / enums used below. enum : std::uint32_t { OpEntryPoint = 15, OpCapability = 17, OpTypeInt = 21, OpTypeStruct = 30, OpTypePointer = 32, OpConstant = 43, OpVariable = 59, OpLoad = 61, OpAccessChain = 65, OpDecorate = 71, OpMemberDecorate = 72, OpConvertUToAccelerationStructureKHR = 4447, OpTypeAccelerationStructureKHR = 5341, CapabilityInt64 = 11, StorageClassPushConstant = 9, DecorationBlock = 2, DecorationOffset = 35, }; inline bool IsAnnotation(std::uint32_t op) { // OpDecorate/OpMemberDecorate/OpDecorationGroup/OpGroupDecorate/ // OpGroupMemberDecorate/OpDecorateId/OpDecorate(Member)String. return op == 71 || op == 72 || op == 73 || op == 74 || op == 75 || op == 332 || op == 5632 || op == 5633; } using Instr = std::vector; inline void Patch(std::vector& words) { if (words.size() < 5) return; // not a SPIR-V module we understand. // Split header (5 words) from the instruction stream. std::uint32_t bound = words[3]; std::vector instrs; for (std::size_t i = 5; i < words.size();) { std::uint32_t len = words[i] >> 16; if (len == 0 || i + len > words.size()) return; // malformed — bail. instrs.emplace_back(words.begin() + i, words.begin() + i + len); i += len; } // ── Scan for the AS type, reusable int/long types+constants, and the // section boundaries we need to insert into. std::uint32_t asTypeId = 0, ulongTypeId = 0, uintTypeId = 0, uintZeroId = 0; std::size_t lastCapIdx = 0, lastAnnotIdx = 0, firstFuncIdx = instrs.size(); std::size_t entryIdx = instrs.size(); for (std::size_t k = 0; k < instrs.size(); ++k) { std::uint32_t op = instrs[k][0] & 0xFFFFu; switch (op) { case OpTypeAccelerationStructureKHR: asTypeId = instrs[k][1]; break; case OpTypeInt: if (instrs[k][2] == 64 && instrs[k][3] == 0) ulongTypeId = instrs[k][1]; else if (instrs[k][2] == 32 && instrs[k][3] == 0) uintTypeId = instrs[k][1]; break; case OpConstant: if (uintTypeId && instrs[k][1] == uintTypeId && instrs[k][3] == 0) uintZeroId = instrs[k][2]; break; case OpCapability: lastCapIdx = k; break; case OpEntryPoint: if (entryIdx == instrs.size()) entryIdx = k; break; default: break; } if (IsAnnotation(op)) lastAnnotIdx = k; if (op == 54 /*OpFunction*/ && firstFuncIdx == instrs.size()) firstFuncIdx = k; } if (asTypeId == 0) return; // shader never reads an acceleration structure. auto newId = [&] { return bound++; }; auto mk = [](std::initializer_list ops) { Instr in(ops); in[0] = static_cast(in.size() << 16) | (in[0] & 0xFFFFu); return in; }; // ── Synthesize the types/constants/push-constant we need, reusing any // the module already defines (SPIR-V forbids duplicate type defs). std::vector typeDefs; if (uintTypeId == 0) { uintTypeId = newId(); typeDefs.push_back(mk({OpTypeInt, uintTypeId, 32, 0})); } if (uintZeroId == 0) { uintZeroId = newId(); typeDefs.push_back(mk({OpConstant, uintTypeId, uintZeroId, 0})); } if (ulongTypeId == 0) { ulongTypeId = newId(); typeDefs.push_back(mk({OpTypeInt, ulongTypeId, 64, 0})); } std::uint32_t pcStructId = newId(); std::uint32_t ptrPushStructId = newId(); std::uint32_t ptrPushUlongId = newId(); std::uint32_t pcVarId = newId(); typeDefs.push_back(mk({OpTypeStruct, pcStructId, ulongTypeId})); typeDefs.push_back(mk({OpTypePointer, ptrPushStructId, StorageClassPushConstant, pcStructId})); typeDefs.push_back(mk({OpTypePointer, ptrPushUlongId, StorageClassPushConstant, ulongTypeId})); typeDefs.push_back(mk({OpVariable, ptrPushStructId, pcVarId, StorageClassPushConstant})); std::vector decorations = { mk({OpMemberDecorate, pcStructId, 0, DecorationOffset, 0}), mk({OpDecorate, pcStructId, DecorationBlock}), }; // ── Rewrite each `OpLoad %asType ` into address-load + convert. std::vector rebuilt; rebuilt.reserve(instrs.size() + 8); for (const Instr& in : instrs) { std::uint32_t op = in[0] & 0xFFFFu; if (op == OpLoad && in[1] == asTypeId) { std::uint32_t resultId = in[2]; std::uint32_t chainId = newId(); std::uint32_t addrId = newId(); rebuilt.push_back(mk({OpAccessChain, ptrPushUlongId, chainId, pcVarId, uintZeroId})); rebuilt.push_back(mk({OpLoad, ulongTypeId, addrId, chainId})); rebuilt.push_back(mk({OpConvertUToAccelerationStructureKHR, asTypeId, resultId, addrId})); } else { rebuilt.push_back(in); } } instrs.swap(rebuilt); // Recompute structural anchors (the rewrite above shifted indices). lastCapIdx = 0; lastAnnotIdx = 0; firstFuncIdx = instrs.size(); entryIdx = instrs.size(); for (std::size_t k = 0; k < instrs.size(); ++k) { std::uint32_t op = instrs[k][0] & 0xFFFFu; if (op == OpCapability) lastCapIdx = k; if (op == OpEntryPoint && entryIdx == instrs.size()) entryIdx = k; if (IsAnnotation(op)) lastAnnotIdx = k; if (op == 54 && firstFuncIdx == instrs.size()) firstFuncIdx = k; } // Append the push-constant variable to the entry point's interface // list (required for SPIR-V ≥ 1.4 — both raygen modules are 1.4). if (entryIdx != instrs.size() && words[1] >= 0x00010400u) { instrs[entryIdx].push_back(pcVarId); instrs[entryIdx][0] = static_cast(instrs[entryIdx].size() << 16) | OpEntryPoint; } // Insert highest-index-first so earlier anchors stay valid. instrs.insert(instrs.begin() + firstFuncIdx, typeDefs.begin(), typeDefs.end()); instrs.insert(instrs.begin() + lastAnnotIdx + 1, decorations.begin(), decorations.end()); instrs.insert(instrs.begin() + lastCapIdx + 1, mk({OpCapability, CapabilityInt64})); // ── Reassemble: header (with updated bound) + instruction stream. std::vector out(words.begin(), words.begin() + 5); out[3] = bound; for (const Instr& in : instrs) out.insert(out.end(), in.begin(), in.end()); words.swap(out); } } // ─── END NVIDIA descriptor-heap AS-read workaround ──────────────────────── export namespace Crafter { class VulkanShader { public: std::vector specilizations; VkSpecializationInfo* specilizationInfo; VkShaderStageFlagBits stage; std::string entrypoint; VkShaderModule shader; VulkanShader(const std::filesystem::path& path, std::string entrypoint, VkShaderStageFlagBits stage, VkSpecializationInfo* specilizationInfo) : stage(stage), entrypoint(entrypoint), specilizationInfo(specilizationInfo) { std::ifstream file(path, std::ios::binary); if (!file) { std::cerr << "Error: Could not open file " << path << std::endl; } // Move to the end of the file to determine its size file.seekg(0, std::ios::end); std::streamsize size = file.tellg(); file.seekg(0, std::ios::beg); std::vector spirv(size / sizeof(std::uint32_t)); // Read the data into the vector if (!file.read(reinterpret_cast(spirv.data()), size)) { std::cerr << "Error: Could not read data from file" << std::endl; } file.close(); // NVIDIA descriptor-heap AS-read workaround (issue #15 / #7). // No-op on every other driver and on shaders that don't read an // acceleration structure. Remove with the rest of the workaround // once a fixed NVIDIA driver ships. if (Device::workaroundDescriptorHeapAS) { WorkaroundNvidiaAS::Patch(spirv); } VkShaderModuleCreateInfo module_info{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO}; module_info.codeSize = spirv.size() * sizeof(uint32_t); module_info.pCode = spirv.data(); Device::CheckVkResult(vkCreateShaderModule(Device::device, &module_info, nullptr, &shader)); } }; } #endif // !CRAFTER_GRAPHICS_WINDOW_DOM