2026-01-28 23:37:12 +01:00
|
|
|
|
/*
|
|
|
|
|
|
Crafter®.Graphics
|
|
|
|
|
|
Copyright (C) 2026 Catcrafts®
|
|
|
|
|
|
catcrafts.net
|
|
|
|
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
|
|
License version 3.0 as published by the Free Software Foundation;
|
|
|
|
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
module;
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-03-02 23:53:13 +01:00
|
|
|
|
#include "vulkan/vulkan.h"
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-01-28 23:37:12 +01:00
|
|
|
|
export module Crafter.Graphics:ShaderVulkan;
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-01-28 23:37:12 +01:00
|
|
|
|
import std;
|
2026-03-09 20:10:19 +01:00
|
|
|
|
import :Device;
|
2026-01-28 23:37:12 +01:00
|
|
|
|
import :Types;
|
|
|
|
|
|
|
2026-06-03 01:59:54 +00:00
|
|
|
|
// ─── BEGIN NVIDIA descriptor-heap AS-read workaround (issue #15 / #7) ─────
|
|
|
|
|
|
// Remove this whole block (and its call below, Device::workaroundDescriptorHeapAS,
|
|
|
|
|
|
// and the RTPass push-data) once NVIDIA ships a driver that fixes the
|
|
|
|
|
|
// VK_EXT_descriptor_heap acceleration-structure read fault.
|
|
|
|
|
|
//
|
|
|
|
|
|
// On the affected driver, reading an `accelerationStructureEXT` out of the
|
|
|
|
|
|
// descriptor heap aborts the device. The build, the heap descriptor write and
|
|
|
|
|
|
// everything else are correct (proven in #7); only the in-shader heap AS read
|
|
|
|
|
|
// is broken — buffers/images through the same heap work. Acceleration
|
|
|
|
|
|
// structures can equally be addressed by their device address, and
|
|
|
|
|
|
// OpConvertUToAccelerationStructureKHR (which reads no descriptor) sidesteps
|
|
|
|
|
|
// the faulting path entirely.
|
|
|
|
|
|
//
|
|
|
|
|
|
// glslang has no GLSL spelling for that conversion, so we rewrite the compiled
|
|
|
|
|
|
// SPIR-V at module-load time: every `OpLoad %accelStruct <heap-ptr>` becomes a
|
2026-06-03 02:28:02 +00:00
|
|
|
|
// load of the TLAS device address from a push-constant block followed by
|
|
|
|
|
|
// OpConvertUToAccelerationStructureKHR. RTPass pushes the active frame's TLAS
|
|
|
|
|
|
// address into that push constant. Shaders that never touch an acceleration
|
|
|
|
|
|
// structure (no OpTypeAccelerationStructureKHR) are left untouched.
|
|
|
|
|
|
//
|
|
|
|
|
|
// SPIR-V allows at most one push-constant variable per entry point, so we never
|
|
|
|
|
|
// add a second one: if the shader already declares a push-constant block we
|
|
|
|
|
|
// append a ulong member (the TLAS address) to the *existing* block and read
|
|
|
|
|
|
// from there; only shaders with no push constant of their own get a freshly
|
|
|
|
|
|
// synthesized single-member block. Its byte offset is the offset of that
|
2026-06-03 18:35:39 +00:00
|
|
|
|
// member, returned in PatchResult::tlasPushOffset so the caller (RTPass for the
|
|
|
|
|
|
// RT pipeline, ComputeShader::Dispatch for a compute pipeline) can feed it to
|
|
|
|
|
|
// vkCmdPushDataEXT — landing the address exactly where the rewritten load reads
|
|
|
|
|
|
// it. The offset is per-shader rather than a global: a global is clobbered by
|
|
|
|
|
|
// whichever shader was patched last and so cannot serve several shaders whose
|
|
|
|
|
|
// push-constant layouts differ.
|
2026-06-03 02:28:02 +00:00
|
|
|
|
//
|
|
|
|
|
|
// Exported so tests/PushConstantRewrite can drive Patch() over real compiled
|
|
|
|
|
|
// SPIR-V and check the result with spirv-val; nothing in the engine calls it
|
|
|
|
|
|
// from outside this file. Goes away with the rest of the workaround.
|
|
|
|
|
|
export namespace WorkaroundNvidiaAS {
|
2026-06-03 01:59:54 +00:00
|
|
|
|
// Numeric values of the SPIR-V opcodes and enumerants the rewrite below reads
// or emits. Kept as an unnamed enum so the names convert implicitly to the
// raw 32-bit words of the instruction stream.
enum : std::uint32_t {
    // Mode-setting instructions.
    OpEntryPoint = 15,
    OpCapability = 17,

    // Type declarations.
    OpTypeInt = 21,
    OpTypeFloat = 22,
    OpTypeVector = 23,
    OpTypeMatrix = 24,
    OpTypeArray = 28,
    OpTypeStruct = 30,
    OpTypePointer = 32,
    OpTypeAccelerationStructureKHR = 5341,

    // Constants, variables and memory access.
    OpConstant = 43,
    OpVariable = 59,
    OpLoad = 61,
    OpAccessChain = 65,

    // Annotations.
    OpDecorate = 71,
    OpMemberDecorate = 72,

    // Device address → acceleration structure conversion.
    OpConvertUToAccelerationStructureKHR = 4447,

    // Capability / storage-class enumerants.
    CapabilityInt64 = 11,
    StorageClassPushConstant = 9,

    // Decoration enumerants.
    DecorationBlock = 2,
    DecorationArrayStride = 6,
    DecorationMatrixStride = 7,
    DecorationOffset = 35,
};
|
|
|
|
|
|
|
|
|
|
|
|
// True when `op` is one of the SPIR-V annotation (decoration) opcodes. Patch()
// uses this to find the last annotation instruction, after which the new
// decorations are inserted.
inline bool IsAnnotation(std::uint32_t op) {
    switch (op) {
        case 71:    // OpDecorate
        case 72:    // OpMemberDecorate
        case 73:    // OpDecorationGroup
        case 74:    // OpGroupDecorate
        case 75:    // OpGroupMemberDecorate
        case 332:   // OpDecorateId
        case 5632:  // OpDecorateString
        case 5633:  // OpMemberDecorateString
            return true;
        default:
            return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// One SPIR-V instruction as its raw words. Word 0 packs the instruction's
// total word count in the high 16 bits and the opcode in the low 16 bits;
// the remaining words are the operands.
using Instr = std::vector<std::uint32_t>;
|
|
|
|
|
|
|
2026-06-03 02:28:02 +00:00
|
|
|
|
// Rounds `v` up to the next multiple of `a`.
//
// Works for any alignment, not only powers of two; for power-of-two `a` the
// result (including unsigned wrap-around near UINT32_MAX) is the same as the
// classic `(v + a - 1) & ~(a - 1)` mask form. An alignment of 0 means "no
// alignment requirement" and returns `v` unchanged.
inline std::uint32_t AlignUp(std::uint32_t v, std::uint32_t a) {
    if (a == 0u) return v;
    const std::uint32_t rem = v % a;
    return rem == 0u ? v : v + (a - rem);
}
|
|
|
|
|
|
|
2026-06-03 18:35:39 +00:00
|
|
|
|
// What Patch() reports back for a single shader module.
//
//   patched         true only when the module was actually rewritten because
//                   it uses an acceleration structure.
//   tlasPushOffset  meaningful when `patched` is true: byte offset, inside the
//                   (new or pre-existing) push-constant block, of the member
//                   holding the TLAS device address that the caller has to
//                   write.
struct PatchResult {
    bool patched{false};
    std::uint32_t tlasPushOffset{0};
};
|
|
|
|
|
|
|
|
|
|
|
|
// Rewrites a compiled SPIR-V module in place so that every
// `OpLoad %accelerationStructure <ptr>` is replaced by a load of a 64-bit
// device address from a push-constant block followed by
// OpConvertUToAccelerationStructureKHR.
//
//   words   Whole module: 5 header words followed by the instruction stream.
//           Replaced with the patched module on success; left untouched on
//           every early-return path.
//
// Returns {true, offset} when the module declares
// OpTypeAccelerationStructureKHR and was rewritten, where `offset` is the byte
// offset of the TLAS-address member inside the push-constant block. Returns a
// default PatchResult when the module is too short, its instruction stream is
// malformed (zero/overlong instruction, or an instruction shorter than the
// operands this pass reads), or it declares no acceleration-structure type.
//
// If the module already has a push-constant block, a ulong member is appended
// to it; otherwise a fresh single-member block is synthesized. The Int64
// capability is always added to a patched module.
inline PatchResult Patch(std::vector<std::uint32_t>& words) {
    if (words.size() < 5) return {}; // not a SPIR-V module we understand.

    // Split header (5 words) from the instruction stream.
    std::uint32_t bound = words[3];
    std::vector<Instr> instrs;
    for (std::size_t i = 5; i < words.size();) {
        const std::uint32_t len = words[i] >> 16;
        if (len == 0 || i + len > words.size()) return {}; // malformed — bail.
        instrs.emplace_back(words.begin() + i, words.begin() + i + len);
        i += len;
    }

    // Fewest words an instruction must have for the operand indices this pass
    // reads to be in range. Anything shorter is treated as a malformed module.
    auto minWords = [](std::uint32_t op) -> std::size_t {
        switch (op) {
            case OpCapability:
            case OpTypeStruct:
            case OpTypeAccelerationStructureKHR:
                return 2;
            case OpTypeFloat:
            case OpDecorate:
                return 3;
            case OpEntryPoint:
            case OpTypeInt:
            case OpTypeVector:
            case OpTypeMatrix:
            case OpTypeArray:
            case OpTypePointer:
            case OpConstant:
            case OpVariable:
            case OpLoad:
            case OpMemberDecorate:
                return 4;
            default:
                return 1;
        }
    };

    // ── Scan for the AS type, reusable int/long types+constants, any
    //    existing push-constant block, and the type/decoration/constant
    //    tables needed to size that block.
    std::uint32_t asTypeId = 0, ulongTypeId = 0, uintTypeId = 0, uintZeroId = 0;
    std::uint32_t existingPcVarId = 0, existingPcStructId = 0, existingPtrUlongId = 0;
    std::map<std::uint32_t, const Instr*> typeInstr;           // type-result-id → defining instr
    std::map<std::uint32_t, std::uint32_t> constU32;           // OpConstant id → 32-bit value
    std::map<std::uint32_t, std::uint32_t> uintConstByValue;   // uint value → OpConstant id
    std::map<std::uint32_t, std::uint32_t> arrayStride;        // array type id → ArrayStride
    std::map<std::uint64_t, std::uint32_t> memberOffset;       // (struct<<32|idx) → Offset
    std::map<std::uint64_t, std::uint32_t> memberMatStride;    // (struct<<32|idx) → MatrixStride
    std::map<std::uint32_t, std::uint32_t> ptrPointee;         // pointer type id → pointee type id
    for (const Instr& in : instrs) {
        const std::uint32_t op = in[0] & 0xFFFFu;
        if (in.size() < minWords(op)) return {}; // truncated instruction — bail.
        switch (op) {
            case OpTypeAccelerationStructureKHR:
                asTypeId = in[1];
                typeInstr[in[1]] = &in;
                break;
            case OpTypeInt:
                if (in[2] == 64 && in[3] == 0) ulongTypeId = in[1];
                else if (in[2] == 32 && in[3] == 0) uintTypeId = in[1];
                typeInstr[in[1]] = &in;
                break;
            case OpTypeFloat: case OpTypeVector: case OpTypeMatrix:
            case OpTypeArray: case OpTypeStruct:
                typeInstr[in[1]] = &in;
                break;
            case OpTypePointer:
                typeInstr[in[1]] = &in;
                ptrPointee[in[1]] = in[3];
                if (in[2] == StorageClassPushConstant && in[3] == ulongTypeId)
                    existingPtrUlongId = in[1];
                break;
            case OpConstant:
                constU32[in[2]] = in[3];
                if (uintTypeId && in[1] == uintTypeId) {
                    uintConstByValue.emplace(in[3], in[2]);
                    if (in[3] == 0) uintZeroId = in[2];
                }
                break;
            case OpVariable:
                if (in[3] == StorageClassPushConstant) {
                    existingPcVarId = in[2];
                    const auto pointee = ptrPointee.find(in[1]);
                    existingPcStructId = pointee != ptrPointee.end() ? pointee->second : 0;
                }
                break;
            case OpDecorate:
                if (in.size() >= 4 && in[2] == DecorationArrayStride) arrayStride[in[1]] = in[3];
                break;
            case OpMemberDecorate: {
                const std::uint64_t key = (static_cast<std::uint64_t>(in[1]) << 32) | in[2];
                if (in.size() >= 5 && in[3] == DecorationOffset) memberOffset[key] = in[4];
                if (in.size() >= 5 && in[3] == DecorationMatrixStride) memberMatStride[key] = in[4];
                break;
            }
            default: break;
        }
    }

    if (asTypeId == 0) return {}; // shader never reads an acceleration structure.

    // Set on whichever path runs below; returned to the caller.
    std::uint32_t tlasPushOffset = 0;

    // Allocates a fresh result id by bumping the module's id bound.
    auto newId = [&] { return bound++; };
    // Builds an instruction from {opcode, operands...}, filling in the word
    // count in the high half of word 0.
    auto mk = [](std::initializer_list<std::uint32_t> ops) {
        Instr in(ops);
        in[0] = static_cast<std::uint32_t>(in.size() << 16) | (in[0] & 0xFFFFu);
        return in;
    };

    // Byte footprint of a type, honouring the explicit Array/Matrix strides
    // glslang emits so the result is correct under both scalar and std140
    // block layout. Used only to find where an existing push block ends.
    // Unknown types (and arrays whose length is not a plain OpConstant)
    // contribute 0.
    std::function<std::uint32_t(std::uint32_t)> footprint =
        [&](std::uint32_t tid) -> std::uint32_t {
        const auto it = typeInstr.find(tid);
        if (it == typeInstr.end()) return 0;
        const Instr& t = *it->second;
        switch (t[0] & 0xFFFFu) {
            case OpTypeInt: case OpTypeFloat: return t[2] / 8u;
            case OpTypeVector: return t[3] * footprint(t[2]);
            case OpTypeMatrix: return t[3] * footprint(t[2]); // cols × column-vec
            case OpTypeArray: {
                const auto lenIt = constU32.find(t[3]);
                const std::uint32_t len = lenIt != constU32.end() ? lenIt->second : 0;
                const auto strideIt = arrayStride.find(tid);
                const std::uint32_t stride = strideIt != arrayStride.end() ? strideIt->second
                                                                           : footprint(t[2]);
                return len * stride;
            }
            case OpTypeStruct: {
                std::uint32_t end = 0;
                for (std::size_t m = 2; m < t.size(); ++m) {
                    const std::uint32_t idx = static_cast<std::uint32_t>(m - 2);
                    const std::uint64_t key = (static_cast<std::uint64_t>(t[1]) << 32) | idx;
                    const auto offIt = memberOffset.find(key);
                    const std::uint32_t off = offIt != memberOffset.end() ? offIt->second : 0;
                    std::uint32_t sz;
                    const auto mt = typeInstr.find(t[m]);
                    const auto msIt = memberMatStride.find(key);
                    if (mt != typeInstr.end() && (mt->second->at(0) & 0xFFFFu) == OpTypeMatrix
                        && msIt != memberMatStride.end())
                        sz = msIt->second * (*mt->second)[3]; // stride × column count
                    else
                        sz = footprint(t[m]);
                    end = std::max(end, off + sz);
                }
                return end;
            }
            case OpTypePointer: return 8;
            default: return 0;
        }
    };

    // Merge into the user's block only when the push-constant variable really
    // points at a struct type we have seen.
    const auto pcStructIt = typeInstr.find(existingPcStructId);
    const bool merge = existingPcVarId != 0 && existingPcStructId != 0
        && pcStructIt != typeInstr.end()
        && (pcStructIt->second->at(0) & 0xFFFFu) == OpTypeStruct;

    // ── Synthesize/ensure the int/long types and constants we need, reusing
    //    any the module already defines (SPIR-V forbids duplicate type defs).
    //    `typeDefs` is appended at the very end of the global section, so it
    //    may reference any type the module declares. The one definition that
    //    cannot wait until then — the ulong type when it becomes a member of
    //    the user's push block — is placed by the rewrite loop below.
    std::vector<Instr> typeDefs;
    if (uintTypeId == 0) { uintTypeId = newId(); typeDefs.push_back(mk({OpTypeInt, uintTypeId, 32, 0})); }
    if (ulongTypeId == 0) {
        ulongTypeId = newId();
        if (!merge) typeDefs.push_back(mk({OpTypeInt, ulongTypeId, 64, 0}));
    }

    std::uint32_t pcVarId = 0, ptrPushUlongId = 0, memberIdxConstId = 0, memberIdx = 0;
    std::vector<Instr> decorations;

    if (merge) {
        // Append a ulong member to the user's existing block; read from it.
        pcVarId = existingPcVarId;
        const Instr* structInstr = pcStructIt->second;
        memberIdx = static_cast<std::uint32_t>(structInstr->size() - 2);
        tlasPushOffset = AlignUp(footprint(existingPcStructId), 8);

        ptrPushUlongId = existingPtrUlongId;
        if (ptrPushUlongId == 0) {
            ptrPushUlongId = newId();
            typeDefs.push_back(mk({OpTypePointer, ptrPushUlongId, StorageClassPushConstant, ulongTypeId}));
        }
        // Member index constant for the access chain — reuse an existing
        // uint constant of the right value, else mint one (must be an
        // integer constant, so only uint-typed ones qualify for reuse).
        const auto found = uintConstByValue.find(memberIdx);
        if (found != uintConstByValue.end()) {
            memberIdxConstId = found->second;
        } else {
            memberIdxConstId = newId();
            typeDefs.push_back(mk({OpConstant, uintTypeId, memberIdxConstId, memberIdx}));
        }
        decorations.push_back(mk({OpMemberDecorate, existingPcStructId, memberIdx, DecorationOffset, tlasPushOffset}));
    } else {
        // No user push constant — synthesize a fresh single-member block.
        if (uintZeroId == 0) { uintZeroId = newId(); typeDefs.push_back(mk({OpConstant, uintTypeId, uintZeroId, 0})); }
        const std::uint32_t pcStructId = newId();
        const std::uint32_t ptrPushStructId = newId();
        ptrPushUlongId = newId();
        pcVarId = newId();
        typeDefs.push_back(mk({OpTypeStruct, pcStructId, ulongTypeId}));
        typeDefs.push_back(mk({OpTypePointer, ptrPushStructId, StorageClassPushConstant, pcStructId}));
        typeDefs.push_back(mk({OpTypePointer, ptrPushUlongId, StorageClassPushConstant, ulongTypeId}));
        typeDefs.push_back(mk({OpVariable, ptrPushStructId, pcVarId, StorageClassPushConstant}));
        decorations.push_back(mk({OpMemberDecorate, pcStructId, 0, DecorationOffset, 0}));
        decorations.push_back(mk({OpDecorate, pcStructId, DecorationBlock}));
        memberIdxConstId = uintZeroId;
        tlasPushOffset = 0;
    }

    // ── Rewrite each `OpLoad %asType <ptr>` into address-load + convert, and
    //    (when merging) append the ulong member to the existing struct type.
    //    The ulong type must be defined before the struct that now contains
    //    it: if the module defines it later (or not at all) its definition is
    //    emitted right ahead of the struct and the later original is dropped.
    //    An OpTypeInt has no id operands, so hoisting it is always legal.
    //    Instructions are moved out of `instrs`; `typeInstr`/`footprint` must
    //    not be used past this point.
    std::vector<Instr> rebuilt;
    rebuilt.reserve(instrs.size() + 8);
    bool ulongDefined = false;
    for (Instr& in : instrs) {
        const std::uint32_t op = in[0] & 0xFFFFu;
        if (op == OpTypeInt && in[1] == ulongTypeId) {
            if (ulongDefined) continue; // already hoisted ahead of the push block.
            ulongDefined = true;
        }
        if (op == OpLoad && in[1] == asTypeId) {
            const std::uint32_t resultId = in[2];
            const std::uint32_t chainId = newId();
            const std::uint32_t addrId = newId();
            rebuilt.push_back(mk({OpAccessChain, ptrPushUlongId, chainId, pcVarId, memberIdxConstId}));
            rebuilt.push_back(mk({OpLoad, ulongTypeId, addrId, chainId}));
            rebuilt.push_back(mk({OpConvertUToAccelerationStructureKHR, asTypeId, resultId, addrId}));
            continue;
        }
        if (merge && op == OpTypeStruct && in[1] == existingPcStructId) {
            if (!ulongDefined) {
                rebuilt.push_back(mk({OpTypeInt, ulongTypeId, 64, 0}));
                ulongDefined = true;
            }
            in.push_back(ulongTypeId);
            in[0] = static_cast<std::uint32_t>(in.size() << 16) | OpTypeStruct;
        }
        rebuilt.push_back(std::move(in));
    }
    instrs.swap(rebuilt);

    // ── Locate the structural anchors in the rewritten stream.
    std::size_t lastCapIdx = 0, lastAnnotIdx = 0, firstFuncIdx = instrs.size();
    for (std::size_t k = 0; k < instrs.size(); ++k) {
        const std::uint32_t op = instrs[k][0] & 0xFFFFu;
        if (op == OpCapability) lastCapIdx = k;
        if (IsAnnotation(op)) lastAnnotIdx = k;
        if (op == 54 /*OpFunction*/ && firstFuncIdx == instrs.size()) firstFuncIdx = k;
    }

    // From SPIR-V 1.4 on (raygen is 1.4) an entry point's interface must list
    // every global variable its call tree references, whatever the storage
    // class; listing a superset is allowed but listing an id twice is not.
    // The rewritten loads can sit under any entry point, so add the
    // push-constant variable to each one that does not already list it.
    if (words[1] >= 0x00010400u) {
        auto hasNulByte = [](std::uint32_t w) {
            return (w & 0x000000FFu) == 0 || (w & 0x0000FF00u) == 0
                || (w & 0x00FF0000u) == 0 || (w & 0xFF000000u) == 0;
        };
        for (Instr& ep : instrs) {
            if ((ep[0] & 0xFFFFu) != OpEntryPoint) continue;
            // Words 3.. hold the nul-terminated name; interface ids follow it.
            std::size_t ifaceBegin = ep.size();
            for (std::size_t w = 3; w < ep.size(); ++w) {
                if (hasNulByte(ep[w])) { ifaceBegin = w + 1; break; }
            }
            const auto first = ep.begin() + static_cast<std::ptrdiff_t>(ifaceBegin);
            if (std::find(first, ep.end(), pcVarId) != ep.end()) continue;
            ep.push_back(pcVarId);
            ep[0] = static_cast<std::uint32_t>(ep.size() << 16) | OpEntryPoint;
        }
    }

    // Insert highest-index-first so earlier anchors stay valid: the new
    // type/constant/variable bundle goes at the end of the global section
    // (right before the first function, after every definition it may
    // reference), then the decorations after the last annotation, then the
    // Int64 capability after the last capability.
    instrs.insert(instrs.begin() + firstFuncIdx, typeDefs.begin(), typeDefs.end());
    instrs.insert(instrs.begin() + lastAnnotIdx + 1, decorations.begin(), decorations.end());
    instrs.insert(instrs.begin() + lastCapIdx + 1, mk({OpCapability, CapabilityInt64}));

    // ── Reassemble: header (with updated bound) + instruction stream.
    std::vector<std::uint32_t> out(words.begin(), words.begin() + 5);
    out[3] = bound;
    for (const Instr& in : instrs) out.insert(out.end(), in.begin(), in.end());
    words.swap(out);

    return {true, tlasPushOffset};
}
|
|
|
|
|
|
}
|
|
|
|
|
|
// ─── END NVIDIA descriptor-heap AS-read workaround ────────────────────────
|
|
|
|
|
|
|
2026-01-28 23:37:12 +01:00
|
|
|
|
export namespace Crafter {
|
2026-02-22 00:46:38 +01:00
|
|
|
|
class VulkanShader {
|
|
|
|
|
|
public:
|
2026-04-05 22:53:59 +02:00
|
|
|
|
std::vector<VkSpecializationMapEntry> specilizations;
|
|
|
|
|
|
VkSpecializationInfo* specilizationInfo;
|
2026-02-22 00:46:38 +01:00
|
|
|
|
VkShaderStageFlagBits stage;
|
|
|
|
|
|
std::string entrypoint;
|
|
|
|
|
|
VkShaderModule shader;
|
2026-06-03 18:35:39 +00:00
|
|
|
|
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7): set when
|
|
|
|
|
|
// this module read an acceleration structure and was rewritten to fetch
|
|
|
|
|
|
// the TLAS device address from a push constant. `tlasPushOffset` is the
|
|
|
|
|
|
// byte offset of that member, which whoever records the dispatch
|
|
|
|
|
|
// (RTPass / ComputeShader) must write with vkCmdPushDataEXT. Per-shader
|
|
|
|
|
|
// rather than a global because each shader's push-constant layout — and
|
|
|
|
|
|
// therefore the offset — can differ. Both false/0 on every other driver.
|
|
|
|
|
|
bool patchedAS = false;
|
|
|
|
|
|
std::uint32_t tlasPushOffset = 0;
|
2026-04-05 22:53:59 +02:00
|
|
|
|
VulkanShader(const std::filesystem::path& path, std::string entrypoint, VkShaderStageFlagBits stage, VkSpecializationInfo* specilizationInfo) : stage(stage), entrypoint(entrypoint), specilizationInfo(specilizationInfo) {
|
2026-02-22 00:46:38 +01:00
|
|
|
|
std::ifstream file(path, std::ios::binary);
|
|
|
|
|
|
if (!file) {
|
|
|
|
|
|
std::cerr << "Error: Could not open file " << path << std::endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Move to the end of the file to determine its size
|
|
|
|
|
|
file.seekg(0, std::ios::end);
|
|
|
|
|
|
std::streamsize size = file.tellg();
|
|
|
|
|
|
file.seekg(0, std::ios::beg);
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::uint32_t> spirv(size / sizeof(std::uint32_t));
|
|
|
|
|
|
|
|
|
|
|
|
// Read the data into the vector
|
|
|
|
|
|
if (!file.read(reinterpret_cast<char*>(spirv.data()), size)) {
|
|
|
|
|
|
std::cerr << "Error: Could not read data from file" << std::endl;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
file.close();
|
2026-06-03 01:59:54 +00:00
|
|
|
|
|
|
|
|
|
|
// NVIDIA descriptor-heap AS-read workaround (issue #15 / #7).
|
|
|
|
|
|
// No-op on every other driver and on shaders that don't read an
|
|
|
|
|
|
// acceleration structure. Remove with the rest of the workaround
|
|
|
|
|
|
// once a fixed NVIDIA driver ships.
|
|
|
|
|
|
if (Device::workaroundDescriptorHeapAS) {
|
2026-06-03 18:35:39 +00:00
|
|
|
|
WorkaroundNvidiaAS::PatchResult patch = WorkaroundNvidiaAS::Patch(spirv);
|
|
|
|
|
|
patchedAS = patch.patched;
|
|
|
|
|
|
tlasPushOffset = patch.tlasPushOffset;
|
2026-06-03 01:59:54 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-22 00:46:38 +01:00
|
|
|
|
VkShaderModuleCreateInfo module_info{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
|
|
|
|
|
|
module_info.codeSize = spirv.size() * sizeof(uint32_t);
|
|
|
|
|
|
module_info.pCode = spirv.data();
|
|
|
|
|
|
|
2026-03-09 20:10:19 +01:00
|
|
|
|
Device::CheckVkResult(vkCreateShaderModule(Device::device, &module_info, nullptr, &shader));
|
2026-02-22 00:46:38 +01:00
|
|
|
|
}
|
|
|
|
|
|
};
|
2026-05-18 02:07:48 +02:00
|
|
|
|
}
|
|
|
|
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|