This commit is contained in:
Jorijn van der Graaf 2026-05-05 23:49:29 +02:00
commit b3db40ebec
6 changed files with 212 additions and 64 deletions

View file

@ -63,6 +63,8 @@ const char* const deviceExtensionNames[] = {
"VK_KHR_shader_float_controls", "VK_KHR_shader_float_controls",
"VK_KHR_acceleration_structure", "VK_KHR_acceleration_structure",
"VK_KHR_ray_tracing_pipeline", "VK_KHR_ray_tracing_pipeline",
"VK_KHR_ray_query",
"VK_EXT_shader_atomic_float",
"VK_EXT_descriptor_heap", "VK_EXT_descriptor_heap",
"VK_KHR_deferred_host_operations", "VK_KHR_deferred_host_operations",
"VK_KHR_maintenance5", "VK_KHR_maintenance5",
@ -560,14 +562,30 @@ void Device::Initialize() {
app.pEngineName = "Crafter.Graphics"; app.pEngineName = "Crafter.Graphics";
app.apiVersion = VK_MAKE_VERSION(1, 4, 0); app.apiVersion = VK_MAKE_VERSION(1, 4, 0);
VkValidationFeatureEnableEXT enables[] = { // TODO(re-enable GPU-AV): once Vulkan SDK > 1.4.341 is the floor.
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT //
}; // GPU-Assisted Validation is opt-in via the enable list — leaving it
// out disables it. SDK 1.4.341's GPU-AV does not handle
// descriptor_heap pipelines (VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT
// with layout = VK_NULL_HANDLE): `PipelineSubState::GetPipelineLayoutUnion`
// null-derefs on the first dispatch/draw against such a pipeline.
//
// Tracked + fixed upstream:
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/12103
// Per spencer-lunarg (LunarG): broken in 1.4.341, fixed and landing
// in the next SDK release. Once we bump our Vulkan-Headers / SDK
// dependency past 1.4.341, restore the original enable list:
//
// VkValidationFeatureEnableEXT enables[] = {
// VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT
// };
// validationFeatures.enabledValidationFeatureCount = 1;
// validationFeatures.pEnabledValidationFeatures = enables;
//
// Standard validation (the layer itself) is still on; only the GPU-AV
// out-of-bounds / shader-instrumentation checks are temporarily off.
VkValidationFeaturesEXT validationFeatures = { VkValidationFeaturesEXT validationFeatures = {
.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
.enabledValidationFeatureCount = 1,
.pEnabledValidationFeatures = enables
}; };
VkInstanceCreateInfo instanceCreateInfo = {}; VkInstanceCreateInfo instanceCreateInfo = {};
@ -733,9 +751,15 @@ void Device::Initialize() {
.bufferDeviceAddress = VK_TRUE .bufferDeviceAddress = VK_TRUE
}; };
VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
.pNext = &features12,
.rayQuery = VK_TRUE
};
VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures{ VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR,
.pNext = &features12, .pNext = &physicalDeviceRayQueryFeatures,
.rayTracingPipeline = VK_TRUE .rayTracingPipeline = VK_TRUE
}; };
@ -809,6 +833,7 @@ void Device::Initialize() {
vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureBuildSizesKHR")); vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureBuildSizesKHR"));
vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkCreateAccelerationStructureKHR")); vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkCreateAccelerationStructureKHR"));
vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkDestroyAccelerationStructureKHR"));
vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(vkGetInstanceProcAddr(instance, "vkCmdBuildAccelerationStructuresKHR")); vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(vkGetInstanceProcAddr(instance, "vkCmdBuildAccelerationStructuresKHR"));
vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureDeviceAddressKHR")); vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureDeviceAddressKHR"));
vkCreateRayTracingPipelinesKHR = reinterpret_cast<PFN_vkCreateRayTracingPipelinesKHR>(vkGetInstanceProcAddr(instance, "vkCreateRayTracingPipelinesKHR")); vkCreateRayTracingPipelinesKHR = reinterpret_cast<PFN_vkCreateRayTracingPipelinesKHR>(vkGetInstanceProcAddr(instance, "vkCreateRayTracingPipelinesKHR"));

View file

@ -28,7 +28,38 @@ using namespace Crafter;
std::vector<RenderingElement3D*> RenderingElement3D::elements; std::vector<RenderingElement3D*> RenderingElement3D::elements;
// Registers `e` in the global `elements` list and records its slot in
// e->indexInElements so that Remove() can later unregister it with an O(1)
// swap-and-pop.
//
// Idempotent, mirroring Remove(): if `e` already occupies the slot it
// records, the call is a no-op. Appending it a second time would overwrite
// indexInElements and leave the first entry untracked — Remove() could
// never reach it again, and it would dangle once the element is destroyed.
//
// The "is it registered?" test compares the stored pointer rather than just
// checking the sentinel, so an element whose index was merely copied from
// another (registered) element is still appended normally.
void RenderingElement3D::Add(RenderingElement3D* e) {
    const std::uint32_t recorded = e->indexInElements;
    // The sentinel (uint32 max) fails the range test, so it needs no
    // separate check here.
    if (recorded < elements.size() && elements[recorded] == e) return;
    e->indexInElements = static_cast<std::uint32_t>(elements.size());
    elements.push_back(e);
}
// Unregisters `e` from the global `elements` list in O(1): the last element
// is moved into the vacated slot (and its indexInElements patched), then the
// vector is shrunk by one. Element order is therefore NOT preserved.
//
// Idempotent: callers like the Builder ghost flow toggle elements in/out
// and may try to remove an already-removed element. An element counts as
// registered only when it really occupies the slot it records; the sentinel,
// an out-of-range index, or a slot holding a different element are all
// treated as "not registered". That keeps a stale index (for example one
// copied from another element) from evicting an unrelated element, and
// avoids indexing past the end of an empty or shorter vector.
void RenderingElement3D::Remove(RenderingElement3D* e) {
    constexpr std::uint32_t notRegistered = std::numeric_limits<std::uint32_t>::max();
    const std::uint32_t slot = e->indexInElements;
    if (slot >= elements.size() || elements[slot] != e) {
        // Nothing to unlink; normalise the field so it reflects reality.
        e->indexInElements = notRegistered;
        return;
    }
    // Swap-and-pop. When `e` is itself the last element, the two writes
    // below are self-assignments and the pop alone removes it.
    RenderingElement3D* const tail = elements.back();
    elements[slot] = tail;
    tail->indexInElements = slot;
    elements.pop_back();
    e->indexInElements = notRegistered;
}
void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) { void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
auto& tlas = tlases[index];
const std::uint32_t primitiveCount = static_cast<std::uint32_t>(elements.size());
// Refit (UPDATE) is allowed when the count matches the count this AS
// was last built for. A change forces a full rebuild because the AS
// storage and instance buffer were sized for the old count. Refit is
// dramatically cheaper at scale (millions of instances) — it walks the
// existing BVH and updates AABBs rather than reconstructing topology.
const bool topologyChanged =
tlas.accelerationStructure == VK_NULL_HANDLE
|| primitiveCount != tlas.builtInstanceCount;
{ {
VkMemoryBarrier asBarrier { VkMemoryBarrier asBarrier {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@ -41,18 +72,38 @@ void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
0, 1, &asBarrier, 0, nullptr, 0, nullptr); 0, 1, &asBarrier, 0, nullptr, 0, nullptr);
} }
tlases[index].instanceBuffer.Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, elements.size()); if (topologyChanged) {
// Resize the host-visible inputs to match the new count.
for(std::uint32_t i = 0; i < elements.size(); i++) { // STORAGE_BUFFER_BIT is required because the application's compute
tlases[index].instanceBuffer.value[i] = elements[i]->instance; // shaders bind this buffer as a storage SSBO (e.g. to write
// per-instance transforms directly into the TLAS instance data).
tlas.instanceBuffer.Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount);
tlas.metadataBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount);
} }
tlases[index].instanceBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); for(std::uint32_t i = 0; i < primitiveCount; i++) {
if (elements[i]->transformOwnedByGpu) {
// Skip the transform field — the application's compute shader
// writes it earlier in this submission. Copy everything else.
auto& dst = tlas.instanceBuffer.value[i];
const auto& src = elements[i]->instance;
dst.instanceCustomIndex = src.instanceCustomIndex;
dst.mask = src.mask;
dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset;
dst.flags = src.flags;
dst.accelerationStructureReference = src.accelerationStructureReference;
} else {
tlas.instanceBuffer.value[i] = elements[i]->instance;
}
tlas.metadataBuffer.value[i] = elements[i]->userMetadata;
}
VkAccelerationStructureGeometryInstancesDataKHR instancesData = VkAccelerationStructureGeometryInstancesDataKHR { tlas.instanceBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
VkAccelerationStructureGeometryInstancesDataKHR instancesData {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
.arrayOfPointers = VK_FALSE, .arrayOfPointers = VK_FALSE,
.data = {tlases[index].instanceBuffer.address} .data = {tlas.instanceBuffer.address}
}; };
VkAccelerationStructureGeometryDataKHR geometryData; VkAccelerationStructureGeometryDataKHR geometryData;
@ -64,17 +115,22 @@ void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
.geometry = geometryData .geometry = geometryData
}; };
VkAccelerationStructureBuildGeometryInfoKHR tlasBuildGeometryInfo{ VkAccelerationStructureBuildGeometryInfoKHR tlasBuildGeometryInfo {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, // ALLOW_UPDATE is required for any subsequent UPDATE-mode (refit)
// build. Set it on every build so the AS we keep around can be
// refit on later frames.
.flags = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
.mode = topologyChanged
? VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR
: VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR,
.geometryCount = 1, .geometryCount = 1,
.pGeometries = &tlasGeometry .pGeometries = &tlasGeometry
}; };
// Query the memory sizes that will be needed for this TLAS if (topologyChanged) {
auto primitiveCount = static_cast<uint32_t>(elements.size()); // Query sizes for the fresh build, allocate AS storage + scratch.
VkAccelerationStructureBuildSizesInfoKHR tlasBuildSizes { VkAccelerationStructureBuildSizesInfoKHR tlasBuildSizes {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
}; };
@ -86,47 +142,59 @@ void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
&tlasBuildSizes &tlasBuildSizes
); );
tlases[index].scratchBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.buildScratchSize); // Scratch buffer must hold at least max(buildScratchSize, updateScratchSize).
tlasBuildGeometryInfo.scratchData.deviceAddress = tlases[index].scratchBuffer.address; // Sizing for buildScratchSize covers both — refit is always smaller.
tlas.scratchBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.buildScratchSize);
tlas.buffer.Resize(VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.accelerationStructureSize);
tlases[index].buffer.Resize(VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.accelerationStructureSize); // Destroy the previous AS handle before creating a new one — the
// pre-refit path leaked here on every frame.
if (tlas.accelerationStructure != VK_NULL_HANDLE) {
Device::vkDestroyAccelerationStructureKHR(Device::device, tlas.accelerationStructure, nullptr);
tlas.accelerationStructure = VK_NULL_HANDLE;
}
// Create and store the TLAS handle
VkAccelerationStructureCreateInfoKHR tlasCreateInfo { VkAccelerationStructureCreateInfoKHR tlasCreateInfo {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
.buffer = tlases[index].buffer.buffer, .buffer = tlas.buffer.buffer,
.offset = 0, .offset = 0,
.size = tlasBuildSizes.accelerationStructureSize, .size = tlasBuildSizes.accelerationStructureSize,
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
}; };
Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &tlasCreateInfo, nullptr, &tlas.accelerationStructure));
Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &tlasCreateInfo, nullptr, &tlases[index].accelerationStructure)); VkAccelerationStructureDeviceAddressInfoKHR addrInfo {
tlasBuildGeometryInfo.dstAccelerationStructure = tlases[index].accelerationStructure; .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
.accelerationStructure = tlas.accelerationStructure
};
tlas.address = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo);
tlas.builtInstanceCount = primitiveCount;
}
// For UPDATE mode, src == dst (in-place refit). For BUILD, src is
// VK_NULL_HANDLE and dst is the freshly-created handle.
tlasBuildGeometryInfo.scratchData.deviceAddress = tlas.scratchBuffer.address;
tlasBuildGeometryInfo.dstAccelerationStructure = tlas.accelerationStructure;
tlasBuildGeometryInfo.srcAccelerationStructure =
topologyChanged ? VK_NULL_HANDLE : tlas.accelerationStructure;
// Prepare the build range for the TLAS
VkAccelerationStructureBuildRangeInfoKHR tlasRangeInfo { VkAccelerationStructureBuildRangeInfoKHR tlasRangeInfo {
.primitiveCount = primitiveCount, .primitiveCount = primitiveCount,
.primitiveOffset = 0, .primitiveOffset = 0,
.firstVertex = 0, .firstVertex = 0,
.transformOffset = 0 .transformOffset = 0
}; };
VkAccelerationStructureBuildRangeInfoKHR* tlasRangeInfoPP = &tlasRangeInfo; VkAccelerationStructureBuildRangeInfoKHR* tlasRangeInfoPP = &tlasRangeInfo;
Device::vkCmdBuildAccelerationStructuresKHR(cmd, 1, &tlasBuildGeometryInfo, &tlasRangeInfoPP); Device::vkCmdBuildAccelerationStructuresKHR(cmd, 1, &tlasBuildGeometryInfo, &tlasRangeInfoPP);
vkCmdPipelineBarrier( vkCmdPipelineBarrier(
cmd, cmd,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0,
0, nullptr, 0, nullptr,
0, nullptr, 0, nullptr,
0, nullptr 0, nullptr
); );
VkAccelerationStructureDeviceAddressInfoKHR addrInfo {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
.accelerationStructure = tlases[index].accelerationStructure
};
tlases[index].address = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo);
} }

View file

@ -732,16 +732,12 @@ void Window::Render() {
// widget's OnTextInput / OnKeyDown sees them in the same frame. // widget's OnTextInput / OnKeyDown sees them in the same frame.
Device::TickKeyRepeats(); Device::TickKeyRepeats();
onUpdate.Invoke({startTime, startTime-lastFrameBegin}); // Bind the descriptor heaps BEFORE the user's update event fires.
#ifdef CRAFTER_TIMING // Any compute work the update lambda records (e.g. physics dispatches)
totalUpdate = std::chrono::nanoseconds(0); // needs the heaps bound at execution time; recording order in the cmd
updateTimings.clear(); // buffer dictates GPU execution order, so the bind must come first.
for (const std::pair<const EventListener<FrameTime>*, std::chrono::nanoseconds>& entry : onUpdate.listenerTimes) { // Pass-side dispatches still run with the same heaps bound — moving
updateTimings.push_back(entry); // the bind earlier doesn't change anything for them.
totalUpdate += entry.second;
}
#endif
if (descriptorHeap) { if (descriptorHeap) {
VkBindHeapInfoEXT resourceHeapInfo = { VkBindHeapInfoEXT resourceHeapInfo = {
.sType = VK_STRUCTURE_TYPE_BIND_HEAP_INFO_EXT, .sType = VK_STRUCTURE_TYPE_BIND_HEAP_INFO_EXT,
@ -766,6 +762,16 @@ void Window::Render() {
Device::vkCmdBindSamplerHeapEXT(drawCmdBuffers[currentBuffer], &samplerHeapInfo); Device::vkCmdBindSamplerHeapEXT(drawCmdBuffers[currentBuffer], &samplerHeapInfo);
} }
onUpdate.Invoke({startTime, startTime-lastFrameBegin});
#ifdef CRAFTER_TIMING
totalUpdate = std::chrono::nanoseconds(0);
updateTimings.clear();
for (const std::pair<const EventListener<FrameTime>*, std::chrono::nanoseconds>& entry : onUpdate.listenerTimes) {
updateTimings.push_back(entry);
totalUpdate += entry.second;
}
#endif
// Note: vkCmdClearColorImage is unavailable here — the swapchain is // Note: vkCmdClearColorImage is unavailable here — the swapchain is
// created with VK_IMAGE_USAGE_STORAGE_BIT only (no TRANSFER_DST_BIT). // created with VK_IMAGE_USAGE_STORAGE_BIT only (no TRANSFER_DST_BIT).
// Passes that need a background should write one explicitly (UIScene // Passes that need a background should write one explicitly (UIScene

View file

@ -123,6 +123,7 @@ export namespace Crafter {
inline static VkSwapchainKHR swapchain = VK_NULL_HANDLE; inline static VkSwapchainKHR swapchain = VK_NULL_HANDLE;
inline static PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; inline static PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR;
inline static PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; inline static PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR;
inline static PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR;
inline static PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; inline static PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR;
inline static PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; inline static PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR;
inline static PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; inline static PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR;

View file

@ -28,18 +28,58 @@ import :Window;
export namespace Crafter { export namespace Crafter {
struct TlasWithBuffer { struct TlasWithBuffer {
VkDeviceAddress address; VkDeviceAddress address = 0;
VulkanBuffer<char, false> buffer; VulkanBuffer<char, false> buffer;
VkAccelerationStructureKHR accelerationStructure; VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE;
VulkanBuffer<VkAccelerationStructureInstanceKHR, true> instanceBuffer; VulkanBuffer<VkAccelerationStructureInstanceKHR, true> instanceBuffer;
VulkanBuffer<char, false> scratchBuffer; VulkanBuffer<char, false> scratchBuffer;
// Parallel to instanceBuffer, indexed by TLAS instance ID. Filled
// from each element's userMetadata during BuildTLAS. Consumers
// (e.g. ray-query collision) bind this in the descriptor heap and
// look up via rayQueryGetIntersectionInstanceIdEXT to recover
// application-side per-instance data without touching the
// Vulkan-mandated instanceCustomIndex (which renderers may already
// use for their own encoding).
VulkanBuffer<std::uint32_t, true> metadataBuffer;
// Last instance count this TLAS was built (not refit) for. When
// elements.size() matches this, BuildTLAS does an in-place refit
// (UPDATE mode) which is dramatically cheaper than a full rebuild
// — refit walks the existing BVH and updates AABBs, while rebuild
// reconstructs the topology from scratch. A change in count forces
// a fresh rebuild because the AS is sized for that primitive count.
std::uint32_t builtInstanceCount = 0;
}; };
class RenderingElement3D { class RenderingElement3D {
public: public:
VkAccelerationStructureInstanceKHR instance; VkAccelerationStructureInstanceKHR instance;
// Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop.
// Sentinel value = not currently registered.
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
// Application-defined per-instance tag, copied verbatim into
// tlases[*].metadataBuffer at this element's TLAS instance ID
// every BuildTLAS. Crafter doesn't interpret it.
std::uint32_t userMetadata = 0;
// When true, BuildTLAS skips copying instance.transform into the
// TLAS instance buffer — the application's compute shader writes
// the transform field directly into instanceBuffer at this
// element's TLAS instance ID. Other instance fields (mask,
// customIndex, SBT offset, BLAS reference) are still copied from
// the CPU instance struct.
//
// Used to take per-frame transform updates off the CPU for bodies
// whose transforms derive from GPU-side state (physics nodes that
// already live on the GPU).
bool transformOwnedByGpu = false;
static std::vector<RenderingElement3D*> elements; static std::vector<RenderingElement3D*> elements;
inline static TlasWithBuffer tlases[Window::numFrames]; inline static TlasWithBuffer tlases[Window::numFrames];
static void BuildTLAS(VkCommandBuffer cmd, std::uint32_t index); static void BuildTLAS(VkCommandBuffer cmd, std::uint32_t index);
// Register / unregister with `elements`. Use these instead of touching
// the vector directly: linear find+erase is O(n) and pathological at
// the body counts physics targets (millions of braces).
static void Add(RenderingElement3D* e);
static void Remove(RenderingElement3D* e);
}; };
} }

View file

@ -247,6 +247,14 @@ export namespace Crafter {
std::uint16_t FontAtlasImageSlot() const noexcept { return fontAtlasImageSlot_; } std::uint16_t FontAtlasImageSlot() const noexcept { return fontAtlasImageSlot_; }
std::uint16_t FontAtlasSamplerSlot() const noexcept { return fontAtlasSamplerSlot_; } std::uint16_t FontAtlasSamplerSlot() const noexcept { return fontAtlasSamplerSlot_; }
// Heap slot whose descriptor in each per-frame heap points at that
// frame's swapchain image. Other passes (e.g. a ray-tracing pass
// that wants to render the world directly into the swapchain) can
// write to the same image by referencing this slot. Order in
// window.passes controls compositing — push such passes BEFORE
// the UI pass so UI overlays render on top.
std::uint16_t OutImageSlot() const noexcept { return outImageSlot_; }
private: private:
Window* window_ = nullptr; Window* window_ = nullptr;
DescriptorHeapVulkan* heap_ = nullptr; DescriptorHeapVulkan* heap_ = nullptr;