/* Crafter®.Graphics Copyright (C) 2026 Catcrafts® Catcrafts.net This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ module; #include module Crafter.Graphics:RenderingElement3D_impl; import :RenderingElement3D; import std; using namespace Crafter; std::vector RenderingElement3D::elements; void RenderingElement3D::Add(RenderingElement3D* e) { e->indexInElements = static_cast(elements.size()); elements.push_back(e); } void RenderingElement3D::Remove(RenderingElement3D* e) { // Idempotent: callers like Builder ghost flow toggle elements in/out // and may try to remove an already-removed element. std::uint32_t idx = e->indexInElements; if (idx == std::numeric_limits::max()) return; std::uint32_t last = static_cast(elements.size() - 1); if (idx != last) { elements[idx] = elements[last]; elements[idx]->indexInElements = idx; } elements.pop_back(); e->indexInElements = std::numeric_limits::max(); } void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) { auto& tlas = tlases[index]; const std::uint32_t primitiveCount = static_cast(elements.size()); // Refit (UPDATE) is allowed when the count matches the count this AS // was last built for. A change forces a full rebuild because the AS // storage and instance buffer were sized for the old count. Refit is // dramatically cheaper at scale (millions of instances) — it walks the // existing BVH and updates AABBs rather than reconstructing topology. const bool topologyChanged = tlas.accelerationStructure == VK_NULL_HANDLE || primitiveCount != tlas.builtInstanceCount; { VkMemoryBarrier asBarrier { .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &asBarrier, 0, nullptr, 0, nullptr); } if (topologyChanged) { // Resize the host-visible inputs to match the new count. // STORAGE_BUFFER_BIT is required because the application's compute // shaders bind this buffer as a storage SSBO (e.g. to write // per-instance transforms directly into the TLAS instance data). tlas.instanceBuffer.Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount); tlas.metadataBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount); } for(std::uint32_t i = 0; i < primitiveCount; i++) { if (elements[i]->transformOwnedByGpu) { // Skip the transform field — the application's compute shader // writes it earlier in this submission. Copy everything else. auto& dst = tlas.instanceBuffer.value[i]; const auto& src = elements[i]->instance; dst.instanceCustomIndex = src.instanceCustomIndex; dst.mask = src.mask; dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset; dst.flags = src.flags; dst.accelerationStructureReference = src.accelerationStructureReference; } else { tlas.instanceBuffer.value[i] = elements[i]->instance; } tlas.metadataBuffer.value[i] = elements[i]->userMetadata; } tlas.instanceBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); VkAccelerationStructureGeometryInstancesDataKHR instancesData { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, .arrayOfPointers = VK_FALSE, .data = {tlas.instanceBuffer.address} }; VkAccelerationStructureGeometryDataKHR geometryData; geometryData.instances = instancesData; VkAccelerationStructureGeometryKHR tlasGeometry { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, .geometry = geometryData }; VkAccelerationStructureBuildGeometryInfoKHR tlasBuildGeometryInfo { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, // ALLOW_UPDATE is required for any subsequent UPDATE-mode (refit) // build. Set it on every build so the AS we keep around can be // refit on later frames. .flags = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR, .mode = topologyChanged ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR, .geometryCount = 1, .pGeometries = &tlasGeometry }; if (topologyChanged) { // Query sizes for the fresh build, allocate AS storage + scratch. VkAccelerationStructureBuildSizesInfoKHR tlasBuildSizes { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR }; Device::vkGetAccelerationStructureBuildSizesKHR( Device::device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &tlasBuildGeometryInfo, &primitiveCount, &tlasBuildSizes ); // Scratch buffer must hold at least max(buildScratchSize, updateScratchSize). // Sizing for buildScratchSize covers both — refit is always smaller. tlas.scratchBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.buildScratchSize); tlas.buffer.Resize(VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.accelerationStructureSize); // Destroy the previous AS handle before creating a new one — the // pre-refit path leaked here on every frame. if (tlas.accelerationStructure != VK_NULL_HANDLE) { Device::vkDestroyAccelerationStructureKHR(Device::device, tlas.accelerationStructure, nullptr); tlas.accelerationStructure = VK_NULL_HANDLE; } VkAccelerationStructureCreateInfoKHR tlasCreateInfo { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, .buffer = tlas.buffer.buffer, .offset = 0, .size = tlasBuildSizes.accelerationStructureSize, .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, }; Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &tlasCreateInfo, nullptr, &tlas.accelerationStructure)); VkAccelerationStructureDeviceAddressInfoKHR addrInfo { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, .accelerationStructure = tlas.accelerationStructure }; tlas.address = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo); tlas.builtInstanceCount = primitiveCount; } // For UPDATE mode, src == dst (in-place refit). For BUILD, src is // VK_NULL_HANDLE and dst is the freshly-created handle. tlasBuildGeometryInfo.scratchData.deviceAddress = tlas.scratchBuffer.address; tlasBuildGeometryInfo.dstAccelerationStructure = tlas.accelerationStructure; tlasBuildGeometryInfo.srcAccelerationStructure = topologyChanged ? VK_NULL_HANDLE : tlas.accelerationStructure; VkAccelerationStructureBuildRangeInfoKHR tlasRangeInfo { .primitiveCount = primitiveCount, .primitiveOffset = 0, .firstVertex = 0, .transformOffset = 0 }; VkAccelerationStructureBuildRangeInfoKHR* tlasRangeInfoPP = &tlasRangeInfo; Device::vkCmdBuildAccelerationStructuresKHR(cmd, 1, &tlasBuildGeometryInfo, &tlasRangeInfoPP); vkCmdPipelineBarrier( cmd, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr ); }