/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

// Implementation of Mesh::Build: uploads (or GPU-decompresses) vertex and
// index data and records a bottom-level acceleration structure build for it.
//
// NOTE(review): several template argument lists in this file appear to have
// been lost (e.g. `std::span> verticies`, `sizeof(Vector)`,
// `static_cast(...)`, `std::vector regions`). The tokens below are kept
// exactly as found; restore the arguments from the :Mesh interface before
// building.

module;
#include "vulkan/vulkan.h"
module Crafter.Graphics:Mesh_impl;
import Crafter.Math;
import Crafter.Asset;
import :Mesh;
import :Device;
import :Decompress;
import :Types;
import std;
using namespace Crafter;

namespace {
    // Buffer-usage flag set shared by both Build paths. The compressed path
    // appends VK_BUFFER_USAGE_2_MEMORY_DECOMPRESSION_BIT_EXT.
    constexpr VkBufferUsageFlags2 kVertexUsageBase = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
    // Index buffers use the vertex flag set plus storage-buffer usage.
    constexpr VkBufferUsageFlags2 kIndexUsageBase = kVertexUsageBase | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;

    // Records a bottom-level acceleration structure (BLAS) build for `self`
    // into `cmd`.
    //
    // Reads:  self.vertexBuffer.address, self.indexBuffer.address.
    // Writes: self.scratchBuffer, self.blasBuffer (both resized),
    //         self.accelerationStructure (newly created handle),
    //         self.blasAddr (device address of that handle).
    //
    // self        - mesh whose buffers describe the geometry and receive the result.
    // vertexCount - number of vertices; used as `maxVertex = vertexCount - 1`.
    // indexCount  - number of indices; the build uses `indexCount / 3` primitives.
    // cmd         - command buffer the build command is recorded into.
    //
    // The geometry is described as one opaque triangle geometry with
    // R32G32B32_SFLOAT positions, a stride of sizeof(Vector) and 32-bit indices.
    //
    // NOTE(review): `vertexCount` is unsigned, so `vertexCount - 1` wraps to the
    // maximum value when vertexCount == 0 - confirm callers never pass an empty
    // mesh, or that this is acceptable.
    // NOTE(review): self.accelerationStructure is overwritten without a visible
    // destroy of any previous handle in this function - confirm how rebuilding
    // the same Mesh is meant to release the old structure.
    void RecordBLASBuild(Mesh& self, std::uint32_t vertexCount, std::uint32_t indexCount, VkCommandBuffer cmd) {
        // Geometry inputs are referenced by device address, not by buffer handle.
        VkDeviceOrHostAddressConstKHR vertexAddr;
        vertexAddr.deviceAddress = self.vertexBuffer.address;
        VkDeviceOrHostAddressConstKHR indexAddr;
        indexAddr.deviceAddress = self.indexBuffer.address;

        auto trianglesData = VkAccelerationStructureGeometryTrianglesDataKHR {
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
            .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
            .vertexData = vertexAddr,
            .vertexStride = sizeof(Vector),
            .maxVertex = vertexCount - 1,
            .indexType = VK_INDEX_TYPE_UINT32,
            .indexData = indexAddr,
            // No per-geometry transform is supplied.
            .transformData = {.deviceAddress = 0}
        };
        VkAccelerationStructureGeometryDataKHR geometryData(trianglesData);
        VkAccelerationStructureGeometryKHR blasGeometry {
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
            .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
            .geometry = geometryData,
            .flags = VK_GEOMETRY_OPAQUE_BIT_KHR
        };
        // Full build (not an update) of a single-geometry bottom-level structure.
        VkAccelerationStructureBuildGeometryInfoKHR blasBuildGeometryInfo{
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
            .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
            .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
            .geometryCount = 1,
            .pGeometries = &blasGeometry,
        };

        // Three indices per triangle; any remainder is dropped by the integer division.
        auto primitiveCount = indexCount / 3;

        // Ask the device how large the structure and its scratch space must be.
        VkAccelerationStructureBuildSizesInfoKHR blasBuildSizes = {
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
        };
        Device::vkGetAccelerationStructureBuildSizesKHR(
            Device::device,
            VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
            &blasBuildGeometryInfo,
            &primitiveCount,
            &blasBuildSizes
        );

        // The scratch buffer lives on the mesh, so it outlives this call; the
        // build is only recorded here.
        self.scratchBuffer.Resize(
            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
            blasBuildSizes.buildScratchSize);
        blasBuildGeometryInfo.scratchData.deviceAddress = self.scratchBuffer.address;

        // Backing storage for the acceleration structure itself.
        self.blasBuffer.Resize(
            VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
            blasBuildSizes.accelerationStructureSize);

        VkAccelerationStructureCreateInfoKHR blasCreateInfo{
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
            .buffer = self.blasBuffer.buffer,
            .offset = 0,
            .size = blasBuildSizes.accelerationStructureSize,
            .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
        };
        // The creation result is passed through Device::CheckVkResult.
        Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &blasCreateInfo, nullptr, &self.accelerationStructure));
        blasBuildGeometryInfo.dstAccelerationStructure = self.accelerationStructure;

        VkAccelerationStructureBuildRangeInfoKHR blasRangeInfo {
            .primitiveCount = primitiveCount,
            .primitiveOffset = 0,
            .firstVertex = 0,
            .transformOffset = 0
        };
        // The build command takes an array of pointers to range-info arrays,
        // hence the extra level of indirection.
        VkAccelerationStructureBuildRangeInfoKHR* blasRangeInfoPP = &blasRangeInfo;
        Device::vkCmdBuildAccelerationStructuresKHR(cmd, 1, &blasBuildGeometryInfo, &blasRangeInfoPP);

        // Cache the structure's device address on the mesh.
        VkAccelerationStructureDeviceAddressInfoKHR addrInfo {
            .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
            .accelerationStructure = self.accelerationStructure
        };
        self.blasAddr = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo);
    }
}

// Builds the mesh from uncompressed geometry.
//
// verticies - vertex positions, copied into vertexBuffer.
// indicies  - indices, copied into indexBuffer as std::uint32_t values.
// cmd       - command buffer that receives the buffer flushes and the BLAS build.
//
// Both buffers are resized as host-visible, filled with std::memcpy through
// their `value` pointer, flushed, and then handed to RecordBLASBuild.
//
// NOTE(review): Resize is given an element count while memcpy is given a byte
// count - presumably Resize sizes the buffer in elements; confirm against the
// buffer type.
void Mesh::Build(std::span> verticies, std::span indicies, VkCommandBuffer cmd) {
    vertexBuffer.Resize(kVertexUsageBase, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, verticies.size());
    indexBuffer.Resize(kIndexUsageBase, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, indicies.size());
    std::memcpy(vertexBuffer.value, verticies.data(), verticies.size() * sizeof(Vector));
    std::memcpy(indexBuffer.value, indicies.data(), indicies.size() * sizeof(std::uint32_t));
    // Flush both uploads against the acceleration-structure build stage, which
    // is where RecordBLASBuild reads them.
    vertexBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
    indexBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
    RecordBLASBuild(*this, static_cast(verticies.size()), static_cast(indicies.size()), cmd);
}

// Builds the mesh from a compressed asset.
//
// asset - compressed mesh; this function reads asset.vertexCount,
//         asset.indexCount, asset.blob.bytes and asset.blob.regions.
//         Region 0 is decompressed into the vertex buffer and region 1 into
//         the index buffer; a third region, if present, is not consumed here.
// cmd   - command buffer that receives the decompression and the BLAS build.
//
// When Device::memoryDecompressionSupported is false the blob is decompressed
// on the CPU and the uncompressed Build overload is used instead.
void Mesh::Build(const CompressedMeshAsset& asset, VkCommandBuffer cmd) {
    if (!Device::memoryDecompressionSupported) {
        // CPU fallback: decompress into temporary host vectors, then take
        // the existing uncompressed path. The data region is decompressed
        // into a discard buffer (consumer is expected to handle data-stream
        // decoding via Compression::DecompressCPU on its own buffer).
        std::vector> vertices(asset.vertexCount);
        std::vector indices(asset.indexCount);
        // Sized from region 2 when the blob has one, otherwise empty.
        std::vector dataDiscard(
            asset.blob.regions.size() >= 3 ? asset.blob.regions[2].decompressedSize : 0);
        std::array, 3> outputs = {
            std::as_writable_bytes(std::span(vertices)),
            std::as_writable_bytes(std::span(indices)),
            std::span(dataDiscard),
        };
        // Pass one output span per region in the blob.
        // NOTE(review): std::span::first requires its count to be <= size(),
        // so this relies on the blob never having more than 3 regions -
        // confirm that is guaranteed by the asset format.
        Compression::DecompressCPU(asset.blob, std::span(outputs).first(asset.blob.regions.size()));
        Build(vertices, indices, cmd);
        return;
    }

    // GPU path: destination buffers additionally carry the memory-decompression
    // usage flag.
    vertexBuffer.Resize(
        kVertexUsageBase | VK_BUFFER_USAGE_2_MEMORY_DECOMPRESSION_BIT_EXT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
        asset.vertexCount);
    indexBuffer.Resize(
        kIndexUsageBase | VK_BUFFER_USAGE_2_MEMORY_DECOMPRESSION_BIT_EXT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
        asset.indexCount);

    // Stage the whole compressed blob in one buffer; regions are addressed
    // inside it by their srcOffset.
    // NOTE(review): the size is passed through a static_cast, which may narrow
    // it, while the memcpy below uses the full bytes.size() - confirm the
    // target type.
    compressedStaging.Resize(
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_2_MEMORY_DECOMPRESSION_BIT_EXT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
        static_cast(asset.blob.bytes.size()));
    std::memcpy(compressedStaging.value, asset.blob.bytes.data(), asset.blob.bytes.size());
    // NOTE(review): called without arguments, unlike the flushes in the
    // uncompressed path - confirm this overload is the intended one here.
    compressedStaging.FlushDevice();

    // Destination address per region index; only entries 0 and 1 are read by
    // the loop below.
    std::array dstAddresses = {
        vertexBuffer.address,
        indexBuffer.address,
        0, // data region is not consumed by Mesh; caller handles it separately.
    };

    // Collect decompression regions for at most the first two blob regions,
    // skipping any region whose decompressed size is zero.
    std::vector regions;
    for (std::size_t i = 0; i < asset.blob.regions.size() && i < 2; ++i) {
        const Compression::RegionMeta& r = asset.blob.regions[i];
        if (r.decompressedSize == 0) continue;
        // View of this region's compressed bytes on the host side.
        std::span streamBytes(
            asset.blob.bytes.data() + r.srcOffset,
            static_cast(r.compressedSize));
        // Source address is the same offset inside the staging buffer;
        // results are appended to `regions`.
        Decompress::ExpandStreamToTileRegions(
            streamBytes,
            compressedStaging.address + r.srcOffset,
            dstAddresses[i],
            regions);
    }

    // Record the decompression, naming the acceleration-structure build stage
    // and read access as the consumer of the output.
    Decompress::DecompressOnGPU(
        cmd,
        regions,
        VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
        VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR);
    RecordBLASBuild(*this, asset.vertexCount, asset.indexCount, cmd);
}