update
This commit is contained in:
parent
825da78f7f
commit
b3db40ebec
6 changed files with 212 additions and 64 deletions
|
|
@ -63,6 +63,8 @@ const char* const deviceExtensionNames[] = {
|
|||
"VK_KHR_shader_float_controls",
|
||||
"VK_KHR_acceleration_structure",
|
||||
"VK_KHR_ray_tracing_pipeline",
|
||||
"VK_KHR_ray_query",
|
||||
"VK_EXT_shader_atomic_float",
|
||||
"VK_EXT_descriptor_heap",
|
||||
"VK_KHR_deferred_host_operations",
|
||||
"VK_KHR_maintenance5",
|
||||
|
|
@ -560,14 +562,30 @@ void Device::Initialize() {
|
|||
app.pEngineName = "Crafter.Graphics";
|
||||
app.apiVersion = VK_MAKE_VERSION(1, 4, 0);
|
||||
|
||||
VkValidationFeatureEnableEXT enables[] = {
|
||||
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT
|
||||
};
|
||||
|
||||
// TODO(re-enable GPU-AV): once Vulkan SDK > 1.4.341 is the floor.
|
||||
//
|
||||
// GPU-Assisted Validation is opt-in via the enable list — leaving it
|
||||
// out disables it. SDK 1.4.341's GPU-AV does not handle
|
||||
// descriptor_heap pipelines (VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT
|
||||
// with layout = VK_NULL_HANDLE): `PipelineSubState::GetPipelineLayoutUnion`
|
||||
// null-derefs on the first dispatch/draw against such a pipeline.
|
||||
//
|
||||
// Tracked + fixed upstream:
|
||||
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/12103
|
||||
// Per spencer-lunarg (LunarG): broken in 1.4.341, fixed and landing
|
||||
// in the next SDK release. Once we bump our Vulkan-Headers / SDK
|
||||
// dependency past 1.4.341, restore the original enable list:
|
||||
//
|
||||
// VkValidationFeatureEnableEXT enables[] = {
|
||||
// VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT
|
||||
// };
|
||||
// validationFeatures.enabledValidationFeatureCount = 1;
|
||||
// validationFeatures.pEnabledValidationFeatures = enables;
|
||||
//
|
||||
// Standard validation (the layer itself) is still on; only the GPU-AV
|
||||
// out-of-bounds / shader-instrumentation checks are temporarily off.
|
||||
VkValidationFeaturesEXT validationFeatures = {
|
||||
.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
|
||||
.enabledValidationFeatureCount = 1,
|
||||
.pEnabledValidationFeatures = enables
|
||||
};
|
||||
|
||||
VkInstanceCreateInfo instanceCreateInfo = {};
|
||||
|
|
@ -733,9 +751,15 @@ void Device::Initialize() {
|
|||
.bufferDeviceAddress = VK_TRUE
|
||||
};
|
||||
|
||||
VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
|
||||
.pNext = &features12,
|
||||
.rayQuery = VK_TRUE
|
||||
};
|
||||
|
||||
VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR,
|
||||
.pNext = &features12,
|
||||
.pNext = &physicalDeviceRayQueryFeatures,
|
||||
.rayTracingPipeline = VK_TRUE
|
||||
};
|
||||
|
||||
|
|
@ -809,6 +833,7 @@ void Device::Initialize() {
|
|||
|
||||
vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureBuildSizesKHR"));
|
||||
vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkCreateAccelerationStructureKHR"));
|
||||
vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkDestroyAccelerationStructureKHR"));
|
||||
vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(vkGetInstanceProcAddr(instance, "vkCmdBuildAccelerationStructuresKHR"));
|
||||
vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureDeviceAddressKHR"));
|
||||
vkCreateRayTracingPipelinesKHR = reinterpret_cast<PFN_vkCreateRayTracingPipelinesKHR>(vkGetInstanceProcAddr(instance, "vkCreateRayTracingPipelinesKHR"));
|
||||
|
|
|
|||
|
|
@ -28,7 +28,38 @@ using namespace Crafter;
|
|||
|
||||
std::vector<RenderingElement3D*> RenderingElement3D::elements;
|
||||
|
||||
/// Registers `e` in the static `elements` list and stores the slot it was
/// placed in into `e->indexInElements`.
///
/// Calling this for an element that is already registered is a no-op.
/// Without that guard a second Add would push a duplicate pointer and
/// overwrite the tracked index, leaving the first slot impossible to remove.
///
/// @param e Element to register; must be non-null.
void RenderingElement3D::Add(RenderingElement3D* e) {
    // "Already registered" is decided by checking that the recorded slot
    // really holds this element, so the guard does not depend on what value
    // indexInElements has before the first Add.
    // NOTE(review): assumes indexInElements is initialized by the class
    // (Remove reads it unconditionally as well) — confirm in the declaration.
    const std::uint32_t recorded = e->indexInElements;
    if (recorded < elements.size() && elements[recorded] == e) {
        return;
    }

    e->indexInElements = static_cast<std::uint32_t>(elements.size());
    elements.push_back(e);
}
|
||||
|
||||
/// Unregisters `e` from the static `elements` list using swap-and-pop: the
/// last element is moved into the vacated slot and its `indexInElements` is
/// updated, so removal does not shift the other entries.
///
/// Idempotent: callers like the Builder ghost flow toggle elements in/out
/// and may try to remove an already-removed element. An element whose
/// recorded index does not point at itself (stale index, or an empty list)
/// is treated as "not registered" instead of touching someone else's slot.
///
/// @param e Element to unregister; must be non-null.
void RenderingElement3D::Remove(RenderingElement3D* e) {
    constexpr std::uint32_t kNotRegistered = std::numeric_limits<std::uint32_t>::max();

    const std::uint32_t idx = e->indexInElements;
    if (idx == kNotRegistered) {
        return;
    }

    // Validate ownership of the slot before mutating the list. Without this,
    // a stale index would index out of bounds, call pop_back() on an empty
    // vector (elements.size() - 1 wraps around), or evict an unrelated
    // element that now lives at that index.
    if (idx >= elements.size() || elements[idx] != e) {
        e->indexInElements = kNotRegistered;
        return;
    }

    const std::uint32_t last = static_cast<std::uint32_t>(elements.size() - 1);
    if (idx != last) {
        // Move the tail element into the hole and re-point its index.
        RenderingElement3D* const moved = elements[last];
        elements[idx] = moved;
        moved->indexInElements = idx;
    }
    elements.pop_back();
    e->indexInElements = kNotRegistered;
}
|
||||
|
||||
void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
|
||||
auto& tlas = tlases[index];
|
||||
const std::uint32_t primitiveCount = static_cast<std::uint32_t>(elements.size());
|
||||
|
||||
// Refit (UPDATE) is allowed when the count matches the count this AS
|
||||
// was last built for. A change forces a full rebuild because the AS
|
||||
// storage and instance buffer were sized for the old count. Refit is
|
||||
// dramatically cheaper at scale (millions of instances) — it walks the
|
||||
// existing BVH and updates AABBs rather than reconstructing topology.
|
||||
const bool topologyChanged =
|
||||
tlas.accelerationStructure == VK_NULL_HANDLE
|
||||
|| primitiveCount != tlas.builtInstanceCount;
|
||||
|
||||
{
|
||||
VkMemoryBarrier asBarrier {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
|
|
@ -41,18 +72,38 @@ void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
|
|||
0, 1, &asBarrier, 0, nullptr, 0, nullptr);
|
||||
}
|
||||
|
||||
tlases[index].instanceBuffer.Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, elements.size());
|
||||
|
||||
for(std::uint32_t i = 0; i < elements.size(); i++) {
|
||||
tlases[index].instanceBuffer.value[i] = elements[i]->instance;
|
||||
if (topologyChanged) {
|
||||
// Resize the host-visible inputs to match the new count.
|
||||
// STORAGE_BUFFER_BIT is required because the application's compute
|
||||
// shaders bind this buffer as a storage SSBO (e.g. to write
|
||||
// per-instance transforms directly into the TLAS instance data).
|
||||
tlas.instanceBuffer.Resize(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount);
|
||||
tlas.metadataBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, primitiveCount);
|
||||
}
|
||||
|
||||
tlases[index].instanceBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
|
||||
for(std::uint32_t i = 0; i < primitiveCount; i++) {
|
||||
if (elements[i]->transformOwnedByGpu) {
|
||||
// Skip the transform field — the application's compute shader
|
||||
// writes it earlier in this submission. Copy everything else.
|
||||
auto& dst = tlas.instanceBuffer.value[i];
|
||||
const auto& src = elements[i]->instance;
|
||||
dst.instanceCustomIndex = src.instanceCustomIndex;
|
||||
dst.mask = src.mask;
|
||||
dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset;
|
||||
dst.flags = src.flags;
|
||||
dst.accelerationStructureReference = src.accelerationStructureReference;
|
||||
} else {
|
||||
tlas.instanceBuffer.value[i] = elements[i]->instance;
|
||||
}
|
||||
tlas.metadataBuffer.value[i] = elements[i]->userMetadata;
|
||||
}
|
||||
|
||||
VkAccelerationStructureGeometryInstancesDataKHR instancesData = VkAccelerationStructureGeometryInstancesDataKHR {
|
||||
tlas.instanceBuffer.FlushDevice(cmd, VK_ACCESS_MEMORY_READ_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
|
||||
|
||||
VkAccelerationStructureGeometryInstancesDataKHR instancesData {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
|
||||
.arrayOfPointers = VK_FALSE,
|
||||
.data = {tlases[index].instanceBuffer.address}
|
||||
.data = {tlas.instanceBuffer.address}
|
||||
};
|
||||
|
||||
VkAccelerationStructureGeometryDataKHR geometryData;
|
||||
|
|
@ -64,69 +115,86 @@ void RenderingElement3D::BuildTLAS(VkCommandBuffer cmd, std::uint32_t index) {
|
|||
.geometry = geometryData
|
||||
};
|
||||
|
||||
VkAccelerationStructureBuildGeometryInfoKHR tlasBuildGeometryInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
|
||||
VkAccelerationStructureBuildGeometryInfoKHR tlasBuildGeometryInfo {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
|
||||
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
|
||||
.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
|
||||
// ALLOW_UPDATE is required for any subsequent UPDATE-mode (refit)
|
||||
// build. Set it on every build so the AS we keep around can be
|
||||
// refit on later frames.
|
||||
.flags = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
|
||||
.mode = topologyChanged
|
||||
? VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR
|
||||
: VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR,
|
||||
.geometryCount = 1,
|
||||
.pGeometries = &tlasGeometry
|
||||
};
|
||||
|
||||
// Query the memory sizes that will be needed for this TLAS
|
||||
auto primitiveCount = static_cast<uint32_t>(elements.size());
|
||||
if (topologyChanged) {
|
||||
// Query sizes for the fresh build, allocate AS storage + scratch.
|
||||
VkAccelerationStructureBuildSizesInfoKHR tlasBuildSizes {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
|
||||
};
|
||||
Device::vkGetAccelerationStructureBuildSizesKHR(
|
||||
Device::device,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
|
||||
&tlasBuildGeometryInfo,
|
||||
&primitiveCount,
|
||||
&tlasBuildSizes
|
||||
);
|
||||
|
||||
VkAccelerationStructureBuildSizesInfoKHR tlasBuildSizes {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
|
||||
};
|
||||
Device::vkGetAccelerationStructureBuildSizesKHR(
|
||||
Device::device,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
|
||||
&tlasBuildGeometryInfo,
|
||||
&primitiveCount,
|
||||
&tlasBuildSizes
|
||||
);
|
||||
// Scratch buffer must hold at least max(buildScratchSize, updateScratchSize).
|
||||
// Sizing for buildScratchSize covers both — refit is always smaller.
|
||||
tlas.scratchBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.buildScratchSize);
|
||||
tlas.buffer.Resize(VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.accelerationStructureSize);
|
||||
|
||||
tlases[index].scratchBuffer.Resize(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.buildScratchSize);
|
||||
tlasBuildGeometryInfo.scratchData.deviceAddress = tlases[index].scratchBuffer.address;
|
||||
// Destroy the previous AS handle before creating a new one — the
|
||||
// pre-refit path leaked here on every frame.
|
||||
if (tlas.accelerationStructure != VK_NULL_HANDLE) {
|
||||
Device::vkDestroyAccelerationStructureKHR(Device::device, tlas.accelerationStructure, nullptr);
|
||||
tlas.accelerationStructure = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
tlases[index].buffer.Resize(VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, tlasBuildSizes.accelerationStructureSize);
|
||||
VkAccelerationStructureCreateInfoKHR tlasCreateInfo {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
|
||||
.buffer = tlas.buffer.buffer,
|
||||
.offset = 0,
|
||||
.size = tlasBuildSizes.accelerationStructureSize,
|
||||
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
|
||||
};
|
||||
Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &tlasCreateInfo, nullptr, &tlas.accelerationStructure));
|
||||
|
||||
// Create and store the TLAS handle
|
||||
VkAccelerationStructureCreateInfoKHR tlasCreateInfo {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
|
||||
.buffer = tlases[index].buffer.buffer,
|
||||
.offset = 0,
|
||||
.size = tlasBuildSizes.accelerationStructureSize,
|
||||
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
|
||||
};
|
||||
VkAccelerationStructureDeviceAddressInfoKHR addrInfo {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
|
||||
.accelerationStructure = tlas.accelerationStructure
|
||||
};
|
||||
tlas.address = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo);
|
||||
|
||||
Device::CheckVkResult(Device::vkCreateAccelerationStructureKHR(Device::device, &tlasCreateInfo, nullptr, &tlases[index].accelerationStructure));
|
||||
tlasBuildGeometryInfo.dstAccelerationStructure = tlases[index].accelerationStructure;
|
||||
tlas.builtInstanceCount = primitiveCount;
|
||||
}
|
||||
|
||||
// For UPDATE mode, src == dst (in-place refit). For BUILD, src is
|
||||
// VK_NULL_HANDLE and dst is the freshly-created handle.
|
||||
tlasBuildGeometryInfo.scratchData.deviceAddress = tlas.scratchBuffer.address;
|
||||
tlasBuildGeometryInfo.dstAccelerationStructure = tlas.accelerationStructure;
|
||||
tlasBuildGeometryInfo.srcAccelerationStructure =
|
||||
topologyChanged ? VK_NULL_HANDLE : tlas.accelerationStructure;
|
||||
|
||||
// Prepare the build range for the TLAS
|
||||
VkAccelerationStructureBuildRangeInfoKHR tlasRangeInfo {
|
||||
.primitiveCount = primitiveCount,
|
||||
.primitiveOffset = 0,
|
||||
.firstVertex = 0,
|
||||
.transformOffset = 0
|
||||
};
|
||||
|
||||
VkAccelerationStructureBuildRangeInfoKHR* tlasRangeInfoPP = &tlasRangeInfo;
|
||||
Device::vkCmdBuildAccelerationStructuresKHR(cmd, 1, &tlasBuildGeometryInfo, &tlasRangeInfoPP);
|
||||
|
||||
vkCmdPipelineBarrier(
|
||||
cmd,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0,
|
||||
0, nullptr,
|
||||
0, nullptr,
|
||||
0, nullptr
|
||||
);
|
||||
|
||||
VkAccelerationStructureDeviceAddressInfoKHR addrInfo {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
|
||||
.accelerationStructure = tlases[index].accelerationStructure
|
||||
};
|
||||
tlases[index].address = Device::vkGetAccelerationStructureDeviceAddressKHR(Device::device, &addrInfo);
|
||||
}
|
||||
|
|
@ -732,16 +732,12 @@ void Window::Render() {
|
|||
// widget's OnTextInput / OnKeyDown sees them in the same frame.
|
||||
Device::TickKeyRepeats();
|
||||
|
||||
onUpdate.Invoke({startTime, startTime-lastFrameBegin});
|
||||
#ifdef CRAFTER_TIMING
|
||||
totalUpdate = std::chrono::nanoseconds(0);
|
||||
updateTimings.clear();
|
||||
for (const std::pair<const EventListener<FrameTime>*, std::chrono::nanoseconds>& entry : onUpdate.listenerTimes) {
|
||||
updateTimings.push_back(entry);
|
||||
totalUpdate += entry.second;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bind the descriptor heaps BEFORE the user's update event fires.
|
||||
// Any compute work the update lambda records (e.g. physics dispatches)
|
||||
// needs the heaps bound at execution time; recording order in the cmd
|
||||
// buffer dictates GPU execution order, so the bind must come first.
|
||||
// Pass-side dispatches still run with the same heaps bound — moving
|
||||
// the bind earlier doesn't change anything for them.
|
||||
if (descriptorHeap) {
|
||||
VkBindHeapInfoEXT resourceHeapInfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_BIND_HEAP_INFO_EXT,
|
||||
|
|
@ -766,6 +762,16 @@ void Window::Render() {
|
|||
Device::vkCmdBindSamplerHeapEXT(drawCmdBuffers[currentBuffer], &samplerHeapInfo);
|
||||
}
|
||||
|
||||
onUpdate.Invoke({startTime, startTime-lastFrameBegin});
|
||||
#ifdef CRAFTER_TIMING
|
||||
totalUpdate = std::chrono::nanoseconds(0);
|
||||
updateTimings.clear();
|
||||
for (const std::pair<const EventListener<FrameTime>*, std::chrono::nanoseconds>& entry : onUpdate.listenerTimes) {
|
||||
updateTimings.push_back(entry);
|
||||
totalUpdate += entry.second;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Note: vkCmdClearColorImage is unavailable here — the swapchain is
|
||||
// created with VK_IMAGE_USAGE_STORAGE_BIT only (no TRANSFER_DST_BIT).
|
||||
// Passes that need a background should write one explicitly (UIScene
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue