Crafter.Graphics/implementations/Crafter.Graphics-Device.cpp
catbot 950059c86e fix(vulkan-rt): work around NVIDIA descriptor-heap AS-read device-loss (#15)
Reading an acceleration structure through VK_EXT_descriptor_heap aborts
with VK_ERROR_DEVICE_LOST on NVIDIA 610.43.02 — a brand-new-extension
driver fault isolated in #7 (engine setup is correct and validation-clean;
images/buffers through the same heap work, and both traceRayEXT and inline
rayQuery fault identically on the AS read).

An acceleration structure can equally be reached by its device address via
OpConvertUToAccelerationStructureKHR, which reads no descriptor and so never
touches the faulting heap path. glslang has no GLSL spelling for that
conversion, so VulkanShader rewrites the compiled SPIR-V at module-load
time: every `OpLoad %accelStruct <heap-ptr>` becomes a load of the TLAS
device address from a synthesized push-constant block followed by the
convert. RTPass pushes the active frame's TLAS address into that push
constant. User GLSL and example code are unchanged; acceleration structures
still bind into the heap normally.

The workaround is gated on Device::workaroundDescriptorHeapAS (true only on
the NVIDIA proprietary driver) and confined to one fenced block in
Crafter.Graphics-ShaderVulkan.cppm plus the RTPass push and the shaderInt64
feature toggle — delete those once a fixed NVIDIA driver ships and the heap
AS read becomes the direct path again.

Verified: VulkanTriangle ray-traces correctly on native NVIDIA (RTX 4090),
validation-layer-clean, no device loss. The SPIR-V rewrite was independently
validated with spirv-val on both the VulkanTriangle and Sponza raygen
modules.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 01:59:54 +00:00

800 lines
No EOL
34 KiB
C++

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
#include "vulkan/vulkan.h"
#include "vulkan/vk_enum_string_helper.h"
#define GET_EXTENSION_FUNCTION(_id) ((PFN_##_id)(vkGetInstanceProcAddr(instance, #_id)))
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
#include <linux/input-event-codes.h>
#include <xkbcommon/xkbcommon.h>
#include <string.h>
#include "../lib/xdg-shell-client-protocol.h"
#include "../lib/wayland-xdg-decoration-unstable-v1-client-protocol.h"
#include "../lib/fractional-scale-v1.h"
#include "../lib/viewporter.h"
#include <wayland-client.h>
#include <wayland-client-protocol.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#endif
module Crafter.Graphics:Device_impl;
import :Device;
import :Window;
import :Types;
import :Clipboard;
import std;
using namespace Crafter;
// Instance extensions passed to vkCreateInstance. Each platform entry ends
// with its own comma so that enabling more than one window backend cannot
// silently merge two adjacent string literals into a single bogus name.
const char* const instanceExtensionNames[] = {
    "VK_EXT_debug_utils",
    "VK_KHR_surface",
#ifdef CRAFTER_GRAPHICS_WINDOW_WIN32
    "VK_KHR_win32_surface",
#endif
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
    "VK_KHR_wayland_surface",
#endif
};

// Device extensions that are always enabled when the logical device is created.
const char* const deviceExtensionNames[] = {
    "VK_KHR_swapchain",
    "VK_KHR_spirv_1_4",
    "VK_KHR_shader_float_controls",
    "VK_KHR_acceleration_structure",
    "VK_KHR_ray_tracing_pipeline",
    "VK_KHR_ray_query",
    "VK_EXT_shader_atomic_float",
    "VK_EXT_descriptor_heap",
    "VK_KHR_deferred_host_operations",
    "VK_KHR_maintenance5",
    "VK_KHR_shader_untyped_pointers",
    "VK_EXT_device_fault",
};

// Instance layers that are enabled only when every one of them is installed.
const char* const layerNames[] = {
    "VK_LAYER_KHRONOS_validation",
};
/**
 * Throws std::runtime_error (carrying the VkResult name) for every result
 * other than VK_SUCCESS.
 *
 * For VK_ERROR_DEVICE_LOST the VK_EXT_device_fault report is printed first
 * and any vendor binary is written to a timestamped file in the working
 * directory. That diagnostic step is skipped while the logical device or the
 * vkGetDeviceFaultInfoEXT entry point is still unset, e.g. when device
 * creation itself reports the loss.
 */
void Device::CheckVkResult(VkResult result) {
    if (result == VK_SUCCESS) {
        return;
    }

    const bool canQueryFault = device != VK_NULL_HANDLE && Device::vkGetDeviceFaultInfoEXT != nullptr;
    if (result == VK_ERROR_DEVICE_LOST && canQueryFault) {
        // First call: ask only for the element counts.
        VkDeviceFaultCountsEXT faultCounts = {
            .sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT,
            .pNext = NULL,
        };
        Device::vkGetDeviceFaultInfoEXT(device, &faultCounts, NULL);

        std::vector<VkDeviceFaultAddressInfoEXT> addressInfos(faultCounts.addressInfoCount);
        std::vector<VkDeviceFaultVendorInfoEXT> vendorInfos(faultCounts.vendorInfoCount);
        std::vector<char> vendorBinaryData(faultCounts.vendorBinarySize);

        // Second call: fill the arrays sized above.
        VkDeviceFaultInfoEXT faultInfo = {
            .sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT,
            .pNext = NULL,
            .pAddressInfos = addressInfos.data(),
            .pVendorInfos = vendorInfos.data(),
            .pVendorBinaryData = vendorBinaryData.data(),
        };
        Device::vkGetDeviceFaultInfoEXT(device, &faultCounts, &faultInfo);

        std::println("{}", faultInfo.description);
        std::println("{} AddressInfos:", addressInfos.size());
        for (const VkDeviceFaultAddressInfoEXT& info : addressInfos) {
            std::println("\t{} {}", static_cast<uint32_t>(info.addressType), info.reportedAddress);
        }
        std::println("{} vendorInfos:", vendorInfos.size());
        for (const VkDeviceFaultVendorInfoEXT& info : vendorInfos) {
            std::println("\t{} {} {}", info.description, info.vendorFaultCode, info.vendorFaultData);
        }

        if (!vendorBinaryData.empty()) {
            // Pick the file extension from the vendor ID in the binary header.
            std::string ext = ".bin";
            if (vendorBinaryData.size() >= sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT)) {
                VkDeviceFaultVendorBinaryHeaderVersionOneEXT header;
                std::memcpy(&header, vendorBinaryData.data(), sizeof(header));
                if (header.vendorID == 0x10DE) { // NVIDIA
                    ext = ".nv-gpudmp";
                }
            }
            const auto now = std::chrono::system_clock::now();
            const std::string dumpPath = std::format("gpu_crash_dump-{:%Y%m%d-%H%M%S}{}", now, ext);
            std::ofstream file(dumpPath, std::ios::binary);
            const auto dumpSize = static_cast<std::streamsize>(vendorBinaryData.size());
            if (file.write(vendorBinaryData.data(), dumpSize) && file.flush()) {
                // Non-throwing overload: a path-resolution failure must not
                // replace the VkResult error thrown below.
                std::error_code resolveError;
                const std::filesystem::path resolved = std::filesystem::canonical(dumpPath, resolveError);
                std::println("Vendor binary saved to: {}", resolveError ? dumpPath : resolved.string());
            } else {
                std::println(stderr, "Failed to write vendor binary to: {}", dumpPath);
            }
        }
    }

    throw std::runtime_error(string_VkResult(result));
}
VkBool32 onError(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* callbackData, void* userData)
{
printf("Vulkan ");
switch (type)
{
case VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT :
printf("general ");
break;
case VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT :
printf("validation ");
break;
case VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT :
printf("performance ");
break;
}
switch (severity)
{
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT :
printf("(verbose): ");
break;
default :
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT :
printf("(info): ");
break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT :
printf("(warning): ");
break;
case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT :
printf("(error): ");
break;
}
printf("%s\n", callbackData->pMessage);
return 0;
}
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
// xdg_wm_base ping handler: answers with a pong carrying the same serial.
// `data` is unused.
void Device::xdg_wm_base_handle_ping(void* data, xdg_wm_base* xdg_wm_base, std::uint32_t serial) {
xdg_wm_base_pong(xdg_wm_base, serial);
}
/**
 * wl_registry "global" handler: binds the Wayland globals the library uses
 * and stores them in the matching Device members.
 *
 * Interfaces bound above version 1 are clamped to the version the compositor
 * advertises, because requesting a higher version than advertised is a
 * protocol error.
 */
void Device::handle_global(void *data, wl_registry *registry, std::uint32_t name, const char *interface, std::uint32_t version) {
    if (strcmp(interface, wl_shm_interface.name) == 0) {
        shm = reinterpret_cast<wl_shm*>(wl_registry_bind(registry, name, &wl_shm_interface, 1));
    } else if (strcmp(interface, wl_seat_interface.name) == 0) {
        // Assign to Device::seat (not a fresh local) so SetClipboardText
        // and any other code that needs the seat post-init can find it.
        seat = reinterpret_cast<wl_seat*>(wl_registry_bind(registry, name, &wl_seat_interface, 1));
        wl_seat_add_listener(seat, &seat_listener, nullptr);
        // If the manager came in first, the data device couldn't be
        // created yet — do it now that we have the seat.
        if (dataDeviceManager != nullptr && dataDevice == nullptr) {
            dataDevice = wl_data_device_manager_get_data_device(dataDeviceManager, seat);
        }
    } else if (compositor == nullptr && strcmp(interface, wl_compositor_interface.name) == 0) {
        const std::uint32_t compositorVersion = std::min<std::uint32_t>(version, 3);
        compositor = reinterpret_cast<wl_compositor*>(wl_registry_bind(registry, name, &wl_compositor_interface, compositorVersion));
    } else if (strcmp(interface, xdg_wm_base_interface.name) == 0) {
        xdgWmBase = reinterpret_cast<xdg_wm_base*>(wl_registry_bind(registry, name, &xdg_wm_base_interface, 1));
        xdg_wm_base_add_listener(xdgWmBase, &xdgWmBaseListener, nullptr);
    } else if (strcmp(interface, zxdg_decoration_manager_v1_interface.name) == 0) {
        manager = reinterpret_cast<zxdg_decoration_manager_v1*>(wl_registry_bind(registry, name, &zxdg_decoration_manager_v1_interface, 1));
    } else if (strcmp(interface, wp_viewporter_interface.name) == 0) {
        wpViewporter = reinterpret_cast<wp_viewporter*>(wl_registry_bind(registry, name, &wp_viewporter_interface, 1));
    } else if (strcmp(interface, wp_fractional_scale_manager_v1_interface.name) == 0) {
        fractionalScaleManager = reinterpret_cast<wp_fractional_scale_manager_v1*>(wl_registry_bind(registry, name, &wp_fractional_scale_manager_v1_interface, 1));
    } else if (strcmp(interface, wl_data_device_manager_interface.name) == 0) {
        // v3 gives us the full set of data_source events (target / send /
        // cancelled / dnd_*). On a compositor that advertises less we bind
        // what it offers; the per-source listener then simply does not
        // receive the v3-only callbacks.
        const std::uint32_t managerVersion = std::min<std::uint32_t>(version, 3);
        dataDeviceManager = reinterpret_cast<wl_data_device_manager*>(
            wl_registry_bind(registry, name, &wl_data_device_manager_interface, managerVersion));
        if (seat != nullptr && dataDevice == nullptr) {
            dataDevice = wl_data_device_manager_get_data_device(dataDeviceManager, seat);
        }
    }
}
// wl_registry "global_remove" handler. Intentionally empty: removed globals
// are not tracked here.
void Device::handle_global_remove(void* data, wl_registry* registry, uint32_t name) {
}
/**
 * wl_pointer "button" handler: updates the held flag of the focused window
 * for the left/right mouse button and fires the matching click or release
 * event. Other buttons are ignored.
 *
 * Does nothing when no window currently has pointer focus.
 */
void Device::pointer_handle_button(void* data, wl_pointer* pointer, std::uint32_t serial, std::uint32_t time, std::uint32_t button, std::uint32_t state) {
    auto* const target = Device::focusedWindow;
    if (target == nullptr) {
        // The leave handler clears the focus, so there is nothing to notify.
        return;
    }

    const bool pressed = (state == WL_POINTER_BUTTON_STATE_PRESSED);
    if (button == BTN_LEFT) {
        target->mouseLeftHeld = pressed;
        if (pressed) {
            target->onMouseLeftClick.Invoke();
        } else {
            target->onMouseLeftRelease.Invoke();
        }
    } else if (button == BTN_RIGHT) {
        target->mouseRightHeld = pressed;
        if (pressed) {
            target->onMouseRightClick.Invoke();
        } else {
            target->onMouseRightRelease.Invoke();
        }
    }
}
/**
 * wl_pointer "motion" handler: stores the surface-local pointer position,
 * multiplied by the window's scale, on the focused window and fires its
 * onMouseMove event.
 *
 * Does nothing when no window currently has pointer focus.
 */
void Device::PointerListenerHandleMotion(void* data, wl_pointer* wl_pointer, std::uint32_t time, wl_fixed_t surface_x, wl_fixed_t surface_y) {
    auto* const target = Device::focusedWindow;
    if (target == nullptr) {
        return;
    }
    Vector<float, 2> pos(wl_fixed_to_double(surface_x), wl_fixed_to_double(surface_y));
    //target->lastMousePos = target->currentMousePos;
    target->currentMousePos = pos * target->scale;
    //target->mouseDelta = {target->currentMousePos.x-target->lastMousePos.x, target->currentMousePos.y-target->lastMousePos.y};
    target->onMouseMove.Invoke();
}
void Device::PointerListenerHandleEnter(void* data, wl_pointer* wl_pointer, std::uint32_t serial, wl_surface* surface, wl_fixed_t surface_x, wl_fixed_t surface_y) {
Device::wlPointer = wl_pointer;
for(Window* window : windows) {
if(window->surface == surface) {
window->lastPointerSerial_ = serial;
if(window->cursorSurface != nullptr) {
wl_pointer_set_cursor(wl_pointer, serial, window->cursorSurface,
window->cursorHotspotX_, window->cursorHotspotY_);
}
focusedWindow = window;
window->onMouseEnter.Invoke();
return;
}
}
}
/**
 * wl_pointer "leave" handler: fires onMouseLeave on the focused window (if
 * there is one) and clears the pointer focus.
 */
void Device::PointerListenerHandleLeave(void* data, wl_pointer*, std::uint32_t, wl_surface*) {
    // The focus can already be empty when the preceding enter event did not
    // match any known window.
    if (Device::focusedWindow != nullptr) {
        Device::focusedWindow->onMouseLeave.Invoke();
    }
    focusedWindow = nullptr;
}
// wl_pointer "axis" (scroll) handler. Intentionally empty: `value` is
// currently ignored.
void Device::PointerListenerHandleAxis(void*, wl_pointer*, std::uint32_t, std::uint32_t, wl_fixed_t value) {
}
void Device::keyboard_keymap(void *data, wl_keyboard *keyboard, uint32_t format, int fd, uint32_t size) {
if (format != WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1) {
close(fd);
fprintf(stderr, "Unsupported keymap format\n");
return;
}
void *map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("mmap");
return;
}
xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
xkb_keymap = xkb_keymap_new_from_string(xkb_context, (const char *)map, XKB_KEYMAP_FORMAT_TEXT_V1,XKB_KEYMAP_COMPILE_NO_FLAGS);
munmap(map, size);
close(fd);
xkb_state = xkb_state_new(xkb_keymap);
}
// wl_keyboard "enter" handler. Intentionally empty: keyboard focus and the
// `keys` array of already-pressed keys are not tracked here.
void Device::keyboard_enter(void *data, wl_keyboard *keyboard, uint32_t serial, wl_surface *surface, wl_array *keys) {
}
void Device::keyboard_leave(void *data, wl_keyboard *keyboard, uint32_t serial, wl_surface *surface) {
}
/**
 * wl_keyboard "key" handler.
 *
 * On press: fires onRawKeyHold (key already held) or onRawKeyDown (newly
 * held) on the focused window, fires onTextInput when the key produces
 * printable UTF-8 text, and makes this key the one that auto-repeats.
 * On release: fires onRawKeyUp and stops the repeat if this key was the one
 * repeating.
 *
 * `focusedWindow` is set by the pointer enter/leave handlers and can be null
 * while key events still arrive; window callbacks are skipped in that case
 * but the repeat bookkeeping is still kept up to date.
 */
void Device::keyboard_key(void *data, wl_keyboard *keyboard, uint32_t serial, uint32_t time, uint32_t key, uint32_t state) {
    // `key` is the kernel input-event-code (KEY_*). That is exactly what
    // :Keys returns for Wayland builds, so we store it verbatim with no
    // translation. The +8 X11 offset is only needed for the XKB layer,
    // which we still consult to produce UTF-8 text.
    KeyCode code = key;
    xkb_keycode_t xkbKeycode = key + 8;
    auto* const target = focusedWindow;

    if (state == WL_KEYBOARD_KEY_STATE_PRESSED) {
        if (target != nullptr) {
            if (target->heldKeys.contains(code)) {
                target->onRawKeyHold.Invoke(code);
            } else {
                target->heldKeys.insert(code);
                target->onRawKeyDown.Invoke(code);
            }
        }

        std::string utf8;
        if (xkb_state != nullptr) {
            // No text can be produced before a keymap has been installed.
            constexpr int bufferSize = 16;
            std::string buf(bufferSize, '\0');
            int n = xkb_state_key_get_utf8(xkb_state, xkbKeycode, buf.data(), bufferSize);
            // The return value is the length needed; when it exceeds the
            // buffer the text was truncated to bufferSize - 1 bytes.
            if (n > bufferSize - 1) {
                n = bufferSize - 1;
            }
            if (n > 0 && (unsigned char)buf[0] >= 0x20 && buf[0] != 0x7f) {
                buf.resize(n);
                utf8 = buf;
            }
        }
        if (target != nullptr && !utf8.empty()) {
            target->onTextInput.Invoke(utf8);
        }

        // Replace the active repeat with this key — most recent press wins,
        // matching xkbcommon's typical behaviour and most desktop apps.
        keyRepeat.active = (keyRepeat.rate > 0);
        keyRepeat.key = code;
        keyRepeat.utf8 = std::move(utf8);
        keyRepeat.pressTime = std::chrono::steady_clock::now();
        keyRepeat.lastFireTime = keyRepeat.pressTime;
    } else {
        if (target != nullptr) {
            target->heldKeys.erase(code);
            target->onRawKeyUp.Invoke(code);
        }
        // If the released key was the one repeating, stop. Otherwise leave
        // the existing repeat alone (user pressed/released a modifier
        // mid-repeat etc.).
        if (keyRepeat.active && keyRepeat.key == code) {
            keyRepeat.active = false;
            keyRepeat.utf8.clear();
        }
    }
}
/**
 * wl_keyboard "modifiers" handler: forwards the depressed / latched / locked
 * modifier masks and the layout group to the xkb state so later key-to-text
 * lookups see them. Ignored until a keymap has been installed.
 */
void Device::keyboard_modifiers(void *data, wl_keyboard *keyboard, uint32_t serial, uint32_t mods_depressed, uint32_t mods_latched, uint32_t mods_locked, uint32_t group) {
    if (xkb_state == nullptr) {
        // No keymap was loaded (or it failed to load): nothing to update.
        return;
    }
    xkb_state_update_mask(xkb_state, mods_depressed, mods_latched, mods_locked, 0, 0, group);
}
/**
 * wl_keyboard "repeat_info" handler: stores the repeat rate and delay
 * reported by the compositor. A non-positive rate means the compositor has
 * disabled key repeat, so any repeat in progress is cancelled.
 */
void Device::keyboard_repeat_info(void *data, wl_keyboard *keyboard, int32_t rate, int32_t delay) {
    keyRepeat.delay = delay;
    keyRepeat.rate = rate;

    const bool repeatEnabled = rate > 0;
    if (!repeatEnabled) {
        keyRepeat.active = false;
    }
}
/**
 * Emits the synthetic key-repeat events that are due for the held key.
 *
 * The first repeat is due `keyRepeat.delay` milliseconds after the press and
 * later ones every 1000 / `keyRepeat.rate` milliseconds. Each due repeat
 * fires onRawKeyDown, onRawKeyHold and, when the key produced text,
 * onTextInput on the focused window.
 *
 * Nothing happens when no repeat is active, no window is focused, or the
 * rate is not positive.
 */
void Device::TickKeyRepeats() {
    if (!keyRepeat.active || !focusedWindow) return;
    if (keyRepeat.rate <= 0) return;

    using ms = std::chrono::milliseconds;
    const auto now = std::chrono::steady_clock::now();

    const auto firstFire = keyRepeat.pressTime + ms(keyRepeat.delay);
    if (now < firstFire) return;

    // The integer division yields 0 for rates above 1000 Hz; clamp to 1 ms
    // so the catch-up loop below always advances.
    const auto periodMs = 1000 / keyRepeat.rate;
    const ms period(periodMs > 0 ? periodMs : 1);

    // lastFireTime still equals pressTime until the first repeat, so the
    // next due time is never allowed to fall before the end of the initial
    // delay. Without this the whole delay would be "caught up" as one burst.
    auto nextFire = keyRepeat.lastFireTime + period;
    if (nextFire < firstFire) {
        nextFire = firstFire;
    }

    // Catch up — emit one event per missed period so a paused frame doesn't
    // make the repeat permanently lag behind. Stop early if a callback
    // cancelled the repeat or cleared the focus.
    while (nextFire <= now && keyRepeat.active && focusedWindow) {
        focusedWindow->onRawKeyDown.Invoke(keyRepeat.key);
        focusedWindow->onRawKeyHold.Invoke(keyRepeat.key);
        if (!keyRepeat.utf8.empty()) {
            focusedWindow->onTextInput.Invoke(keyRepeat.utf8);
        }
        keyRepeat.lastFireTime = nextFire;
        nextFire += period;
    }
}
/**
 * wl_seat "capabilities" handler: records the seat and attaches the pointer
 * and keyboard listeners for the capabilities the seat reports.
 */
void Device::seat_handle_capabilities(void* data, wl_seat* seat, uint32_t capabilities) {
    // The parameter hides the Device member of the same name, so the
    // assignment has to be qualified to reach the member.
    Device::seat = seat;

    // NOTE(review): a new pointer/keyboard object is created on every
    // capabilities event — confirm whether repeated events need de-duplication.
    if (capabilities & WL_SEAT_CAPABILITY_POINTER) {
        wl_pointer* pointer = wl_seat_get_pointer(seat);
        wl_pointer_add_listener(pointer, &pointer_listener, nullptr);
    }
    if (capabilities & WL_SEAT_CAPABILITY_KEYBOARD) {
        wl_keyboard* keyboard = wl_seat_get_keyboard(seat);
        wl_keyboard_add_listener(keyboard, &keyboard_listener, nullptr);
    }
}
#endif
void Device::Initialize() {
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
display = wl_display_connect(NULL);
if (display == nullptr) {
throw std::runtime_error("Could not connect to wayland display");
}
wl_registry* registry = wl_display_get_registry(display);
wl_registry_add_listener(registry, &registry_listener, nullptr);
if (wl_display_roundtrip(display) == -1) {
exit(EXIT_FAILURE);
}
if (shm == NULL || compositor == NULL || xdgWmBase == NULL) {
throw std::runtime_error("No wl_shm, wl_compositor or xdg_wm_base support");
}
// After the registry roundtrip the data_device (if the compositor
// exposes one) is bound. Clipboard::Initialize attaches the
// selection listener that Clipboard::GetText reads from; doing it
// before the first wl_data_device.selection arrives is what lets
// GetText work the instant a frame is rendered.
Clipboard::Initialize();
#endif
VkApplicationInfo app{VK_STRUCTURE_TYPE_APPLICATION_INFO};
app.pApplicationName = "";
app.pEngineName = "Crafter.Graphics";
app.apiVersion = VK_MAKE_VERSION(1, 4, 0);
// TODO(re-enable GPU-AV): once Vulkan SDK > 1.4.341 is the floor.
//
// GPU-Assisted Validation is opt-in via the enable list — leaving it
// out disables it. SDK 1.4.341's GPU-AV does not handle
// descriptor_heap pipelines (VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT
// with layout = VK_NULL_HANDLE): `PipelineSubState::GetPipelineLayoutUnion`
// null-derefs on the first dispatch/draw against such a pipeline.
//
// Tracked + fixed upstream:
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/12103
// Per spencer-lunarg (LunarG): broken in 1.4.341, fixed and landing
// in the next SDK release. Once we bump our Vulkan-Headers / SDK
// dependency past 1.4.341, restore the original enable list:
//
// VkValidationFeatureEnableEXT enables[] = {
// VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT
// };
// validationFeatures.enabledValidationFeatureCount = 1;
// validationFeatures.pEnabledValidationFeatures = enables;
//
// Standard validation (the layer itself) is still on; only the GPU-AV
// out-of-bounds / shader-instrumentation checks are temporarily off.
VkValidationFeaturesEXT validationFeatures = {
.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
};
VkInstanceCreateInfo instanceCreateInfo = {};
instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instanceCreateInfo.pNext = &validationFeatures;
instanceCreateInfo.pApplicationInfo = &app;
instanceCreateInfo.enabledExtensionCount = sizeof(instanceExtensionNames) / sizeof(const char*);
instanceCreateInfo.ppEnabledExtensionNames = instanceExtensionNames;
size_t foundInstanceLayers = 0;
std::uint32_t instanceLayerCount;
CheckVkResult(vkEnumerateInstanceLayerProperties(&instanceLayerCount, NULL));
std::vector<VkLayerProperties> instanceLayerProperties(instanceLayerCount);
CheckVkResult(vkEnumerateInstanceLayerProperties(&instanceLayerCount, instanceLayerProperties.data()));
for (uint32_t i = 0; i < instanceLayerCount; i++)
{
for (size_t j = 0; j < sizeof(layerNames) / sizeof(const char*); j++)
{
if (std::strcmp(instanceLayerProperties[i].layerName, layerNames[j]) == 0)
{
foundInstanceLayers++;
}
}
}
if (foundInstanceLayers >= sizeof(layerNames) / sizeof(const char*))
{
instanceCreateInfo.enabledLayerCount = sizeof(layerNames) / sizeof(const char*);
instanceCreateInfo.ppEnabledLayerNames = layerNames;
}
CheckVkResult(vkCreateInstance(&instanceCreateInfo, NULL, &instance));
VkDebugUtilsMessengerCreateInfoEXT debugUtilsMessengerCreateInfo = {};
debugUtilsMessengerCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
debugUtilsMessengerCreateInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
debugUtilsMessengerCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
debugUtilsMessengerCreateInfo.pfnUserCallback = onError;
CheckVkResult(GET_EXTENSION_FUNCTION(vkCreateDebugUtilsMessengerEXT)(instance, &debugUtilsMessengerCreateInfo, NULL, &debugMessenger));
uint32_t physDeviceCount;
vkEnumeratePhysicalDevices(instance, &physDeviceCount, NULL);
std::vector<VkPhysicalDevice> physDevices(physDeviceCount);
vkEnumeratePhysicalDevices(instance, &physDeviceCount, physDevices.data());
uint32_t bestScore = 0;
for (uint32_t i = 0; i < physDeviceCount; i++)
{
VkPhysicalDevice device = physDevices[i];
uint32_t score;
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(device, &properties);
switch (properties.deviceType)
{
default :
continue;
case VK_PHYSICAL_DEVICE_TYPE_OTHER :
score = 1;
break;
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU :
score = 4;
break;
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
score = 5;
break;
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU :
score = 3;
break;
case VK_PHYSICAL_DEVICE_TYPE_CPU :
score = 2;
break;
}
if (score > bestScore)
{
physDevice = device;
bestScore = score;
}
}
// Enumerate available device extensions so we can opt into
// VK_EXT_memory_decompression when the driver advertises it. Drivers
// without it (AMD, Intel as of early 2026) get the CPU-decode fallback.
{
std::uint32_t extCount = 0;
vkEnumerateDeviceExtensionProperties(physDevice, nullptr, &extCount, nullptr);
std::vector<VkExtensionProperties> exts(extCount);
vkEnumerateDeviceExtensionProperties(physDevice, nullptr, &extCount, exts.data());
for (const VkExtensionProperties& e : exts) {
if (std::strcmp(e.extensionName, VK_EXT_MEMORY_DECOMPRESSION_EXTENSION_NAME) == 0) {
memoryDecompressionSupported = true;
break;
}
}
}
// Properties query: chain memory-decompression props only when supported,
// otherwise sType validation flags it as an unrecognized struct on
// drivers that don't expose the extension.
if (memoryDecompressionSupported) {
memoryDecompressionProperties.pNext = const_cast<void*>(rayTracingProperties.pNext);
rayTracingProperties.pNext = &memoryDecompressionProperties;
}
// Chain driver properties onto the tail of the query so we can detect
// the NVIDIA proprietary driver for the descriptor-heap AS-read
// workaround (issue #15 / #7).
descriptorHeapProperties.pNext = &driverProperties;
VkPhysicalDeviceProperties2 properties2 {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &rayTracingProperties
};
vkGetPhysicalDeviceProperties2(physDevice, &properties2);
// NVIDIA's brand-new VK_EXT_descriptor_heap acceleration-structure read
// path faults (see #7); enable the SPIR-V rewrite workaround there. Other
// drivers (and any future fixed NVIDIA driver, once this gate is removed)
// take the normal heap-bound AS path unchanged.
workaroundDescriptorHeapAS = (driverProperties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY);
// Sanity-gate: GDeflate 1.0 must actually be in the supported method set.
if (memoryDecompressionSupported &&
(memoryDecompressionProperties.decompressionMethods & VK_MEMORY_DECOMPRESSION_METHOD_GDEFLATE_1_0_BIT_EXT) == 0) {
memoryDecompressionSupported = false;
}
uint32_t queueFamilyCount;
vkGetPhysicalDeviceQueueFamilyProperties(physDevice, &queueFamilyCount, NULL);
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(physDevice, &queueFamilyCount, queueFamilies.data());
for (uint32_t i = 0; i < queueFamilyCount; i++)
{
if (queueFamilies[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)
{
queueFamilyIndex = i;
break;
}
}
float priority = 1;
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
queueCreateInfo.queueCount = 1;
queueCreateInfo.pQueuePriorities = &priority;
VkPhysicalDeviceFaultFeaturesEXT faultFeatures2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT,
};
VkPhysicalDeviceFeatures2 features22 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &faultFeatures2,
};
vkGetPhysicalDeviceFeatures2(physDevice, &features22);
VkPhysicalDeviceFaultFeaturesEXT faultFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT,
.deviceFault = VK_TRUE,
.deviceFaultVendorBinary = faultFeatures2.deviceFaultVendorBinary,
};
VkPhysicalDeviceShaderUntypedPointersFeaturesKHR untypedPointersFeatures {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_UNTYPED_POINTERS_FEATURES_KHR,
.pNext = &faultFeatures,
.shaderUntypedPointers = VK_TRUE,
};
// Enables synchronization2 sentinels (VkMemoryBarrier2, VK_PIPELINE_STAGE_2_*,
// VK_ACCESS_2_*) — required for VK_EXT_memory_decompression's sync tokens.
VkPhysicalDeviceVulkan13Features features13 {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
.pNext = &untypedPointersFeatures,
.synchronization2 = VK_TRUE,
};
VkPhysicalDeviceMemoryDecompressionFeaturesEXT memoryDecompressionFeatures {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_FEATURES_EXT,
.pNext = &features13,
.memoryDecompression = VK_TRUE,
};
void* postDecompressChain = memoryDecompressionSupported
? static_cast<void*>(&memoryDecompressionFeatures)
: static_cast<void*>(&features13);
VkPhysicalDeviceDescriptorHeapFeaturesEXT desciptorHeapFeatures {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_HEAP_FEATURES_EXT,
.pNext = postDecompressChain,
.descriptorHeap = VK_TRUE,
};
VkPhysicalDevice16BitStorageFeatures bit16 {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
.pNext = &desciptorHeapFeatures,
.storageBuffer16BitAccess = VK_TRUE,
};
VkPhysicalDeviceVulkan12Features features12 {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
.pNext = &bit16,
.shaderFloat16 = VK_TRUE,
// Bindless / runtime descriptor array indexing — needed for the
// descriptor_heap shader path.
.shaderUniformBufferArrayNonUniformIndexing = VK_TRUE,
.shaderSampledImageArrayNonUniformIndexing = VK_TRUE,
.shaderStorageBufferArrayNonUniformIndexing = VK_TRUE,
.shaderStorageImageArrayNonUniformIndexing = VK_TRUE,
.runtimeDescriptorArray = VK_TRUE,
.scalarBlockLayout = VK_TRUE,
.bufferDeviceAddress = VK_TRUE
};
VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
.pNext = &features12,
.rayQuery = VK_TRUE
};
VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR,
.pNext = &physicalDeviceRayQueryFeatures,
.rayTracingPipeline = VK_TRUE
};
VkPhysicalDeviceAccelerationStructureFeaturesKHR deviceAccelerationStructureFeature = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR,
.pNext = &physicalDeviceRayTracingPipelineFeatures,
.accelerationStructure = VK_TRUE
};
VkPhysicalDeviceFeatures2 physical_features2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &deviceAccelerationStructureFeature,
.features = {
// Order matches VkPhysicalDeviceFeatures declaration so the
// designated-initializer-order warning stays quiet.
.samplerAnisotropy = VK_TRUE,
.shaderStorageImageReadWithoutFormat = VK_TRUE,
.shaderStorageImageWriteWithoutFormat = VK_TRUE,
// Bindless dynamic indexing — required to index `images[]`,
// `textures[]`, `samplers[]`, `itemHeap[]` with a runtime value.
.shaderSampledImageArrayDynamicIndexing = VK_TRUE,
.shaderStorageBufferArrayDynamicIndexing = VK_TRUE,
.shaderStorageImageArrayDynamicIndexing = VK_TRUE,
// shaderInt64: only needed for the NVIDIA descriptor-heap AS-read
// workaround (issue #15 / #7), which loads the TLAS device address
// as a 64-bit push constant. Gated so it isn't required on drivers
// that don't take the workaround path. Remove with the workaround.
.shaderInt64 = workaroundDescriptorHeapAS ? VK_TRUE : VK_FALSE,
.shaderInt16 = VK_TRUE
}
};
// Build the enabled-extension list dynamically so we can append the
// optional VK_EXT_memory_decompression entry only when the driver
// advertises it.
std::vector<const char*> enabledDeviceExtensions(
std::begin(deviceExtensionNames),
std::end(deviceExtensionNames));
if (memoryDecompressionSupported) {
enabledDeviceExtensions.push_back(VK_EXT_MEMORY_DECOMPRESSION_EXTENSION_NAME);
}
VkDeviceCreateInfo deviceCreateInfo = {};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
deviceCreateInfo.enabledExtensionCount = static_cast<std::uint32_t>(enabledDeviceExtensions.size());
deviceCreateInfo.ppEnabledExtensionNames = enabledDeviceExtensions.data();
deviceCreateInfo.pNext = &physical_features2;
// Device layers are deprecated and have been ignored since Vulkan 1.0;
// enabling them is a validation error. Layers are enabled at instance
// creation only, so leave enabledLayerCount at 0.
deviceCreateInfo.enabledLayerCount = 0;
deviceCreateInfo.ppEnabledLayerNames = nullptr;
CheckVkResult(vkCreateDevice(physDevice, &deviceCreateInfo, NULL, &device));
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
VkCommandPoolCreateInfo commandPoolcreateInfo = {};
commandPoolcreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolcreateInfo.queueFamilyIndex = queueFamilyIndex;
commandPoolcreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
CheckVkResult(vkCreateCommandPool(device, &commandPoolcreateInfo, NULL, &commandPool));
vkGetPhysicalDeviceMemoryProperties(physDevice, &memoryProperties);
vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureBuildSizesKHR"));
vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkCreateAccelerationStructureKHR"));
vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(vkGetInstanceProcAddr(instance, "vkDestroyAccelerationStructureKHR"));
vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(vkGetInstanceProcAddr(instance, "vkCmdBuildAccelerationStructuresKHR"));
vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(vkGetInstanceProcAddr(instance, "vkGetAccelerationStructureDeviceAddressKHR"));
vkCreateRayTracingPipelinesKHR = reinterpret_cast<PFN_vkCreateRayTracingPipelinesKHR>(vkGetInstanceProcAddr(instance, "vkCreateRayTracingPipelinesKHR"));
vkGetRayTracingShaderGroupHandlesKHR = reinterpret_cast<PFN_vkGetRayTracingShaderGroupHandlesKHR>(vkGetInstanceProcAddr(instance, "vkGetRayTracingShaderGroupHandlesKHR"));
vkCmdTraceRaysKHR = reinterpret_cast<PFN_vkCmdTraceRaysKHR>(vkGetInstanceProcAddr(instance, "vkCmdTraceRaysKHR"));
vkCmdBindResourceHeapEXT = reinterpret_cast<PFN_vkCmdBindResourceHeapEXT>(vkGetInstanceProcAddr(instance, "vkCmdBindResourceHeapEXT"));
vkCmdBindSamplerHeapEXT = reinterpret_cast<PFN_vkCmdBindSamplerHeapEXT>(vkGetInstanceProcAddr(instance, "vkCmdBindSamplerHeapEXT"));
vkWriteResourceDescriptorsEXT = reinterpret_cast<PFN_vkWriteResourceDescriptorsEXT>(vkGetInstanceProcAddr(instance, "vkWriteResourceDescriptorsEXT"));
vkWriteSamplerDescriptorsEXT = reinterpret_cast<PFN_vkWriteSamplerDescriptorsEXT>(vkGetInstanceProcAddr(instance, "vkWriteSamplerDescriptorsEXT"));
vkCmdPushDataEXT = reinterpret_cast<PFN_vkCmdPushDataEXT>(vkGetInstanceProcAddr(instance, "vkCmdPushDataEXT"));
vkGetPhysicalDeviceDescriptorSizeEXT = reinterpret_cast<PFN_vkGetPhysicalDeviceDescriptorSizeEXT>(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceDescriptorSizeEXT"));
vkGetDeviceFaultInfoEXT = reinterpret_cast<PFN_vkGetDeviceFaultInfoEXT>(vkGetInstanceProcAddr(instance, "vkGetDeviceFaultInfoEXT"));
if (memoryDecompressionSupported) {
// vkGetDeviceProcAddr skips the loader trampoline that vkGetInstanceProcAddr
// requires for device-level functions. The other PFNs above predate this
// realization; opportunistic adoption for new entry points only.
vkCmdDecompressMemoryEXT = reinterpret_cast<PFN_vkCmdDecompressMemoryEXT>(
vkGetDeviceProcAddr(device, "vkCmdDecompressMemoryEXT"));
if (vkCmdDecompressMemoryEXT == nullptr) {
// Driver advertised the extension but didn't expose the entry
// point — defensively fall back to CPU decode.
memoryDecompressionSupported = false;
}
}
}
std::uint32_t Device::GetMemoryType(uint32_t typeBits, VkMemoryPropertyFlags properties) {
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++)
{
if ((typeBits & 1) == 1)
{
if ((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
typeBits >>= 1;
}
throw std::runtime_error("Could not find a matching memory type");
}