Crafter.Graphics/interfaces/Crafter.Graphics-Device.cppm
catbot 1c310762a7 fix(vulkan-rt): configurable recursion depth + per-shader TLAS push for compute (#21)
Two gaps in the Vulkan RT path that fault the device on the NVIDIA
proprietary driver with a non-trivial pipeline (simple VulkanTriangle
never hit them):

1. maxPipelineRayRecursionDepth was hardcoded to 1, so any closest-hit
   shader that traces a secondary ray (shadow ray — a very common
   pattern) recursed past the pipeline limit (UB → device fault).
   PipelineRTVulkan::Init now takes a maxRecursionDepth parameter
   (default 1, clamped to the device's maxRayRecursionDepth).

2. The NVIDIA descriptor-heap AS-read workaround rewrites every shader
   that reads an accelerationStructureEXT from the heap — including
   compute shaders — to read the TLAS device address from a push
   constant, but only RTPass pushed that address. A compute shader that
   ray-queries the TLAS (rayQueryEXT) therefore ran against an unwritten
   push slot → garbage AS handle → VK_ERROR_DEVICE_LOST.

   WorkaroundNvidiaAS::Patch now returns a per-shader PatchResult
   {patched, tlasPushOffset} instead of writing the clobber-prone global
   Device::workaroundTlasPushOffset (removed). VulkanShader stores it;
   ShaderBindingTableVulkan/PipelineRTVulkan carry it for RTPass, and
   ComputeShader tracks its own offset and pushes the caller-supplied
   TLAS address in Dispatch (new defaulted tlasAddress parameter),
   mirroring RTPass::Record.

The PushConstantRewrite regression test now asserts Patch's returned
patched/offset and adds two ray-querying compute-shader cases, proving
the rewrite is stage-agnostic and the per-shader offset is correct.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 18:35:39 +00:00

212 lines
No EOL
12 KiB
C++

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#include "vulkan/vulkan.h"
#endif
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
#include <wayland-client.h>
#include <wayland-client-protocol.h>
#include "../lib/xdg-shell-client-protocol.h"
#include "../lib/wayland-xdg-decoration-unstable-v1-client-protocol.h"
#include "../lib/fractional-scale-v1.h"
#include "../lib/viewporter.h"
#include <xkbcommon/xkbcommon.h>
#endif
export module Crafter.Graphics:Device;
import std;
import :Keys; // KeyCode for keyboard repeat state
export namespace Crafter {
struct Window;
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
// The Wayland wl_keyboard.key event is delivered only for a real
// press/release. Repeats are the application's job: the compositor
// advertises a rate and delay through wl_keyboard.repeat_info and
// expects the client to synthesize the repeat events itself. This
// struct carries that bookkeeping.
struct KeyRepeatState {
    int rate{25};        // chars/sec
    int delay{500};      // ms before the first repeat
    bool active{false};
    KeyCode key = 0;
    std::string utf8;    // UTF-8 to re-emit as onTextInput, if any
    std::chrono::steady_clock::time_point pressTime;
    std::chrono::steady_clock::time_point lastFireTime;
};
#endif
struct Device {
    // Declared for every backend so user code calling
    // Device::Initialize() compiles cross-platform.
    static void Initialize();

#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
    // ─── Wayland / xkbcommon global state ──────────────────────────
    static inline wl_display* display = nullptr;
    static inline wl_seat* seat = nullptr;
    static inline xdg_wm_base* xdgWmBase = nullptr;
    static inline wp_fractional_scale_manager_v1* fractionalScaleManager = nullptr;
    static inline zxdg_decoration_manager_v1* manager = nullptr;
    static inline xkb_keymap* xkb_keymap = nullptr;
    static inline wl_compositor* compositor = nullptr;
    static inline Window* focusedWindow = nullptr;
    static inline wl_shm* shm = nullptr;
    static inline wp_viewporter* wpViewporter = nullptr;
    // Initialized by calling xkb_context_new, i.e. as part of this
    // variable's own (dynamic) initialization rather than in Initialize().
    static inline xkb_context* xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
    static inline xkb_state* xkb_state = nullptr;
    static inline std::vector<Window*> windows;
    static inline wl_pointer* wlPointer = nullptr;

    // Copy/paste is driven by wl_data_device_manager + wl_data_device.
    // The manager is bound lazily in handle_global; the data device is
    // created only once both the manager and the seat exist, because the
    // registry does not guarantee binding order. Both stay nullptr on
    // compositors that don't expose the manager, in which case
    // Clipboard::SetText silently no-ops.
    static inline wl_data_device_manager* dataDeviceManager = nullptr;
    static inline wl_data_device* dataDevice = nullptr;

    // ─── Listener callbacks (wired into the tables below) ──────────
    static void seat_handle_capabilities(void* data, wl_seat* seat, std::uint32_t capabilities);
    static void xdg_surface_handle_preferred_scale(void* data, wp_fractional_scale_v1* fractionalScale, std::uint32_t scale);
    static void xdg_wm_base_handle_ping(void* data, xdg_wm_base* wmBase, std::uint32_t serial);
    static void handle_global(void* data, wl_registry* registry, std::uint32_t name, const char* interface, std::uint32_t version);
    static void handle_global_remove(void* data, wl_registry* registry, std::uint32_t name);
    static void keyboard_keymap(void* data, wl_keyboard* keyboard, std::uint32_t format, int fd, std::uint32_t size);
    static void keyboard_enter(void* data, wl_keyboard* keyboard, std::uint32_t serial, wl_surface* surface, wl_array* keys);
    static void keyboard_leave(void* data, wl_keyboard* keyboard, std::uint32_t serial, wl_surface* surface);
    static void keyboard_key(void* data, wl_keyboard* keyboard, std::uint32_t serial, std::uint32_t time, std::uint32_t key, std::uint32_t state);
    static void keyboard_modifiers(
        void* data,
        wl_keyboard* keyboard,
        std::uint32_t serial,
        std::uint32_t mods_depressed,
        std::uint32_t mods_latched,
        std::uint32_t mods_locked,
        std::uint32_t group);
    static void keyboard_repeat_info(void* data, wl_keyboard* keyboard, std::int32_t rate, std::int32_t delay);
    static void pointer_handle_button(void* data, wl_pointer* pointer, std::uint32_t serial, std::uint32_t time, std::uint32_t button, std::uint32_t state);
    static void PointerListenerHandleMotion(void* data, wl_pointer* pointer, std::uint32_t time, wl_fixed_t surface_x, wl_fixed_t surface_y);
    static void PointerListenerHandleAxis(void* data, wl_pointer* pointer, std::uint32_t time, std::uint32_t axis, wl_fixed_t value);
    static void PointerListenerHandleEnter(void* data, wl_pointer* pointer, std::uint32_t serial, wl_surface* surface, wl_fixed_t surface_x, wl_fixed_t surface_y);
    static void PointerListenerHandleLeave(void* data, wl_pointer* pointer, std::uint32_t serial, wl_surface* surface);

    // ─── Listener tables ───────────────────────────────────────────
    static constexpr wl_pointer_listener pointer_listener = {
        .enter = PointerListenerHandleEnter,
        .leave = PointerListenerHandleLeave,
        .motion = PointerListenerHandleMotion,
        .button = pointer_handle_button,
        .axis = PointerListenerHandleAxis,
    };
    static constexpr wl_keyboard_listener keyboard_listener = {
        .keymap = keyboard_keymap,
        .enter = keyboard_enter,
        .leave = keyboard_leave,
        .key = keyboard_key,
        .modifiers = keyboard_modifiers,
        .repeat_info = keyboard_repeat_info,
    };
    static constexpr wl_seat_listener seat_listener = {
        .capabilities = seat_handle_capabilities,
    };
    static constexpr wl_registry_listener registry_listener = {
        .global = handle_global,
        .global_remove = handle_global_remove,
    };
    static constexpr xdg_wm_base_listener xdgWmBaseListener = {
        .ping = xdg_wm_base_handle_ping,
    };
#endif

    // ─── Core Vulkan handles ───────────────────────────────────────
    static inline VkInstance instance = VK_NULL_HANDLE;
    static inline VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
    static inline VkPhysicalDevice physDevice = VK_NULL_HANDLE;
    static inline VkDevice device = VK_NULL_HANDLE;
    static inline std::uint32_t queueFamilyIndex = 0;
    static inline VkQueue queue = VK_NULL_HANDLE;
    static inline VkCommandPool commandPool = VK_NULL_HANDLE;
    static inline VkSwapchainKHR swapchain = VK_NULL_HANDLE;

    // ─── Extension entry points ────────────────────────────────────
    // Null until something assigns them; the loading code is not part
    // of this interface unit.
    static inline PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR = nullptr;
    static inline PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR = nullptr;
    static inline PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR = nullptr;
    static inline PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR = nullptr;
    static inline PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR = nullptr;
    static inline PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR = nullptr;
    static inline PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR = nullptr;
    static inline PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR = nullptr;
    static inline PFN_vkCmdBindResourceHeapEXT vkCmdBindResourceHeapEXT = nullptr;
    static inline PFN_vkCmdBindSamplerHeapEXT vkCmdBindSamplerHeapEXT = nullptr;
    static inline PFN_vkWriteResourceDescriptorsEXT vkWriteResourceDescriptorsEXT = nullptr;
    static inline PFN_vkWriteSamplerDescriptorsEXT vkWriteSamplerDescriptorsEXT = nullptr;
    static inline PFN_vkCmdPushDataEXT vkCmdPushDataEXT = nullptr;
    static inline PFN_vkGetPhysicalDeviceDescriptorSizeEXT vkGetPhysicalDeviceDescriptorSizeEXT = nullptr;
    static inline PFN_vkGetDeviceFaultInfoEXT vkGetDeviceFaultInfoEXT = nullptr;

    // VK_EXT_memory_decompression is opt-in. GPU asset decompression is
    // available when the driver advertises the extension and exposes the
    // GDeflate 1.0 method; otherwise consumers fall back to CPU decode.
    static inline bool memoryDecompressionSupported = false;
    static inline PFN_vkCmdDecompressMemoryEXT vkCmdDecompressMemoryEXT = nullptr;

    // ─── Cached physical-device properties ─────────────────────────
    static inline VkPhysicalDeviceMemoryProperties memoryProperties{};
    static inline VkPhysicalDeviceDescriptorHeapPropertiesEXT descriptorHeapProperties = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_HEAP_PROPERTIES_EXT
    };
    // pNext-chained to descriptorHeapProperties, which therefore has to
    // be declared first.
    static inline VkPhysicalDeviceRayTracingPipelinePropertiesKHR rayTracingProperties = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR,
        .pNext = &descriptorHeapProperties
    };
    // NOTE(review): the two structs below are not pNext-chained here;
    // presumably they are linked in or queried separately elsewhere — confirm.
    static inline VkPhysicalDeviceMemoryDecompressionPropertiesEXT memoryDecompressionProperties = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_PROPERTIES_EXT
    };
    static inline VkPhysicalDeviceDriverProperties driverProperties = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES
    };

    // ─── NVIDIA descriptor-heap AS-read workaround (issue #15 / #7) ──
    // Set only on the NVIDIA proprietary driver, where reading an
    // acceleration structure through VK_EXT_descriptor_heap aborts with
    // VK_ERROR_DEVICE_LOST (a driver fault in a brand-new extension,
    // verified engine-clean in #7). While set, VulkanShader rewrites heap
    // AS reads into a TLAS-device-address +
    // OpConvertUToAccelerationStructureKHR path, and the recording side
    // (RTPass, and ComputeShader for ray-querying compute shaders) pushes
    // the active TLAS address as push data. Delete this flag and
    // everything keyed on it once a fixed driver ships.
    static inline bool workaroundDescriptorHeapAS = false;
    // The byte offset of the TLAS-address member inside a patched
    // shader's push-constant block is deliberately NOT stored here. It is
    // tracked per shader (VulkanShader::tlasPushOffset): a single global
    // would be clobbered by whichever shader was patched last and could
    // not serve several shaders with differing push layouts (e.g. an RT
    // raygen and a ray-querying compute shader). RTPass and ComputeShader
    // read the offset off the pipeline they record.

    static void CheckVkResult(VkResult result);
    static std::uint32_t GetMemoryType(std::uint32_t typeBits, VkMemoryPropertyFlags properties);

    // ─── Wayland key repeat ────────────────────────────────────────
    // TickKeyRepeats raises onRawKeyDown / onRawKeyHold / onTextInput on
    // the focused window for whichever key is currently repeating, and is
    // called once per frame from Window::Render. KeyRepeatState is
    // defined at namespace scope so that its member initializers don't
    // trip C++'s "complete-type-needed" rule for the inline static below.
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
    static inline KeyRepeatState keyRepeat;
    static void TickKeyRepeats();
#else
    // Non-Wayland native build: an empty stub.
    static void TickKeyRepeats() {}
#endif
#else // CRAFTER_GRAPHICS_WINDOW_DOM
    // DOM mode: Device collapses to just `Initialize()` (currently a
    // no-op, since the JS runtime initializes itself); it is kept so
    // user code calling `Device::Initialize()` still compiles
    // cross-platform. Browser key repeat arrives through the
    // KeyboardEvent.repeat flag directly, so no manual synthesis is
    // needed and TickKeyRepeats is an empty stub.
    static void TickKeyRepeats() {}
#endif
};
}