The NVIDIA descriptor-heap AS-read workaround (#15) rewrote heap acceleration-structure reads into a load of the TLAS device address from a push-constant block. It always *synthesized a new* push-constant block, so any ray-tracing shader that already declared one ended up with two — which SPIR-V forbids ("at most one push constant block statically used per entry point"), and vkCreateShaderModule's spirv-val check rejected: Entry point id '4' uses more than one PushConstant interface. WorkaroundNvidiaAS::Patch now detects an existing PushConstant variable and, when present, appends a single ulong member (the TLAS address) to that block instead of adding a second one, reading the address through the shader's own push-constant variable. The append offset is the end of the user's block, computed from the members' explicit Offset/ArrayStride/MatrixStride decorations (correct under both scalar and std140 layout) and rounded up to 8. Shaders with no push constant of their own keep getting a freshly synthesized single-member block at offset 0, exactly as before. That offset is published via Device::workaroundTlasPushOffset and RTPass feeds it to vkCmdPushDataEXT so the address lands where the rewritten load reads it (0 for the synthesized case, preserving prior behaviour). Verified on the affected driver (NVIDIA 610.43.02, RTX 4090): VulkanTriangle ray-traces correctly and validation-clean both with and without a user-declared raygen push constant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
212 lines
No EOL
12 KiB
C++
212 lines
No EOL
12 KiB
C++
/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
|
|
|
|
module;
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
|
#include "vulkan/vulkan.h"
|
|
#endif
|
|
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
|
|
#include <wayland-client.h>
|
|
#include <wayland-client-protocol.h>
|
|
#include "../lib/xdg-shell-client-protocol.h"
|
|
#include "../lib/wayland-xdg-decoration-unstable-v1-client-protocol.h"
|
|
#include "../lib/fractional-scale-v1.h"
|
|
#include "../lib/viewporter.h"
|
|
#include <xkbcommon/xkbcommon.h>
|
|
#endif
|
|
export module Crafter.Graphics:Device;
|
|
import std;
|
|
import :Keys; // KeyCode for keyboard repeat state
|
|
|
|
export namespace Crafter {
|
|
struct Window;
|
|
|
|
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
|
|
// Wayland's wl_keyboard.key only fires on real press/release — the
|
|
// compositor expects the application to synthesize repeat events
|
|
// itself using the rate/delay it advertises via wl_keyboard.repeat_info.
|
|
struct KeyRepeatState {
|
|
int rate = 25; // chars/sec
|
|
int delay = 500; // ms before first repeat
|
|
bool active = false;
|
|
KeyCode key = 0;
|
|
std::string utf8; // UTF-8 to re-emit as onTextInput, if any
|
|
std::chrono::time_point<std::chrono::steady_clock> pressTime;
|
|
std::chrono::time_point<std::chrono::steady_clock> lastFireTime;
|
|
};
|
|
#endif
|
|
|
|
struct Device {
|
|
static void Initialize();
|
|
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
|
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
|
|
inline static wl_display* display = nullptr;
|
|
inline static wl_seat* seat = nullptr;
|
|
inline static xdg_wm_base* xdgWmBase = nullptr;
|
|
inline static wp_fractional_scale_manager_v1* fractionalScaleManager = nullptr;
|
|
inline static zxdg_decoration_manager_v1* manager = nullptr;
|
|
inline static xkb_keymap* xkb_keymap;
|
|
inline static wl_compositor* compositor = nullptr;
|
|
inline static Window* focusedWindow = nullptr;
|
|
inline static wl_shm* shm = nullptr;
|
|
inline static wp_viewporter* wpViewporter = nullptr;
|
|
inline static xkb_context* xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
|
|
inline static xkb_state* xkb_state;
|
|
inline static std::vector<Window*> windows;
|
|
inline static wl_pointer* wlPointer;
|
|
// wl_data_device_manager + wl_data_device drive copy/paste. Bound
|
|
// lazily in handle_global; the data device is created once both
|
|
// the manager and the seat are present (registry binding order
|
|
// isn't guaranteed). nullptr on compositors that don't expose
|
|
// the manager — Clipboard::SetText silently no-ops there.
|
|
inline static wl_data_device_manager* dataDeviceManager = nullptr;
|
|
inline static wl_data_device* dataDevice = nullptr;
|
|
|
|
static void seat_handle_capabilities(void* data, wl_seat* seat, uint32_t capabilities);
|
|
static void xdg_surface_handle_preferred_scale(void* data, wp_fractional_scale_v1*, std::uint32_t scale);
|
|
static void xdg_wm_base_handle_ping(void* data, xdg_wm_base* xdg_wm_base, std::uint32_t serial);
|
|
static void handle_global(void* data, wl_registry* registry, std::uint32_t name, const char* interface, std::uint32_t version);
|
|
static void handle_global_remove(void* data, wl_registry* registry, uint32_t name);
|
|
static void keyboard_keymap(void* data, wl_keyboard* keyboard, uint32_t format, int fd, uint32_t size);
|
|
static void keyboard_enter(void *data, wl_keyboard *keyboard, uint32_t serial, wl_surface *surface, wl_array *keys);
|
|
static void keyboard_leave(void *data, wl_keyboard *keyboard, uint32_t serial, wl_surface *surface);
|
|
static void keyboard_key(void *data, wl_keyboard *keyboard, uint32_t serial, uint32_t time, uint32_t key, uint32_t state);
|
|
static void keyboard_modifiers(void *data, wl_keyboard *keyboard, uint32_t serial, uint32_t mods_depressed, uint32_t mods_latched, uint32_t mods_locked, uint32_t group);
|
|
static void keyboard_repeat_info(void *data, wl_keyboard *keyboard, int32_t rate, int32_t delay);
|
|
static void pointer_handle_button(void* data, wl_pointer* pointer, std::uint32_t serial, std::uint32_t time, std::uint32_t button, std::uint32_t state);
|
|
static void PointerListenerHandleMotion(void* data, wl_pointer* wl_pointer, std::uint32_t time, wl_fixed_t surface_x, wl_fixed_t surface_y);
|
|
static void PointerListenerHandleAxis(void*, wl_pointer*, std::uint32_t, std::uint32_t, wl_fixed_t value);
|
|
static void PointerListenerHandleEnter(void* data, wl_pointer* wl_pointer, std::uint32_t serial, wl_surface* surface, wl_fixed_t surface_x, wl_fixed_t surface_y);
|
|
static void PointerListenerHandleLeave(void*, wl_pointer*, std::uint32_t, wl_surface*);
|
|
|
|
constexpr static wl_pointer_listener pointer_listener = {
|
|
.enter = PointerListenerHandleEnter,
|
|
.leave = PointerListenerHandleLeave,
|
|
.motion = PointerListenerHandleMotion,
|
|
.button = pointer_handle_button,
|
|
.axis = PointerListenerHandleAxis,
|
|
};
|
|
constexpr static wl_keyboard_listener keyboard_listener = {
|
|
.keymap = keyboard_keymap,
|
|
.enter = keyboard_enter,
|
|
.leave = keyboard_leave,
|
|
.key = keyboard_key,
|
|
.modifiers = keyboard_modifiers,
|
|
.repeat_info = keyboard_repeat_info,
|
|
};
|
|
constexpr static wl_seat_listener seat_listener = {
|
|
.capabilities = seat_handle_capabilities,
|
|
};
|
|
constexpr static wl_registry_listener registry_listener = {
|
|
.global = handle_global,
|
|
.global_remove = handle_global_remove,
|
|
};
|
|
constexpr static xdg_wm_base_listener xdgWmBaseListener = {
|
|
.ping = xdg_wm_base_handle_ping,
|
|
};
|
|
#endif
|
|
|
|
inline static VkInstance instance = VK_NULL_HANDLE;
|
|
inline static VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
|
|
inline static VkPhysicalDevice physDevice = VK_NULL_HANDLE;
|
|
inline static VkDevice device = VK_NULL_HANDLE;
|
|
inline static std::uint32_t queueFamilyIndex = 0;
|
|
inline static VkQueue queue = VK_NULL_HANDLE;
|
|
inline static VkCommandPool commandPool = VK_NULL_HANDLE;
|
|
inline static VkSwapchainKHR swapchain = VK_NULL_HANDLE;
|
|
inline static PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR;
|
|
inline static PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR;
|
|
inline static PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR;
|
|
inline static PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR;
|
|
inline static PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR;
|
|
inline static PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR;
|
|
inline static PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR;
|
|
inline static PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR;
|
|
inline static PFN_vkCmdBindResourceHeapEXT vkCmdBindResourceHeapEXT;
|
|
inline static PFN_vkCmdBindSamplerHeapEXT vkCmdBindSamplerHeapEXT;
|
|
inline static PFN_vkWriteResourceDescriptorsEXT vkWriteResourceDescriptorsEXT;
|
|
inline static PFN_vkWriteSamplerDescriptorsEXT vkWriteSamplerDescriptorsEXT;
|
|
inline static PFN_vkCmdPushDataEXT vkCmdPushDataEXT;
|
|
inline static PFN_vkGetPhysicalDeviceDescriptorSizeEXT vkGetPhysicalDeviceDescriptorSizeEXT;
|
|
inline static PFN_vkGetDeviceFaultInfoEXT vkGetDeviceFaultInfoEXT;
|
|
|
|
// VK_EXT_memory_decompression — opt-in. When the driver advertises it
|
|
// and exposes the GDeflate 1.0 method, GPU asset decompression is
|
|
// available; otherwise consumers fall back to CPU decode.
|
|
inline static bool memoryDecompressionSupported = false;
|
|
inline static PFN_vkCmdDecompressMemoryEXT vkCmdDecompressMemoryEXT = nullptr;
|
|
|
|
inline static VkPhysicalDeviceMemoryProperties memoryProperties;
|
|
|
|
inline static VkPhysicalDeviceDescriptorHeapPropertiesEXT descriptorHeapProperties = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_HEAP_PROPERTIES_EXT
|
|
};
|
|
inline static VkPhysicalDeviceRayTracingPipelinePropertiesKHR rayTracingProperties = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR,
|
|
.pNext = &descriptorHeapProperties
|
|
};
|
|
inline static VkPhysicalDeviceMemoryDecompressionPropertiesEXT memoryDecompressionProperties = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_PROPERTIES_EXT
|
|
};
|
|
inline static VkPhysicalDeviceDriverProperties driverProperties = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES
|
|
};
|
|
|
|
// ─── NVIDIA descriptor-heap AS-read workaround (issue #15 / #7) ──
|
|
// True only on the NVIDIA proprietary driver, where reading an
|
|
// acceleration structure through VK_EXT_descriptor_heap aborts with
|
|
// VK_ERROR_DEVICE_LOST (a brand-new-extension driver fault, verified
|
|
// engine-clean in #7). When set, VulkanShader rewrites heap AS reads
|
|
// into a TLAS-device-address + OpConvertUToAccelerationStructureKHR
|
|
// path and RTPass pushes the active TLAS address as push data. Delete
|
|
// this flag and everything keyed on it once a fixed driver ships.
|
|
inline static bool workaroundDescriptorHeapAS = false;
|
|
// Byte offset of the TLAS-address member inside the patched raygen's
|
|
// push-constant block — 0 for a freshly synthesized block, or the end
|
|
// of the user's own block when the address is appended to it (the
|
|
// shader can't have two push-constant blocks). VulkanShader sets this
|
|
// at module load; RTPass feeds it to vkCmdPushDataEXT.
|
|
inline static std::uint32_t workaroundTlasPushOffset = 0;
|
|
|
|
static void CheckVkResult(VkResult result);
|
|
static std::uint32_t GetMemoryType(std::uint32_t typeBits, VkMemoryPropertyFlags properties);
|
|
|
|
// ─── Wayland key repeat ────────────────────────────────────────
|
|
// TickKeyRepeats fires onRawKeyDown / onRawKeyHold / onTextInput on
|
|
// the focused window for whichever key is currently repeating.
|
|
// Called once per frame from Window::Render. KeyRepeatState lives
|
|
// at namespace scope so its member initializers don't trip C++'s
|
|
// "complete-type-needed" rule for the inline static below.
|
|
#ifdef CRAFTER_GRAPHICS_WINDOW_WAYLAND
|
|
inline static KeyRepeatState keyRepeat;
|
|
static void TickKeyRepeats();
|
|
#else
|
|
static void TickKeyRepeats() {}
|
|
#endif
|
|
#else // CRAFTER_GRAPHICS_WINDOW_DOM
|
|
// DOM mode: Device collapses to just `Initialize()` (currently a
|
|
// no-op since the JS runtime initializes itself). The function is
|
|
// kept so user code calling `Device::Initialize()` still compiles
|
|
// cross-platform. Browser key repeat is delivered through the
|
|
// KeyboardEvent.repeat flag directly — no manual synthesis.
|
|
static void TickKeyRepeats() {}
|
|
#endif
|
|
};
|
|
} |