/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
|
|
|
|
|
|
|
|
|
|
|
|
module;
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-03-02 23:53:13 +01:00
|
|
|
|
#include "vulkan/vulkan.h"
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-03-09 20:10:19 +01:00
|
|
|
|
export module Crafter.Graphics:RenderingElement3D;
|
2026-05-18 18:43:30 +02:00
|
|
|
|
import :RT;
|
2026-05-18 02:07:48 +02:00
|
|
|
|
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-01-28 01:07:41 +01:00
|
|
|
|
import std;
|
2026-01-28 18:51:11 +01:00
|
|
|
|
import :Mesh;
|
|
|
|
|
|
import :VulkanBuffer;
|
2026-01-29 20:35:55 +01:00
|
|
|
|
import Crafter.Math;
|
2026-04-05 22:53:59 +02:00
|
|
|
|
import :Window;
|
2026-01-28 01:07:41 +01:00
|
|
|
|
|
|
|
|
|
|
export namespace Crafter {
|
2026-01-29 19:46:53 +01:00
|
|
|
|
struct TlasWithBuffer {
|
2026-05-05 23:49:29 +02:00
|
|
|
|
VkDeviceAddress address = 0;
|
2026-04-10 22:26:15 +02:00
|
|
|
|
VulkanBuffer<char, false> buffer;
|
2026-05-05 23:49:29 +02:00
|
|
|
|
VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE;
|
2026-04-10 22:26:15 +02:00
|
|
|
|
VulkanBuffer<VkAccelerationStructureInstanceKHR, true> instanceBuffer;
|
2026-04-30 23:15:43 +02:00
|
|
|
|
VulkanBuffer<char, false> scratchBuffer;
|
2026-05-05 23:49:29 +02:00
|
|
|
|
// Parallel to instanceBuffer, indexed by TLAS instance ID. Filled
|
|
|
|
|
|
// from each element's userMetadata during BuildTLAS. Consumers
|
|
|
|
|
|
// (e.g. ray-query collision) bind this in the descriptor heap and
|
|
|
|
|
|
// look up via rayQueryGetIntersectionInstanceIdEXT to recover
|
|
|
|
|
|
// application-side per-instance data without touching the
|
|
|
|
|
|
// Vulkan-mandated instanceCustomIndex (which renderers may already
|
|
|
|
|
|
// use for their own encoding).
|
|
|
|
|
|
VulkanBuffer<std::uint32_t, true> metadataBuffer;
|
|
|
|
|
|
// Last instance count this TLAS was built (not refit) for. When
|
|
|
|
|
|
// elements.size() matches this, BuildTLAS does an in-place refit
|
|
|
|
|
|
// (UPDATE mode) which is dramatically cheaper than a full rebuild
|
|
|
|
|
|
// — refit walks the existing BVH and updates AABBs, while rebuild
|
|
|
|
|
|
// reconstructs the topology from scratch. A change in count forces
|
|
|
|
|
|
// a fresh rebuild because the AS is sized for that primitive count.
|
|
|
|
|
|
std::uint32_t builtInstanceCount = 0;
|
2026-01-29 19:46:53 +01:00
|
|
|
|
};
|
|
|
|
|
|
|
2026-03-09 20:10:19 +01:00
|
|
|
|
class RenderingElement3D {
|
2026-01-28 19:16:28 +01:00
|
|
|
|
public:
|
2026-05-18 18:43:30 +02:00
|
|
|
|
RTInstance instance;
|
2026-05-05 23:49:29 +02:00
|
|
|
|
// Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop.
|
|
|
|
|
|
// Sentinel value = not currently registered.
|
|
|
|
|
|
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
|
|
|
|
|
|
// Application-defined per-instance tag, copied verbatim into
|
|
|
|
|
|
// tlases[*].metadataBuffer at this element's TLAS instance ID
|
|
|
|
|
|
// every BuildTLAS. Crafter doesn't interpret it.
|
|
|
|
|
|
std::uint32_t userMetadata = 0;
|
|
|
|
|
|
// When true, BuildTLAS skips copying instance.transform into the
|
|
|
|
|
|
// TLAS instance buffer — the application's compute shader writes
|
|
|
|
|
|
// the transform field directly into instanceBuffer at this
|
|
|
|
|
|
// element's TLAS instance ID. Other instance fields (mask,
|
|
|
|
|
|
// customIndex, SBT offset, BLAS reference) are still copied from
|
|
|
|
|
|
// the CPU instance struct.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Used to take per-frame transform updates off the CPU for bodies
|
|
|
|
|
|
// whose transforms derive from GPU-side state (physics nodes that
|
|
|
|
|
|
// already live on the GPU).
|
|
|
|
|
|
bool transformOwnedByGpu = false;
|
|
|
|
|
|
|
2026-03-09 20:10:19 +01:00
|
|
|
|
static std::vector<RenderingElement3D*> elements;
|
2026-04-05 22:53:59 +02:00
|
|
|
|
inline static TlasWithBuffer tlases[Window::numFrames];
|
2026-01-29 19:46:53 +01:00
|
|
|
|
static void BuildTLAS(VkCommandBuffer cmd, std::uint32_t index);
|
2026-05-05 23:49:29 +02:00
|
|
|
|
|
|
|
|
|
|
// Register / unregister with `elements`. Use these instead of touching
|
|
|
|
|
|
// the vector directly: linear find+erase is O(n) and pathological at
|
|
|
|
|
|
// the body counts physics targets (millions of braces).
|
|
|
|
|
|
static void Add(RenderingElement3D* e);
|
|
|
|
|
|
static void Remove(RenderingElement3D* e);
|
2026-01-28 18:51:11 +01:00
|
|
|
|
};
|
2026-05-18 02:07:48 +02:00
|
|
|
|
}
|
|
|
|
|
|
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
2026-05-18 18:43:30 +02:00
|
|
|
|
|
|
|
|
|
|
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
|
|
|
|
|
|
import std;
|
|
|
|
|
|
import :Mesh;
|
|
|
|
|
|
import :WebGPU;
|
|
|
|
|
|
import :WebGPUBuffer;
|
|
|
|
|
|
import :Window;
|
|
|
|
|
|
|
|
|
|
|
|
export namespace Crafter {
|
|
|
|
|
|
// Per-frame TLAS storage. WebGPU has no real swapchain frame count
|
|
|
|
|
|
// (Window::numFrames = 1 on DOM), so this is effectively a singleton —
|
|
|
|
|
|
// the array form is kept for API symmetry with the Vulkan side so user
|
|
|
|
|
|
// code that indexes `tlases[frameIdx]` ports unchanged.
|
|
|
|
|
|
struct TlasWithBuffer {
|
|
|
|
|
|
// Host-visible instance buffer holding RTInstance entries — same
|
|
|
|
|
|
// layout as Vulkan's VkAccelerationStructureInstanceKHR, so user
|
|
|
|
|
|
// code touching .instance.mask / .flags / .transform.matrix is
|
|
|
|
|
|
// identical across backends. Also bound as a storage SSBO so
|
|
|
|
|
|
// application compute shaders (e.g. physics-tlas-transform.comp.wgsl)
|
|
|
|
|
|
// can write the .transform field directly when
|
|
|
|
|
|
// RenderingElement3D::transformOwnedByGpu is set.
|
|
|
|
|
|
WebGPUBuffer<RTInstance, true> instanceBuffer;
|
|
|
|
|
|
// Per-instance application metadata; parallel to instanceBuffer,
|
|
|
|
|
|
// identical semantics to the Vulkan-side counterpart.
|
|
|
|
|
|
WebGPUBuffer<std::uint32_t, true> metadataBuffer;
|
|
|
|
|
|
// GPU-built TLAS data: one TLASEntry per instance, written each
|
|
|
|
|
|
// BuildTLAS by a compute pass on the JS bridge. Read by traceRay /
|
|
|
|
|
|
// rayQuery as `@group(1) @binding(0) tlas: array<TLASEntry>`.
|
|
|
|
|
|
// TLASEntry layout: 96 bytes — aabbMin (12) + maskHGoffset (4) +
|
|
|
|
|
|
// aabbMax (12) + blasHandle (4) + invTransform 3x4 mat (48) +
|
|
|
|
|
|
// customIndex (4) + _pad (12). Defined in the WGSL traversal
|
|
|
|
|
|
// library; never directly read by C++.
|
|
|
|
|
|
WebGPUBuffer<char, false> buffer;
|
2026-05-24 13:32:08 +02:00
|
|
|
|
// GPU LBVH support — see additional/dom-webgpu.js's TLAS-build
|
|
|
|
|
|
// pipeline.
|
|
|
|
|
|
//
|
|
|
|
|
|
// entryOrder: per-frame permutation array of u32, indexing into
|
|
|
|
|
|
// `buffer` (the TLASEntry[] array). Populated by the radix-sort
|
|
|
|
|
|
// pass to spatially-coherent Morton order, then consumed by the
|
|
|
|
|
|
// BVH construction + traversal passes. In Stage 1 (this
|
|
|
|
|
|
// baseline) it's the identity permutation written by
|
|
|
|
|
|
// tlasBuildMain alongside the entries.
|
|
|
|
|
|
WebGPUBuffer<char, false> entryOrder;
|
|
|
|
|
|
// mortonCodes: per-instance 32-bit Morton codes computed from the
|
|
|
|
|
|
// world-AABB centroid, used as the radix-sort key. Written by
|
|
|
|
|
|
// tlasBuildMain.
|
|
|
|
|
|
WebGPUBuffer<char, false> mortonCodes;
|
|
|
|
|
|
// bvhNodes: 2N_PADDED - 1 sweep-tree BVH nodes built per frame
|
|
|
|
|
|
// by the LBVH-build compute pass. Each node 32 bytes (aabbMin +
|
|
|
|
|
|
// pad, aabbMax + pad). N_PADDED = 65536 (hardcoded in WGSL).
|
|
|
|
|
|
// Internal nodes [0, N_PADDED-1); leaves [N_PADDED-1, 2*N_PADDED-1).
|
|
|
|
|
|
// Node i's children are 2i+1, 2i+2 (implicit perfect binary
|
|
|
|
|
|
// tree). Cap: 65536 instances per scene.
|
|
|
|
|
|
WebGPUBuffer<char, false> bvhNodes;
|
|
|
|
|
|
// tlasBins: dead, kept allocated as a 64-byte placeholder so the
|
|
|
|
|
|
// existing wgpuBuildTLAS C++ signature doesn't need a churn.
|
|
|
|
|
|
// The pre-LBVH 64-bin partition was replaced by the full BVH.
|
|
|
|
|
|
WebGPUBuffer<char, false> tlasBins;
|
|
|
|
|
|
// Sort ping-pong buffers for the radix sort. Each pass reads
|
|
|
|
|
|
// from one and writes to the other, swapping role. Layout per
|
|
|
|
|
|
// element: 1 u32 packed key = (morton16 << 16) | tlasIndex16.
|
|
|
|
|
|
// Sized for N_PADDED.
|
|
|
|
|
|
WebGPUBuffer<char, false> sortTempA;
|
|
|
|
|
|
WebGPUBuffer<char, false> sortTempB;
|
2026-05-18 18:43:30 +02:00
|
|
|
|
|
|
|
|
|
|
std::uint32_t builtInstanceCount = 0;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class RenderingElement3D {
|
|
|
|
|
|
public:
|
|
|
|
|
|
RTInstance instance{};
|
|
|
|
|
|
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
|
|
|
|
|
|
std::uint32_t userMetadata = 0;
|
|
|
|
|
|
// Application compute shader writes the transform field of this
|
|
|
|
|
|
// element's instanceBuffer slot directly — BuildTLAS preserves it.
|
|
|
|
|
|
bool transformOwnedByGpu = false;
|
|
|
|
|
|
|
|
|
|
|
|
static std::vector<RenderingElement3D*> elements;
|
|
|
|
|
|
inline static TlasWithBuffer tlases[Window::numFrames];
|
|
|
|
|
|
|
|
|
|
|
|
// Repopulate the TLAS for frame `index`. WebGPU path always does
|
|
|
|
|
|
// a fresh build (no refit) — the GPU build pass is cheap at the
|
|
|
|
|
|
// ~10–100 instance counts the design targets; LBVH-for-TLAS is a
|
|
|
|
|
|
// future optimization for larger scenes.
|
2026-05-24 13:32:08 +02:00
|
|
|
|
//
|
|
|
|
|
|
// BuildTLAS is now split into Upload + Build so a physics
|
|
|
|
|
|
// compute pass (e.g. physics-tlas-transform) can run between the
|
|
|
|
|
|
// CPU mirror upload and the GPU LBVH build. The compute pass
|
|
|
|
|
|
// writes the per-instance transform bytes that BuildTLAS leaves
|
|
|
|
|
|
// intact for elements flagged transformOwnedByGpu, and those
|
|
|
|
|
|
// writes have to land before the LBVH reads them. The combined
|
|
|
|
|
|
// BuildTLAS is kept as a convenience for callers that don't
|
|
|
|
|
|
// interleave a compute pass (e.g. the ctor-time first build).
|
|
|
|
|
|
static void BuildTLASUpload(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
|
|
|
|
|
static void BuildTLASBuild(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
2026-05-18 18:43:30 +02:00
|
|
|
|
static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
|
|
|
|
|
|
|
|
|
|
|
static void Add(RenderingElement3D* e);
|
|
|
|
|
|
static void Remove(RenderingElement3D* e);
|
|
|
|
|
|
};
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif // CRAFTER_GRAPHICS_WINDOW_DOM
|