192 lines
9.3 KiB
C++
192 lines
9.3 KiB
C++
/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
|
||
|
||
module;
|
||
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
#include "vulkan/vulkan.h"
|
||
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
||
export module Crafter.Graphics:RenderingElement3D;
|
||
import :RT;
|
||
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
import std;
|
||
import :Mesh;
|
||
import :VulkanBuffer;
|
||
import Crafter.Math;
|
||
import :Window;
|
||
|
||
export namespace Crafter {
|
||
struct TlasWithBuffer {
|
||
VkDeviceAddress address = 0;
|
||
VulkanBuffer<char, false> buffer;
|
||
VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE;
|
||
VulkanBuffer<VkAccelerationStructureInstanceKHR, true> instanceBuffer;
|
||
VulkanBuffer<char, false> scratchBuffer;
|
||
// Parallel to instanceBuffer, indexed by TLAS instance ID. Filled
|
||
// from each element's userMetadata during BuildTLAS. Consumers
|
||
// (e.g. ray-query collision) bind this in the descriptor heap and
|
||
// look up via rayQueryGetIntersectionInstanceIdEXT to recover
|
||
// application-side per-instance data without touching the
|
||
// Vulkan-mandated instanceCustomIndex (which renderers may already
|
||
// use for their own encoding).
|
||
VulkanBuffer<std::uint32_t, true> metadataBuffer;
|
||
// Last instance count this TLAS was built (not refit) for. When
|
||
// elements.size() matches this, BuildTLAS does an in-place refit
|
||
// (UPDATE mode) which is dramatically cheaper than a full rebuild
|
||
// — refit walks the existing BVH and updates AABBs, while rebuild
|
||
// reconstructs the topology from scratch. A change in count forces
|
||
// a fresh rebuild because the AS is sized for that primitive count.
|
||
std::uint32_t builtInstanceCount = 0;
|
||
};
|
||
|
||
class RenderingElement3D {
|
||
public:
|
||
RTInstance instance;
|
||
// Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop.
|
||
// Sentinel value = not currently registered.
|
||
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
|
||
// Application-defined per-instance tag, copied verbatim into
|
||
// tlases[*].metadataBuffer at this element's TLAS instance ID
|
||
// every BuildTLAS. Crafter doesn't interpret it.
|
||
std::uint32_t userMetadata = 0;
|
||
// When true, BuildTLAS skips copying instance.transform into the
|
||
// TLAS instance buffer — the application's compute shader writes
|
||
// the transform field directly into instanceBuffer at this
|
||
// element's TLAS instance ID. Other instance fields (mask,
|
||
// customIndex, SBT offset, BLAS reference) are still copied from
|
||
// the CPU instance struct.
|
||
//
|
||
// Used to take per-frame transform updates off the CPU for bodies
|
||
// whose transforms derive from GPU-side state (physics nodes that
|
||
// already live on the GPU).
|
||
bool transformOwnedByGpu = false;
|
||
|
||
static std::vector<RenderingElement3D*> elements;
|
||
inline static TlasWithBuffer tlases[Window::numFrames];
|
||
static void BuildTLAS(VkCommandBuffer cmd, std::uint32_t index);
|
||
|
||
// Register / unregister with `elements`. Use these instead of touching
|
||
// the vector directly: linear find+erase is O(n) and pathological at
|
||
// the body counts physics targets (millions of braces).
|
||
static void Add(RenderingElement3D* e);
|
||
static void Remove(RenderingElement3D* e);
|
||
};
|
||
}
|
||
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
|
||
|
||
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
|
||
import std;
|
||
import :Mesh;
|
||
import :WebGPU;
|
||
import :WebGPUBuffer;
|
||
import :Window;
|
||
|
||
export namespace Crafter {
|
||
// Per-frame TLAS storage. WebGPU has no real swapchain frame count
|
||
// (Window::numFrames = 1 on DOM), so this is effectively a singleton —
|
||
// the array form is kept for API symmetry with the Vulkan side so user
|
||
// code that indexes `tlases[frameIdx]` ports unchanged.
|
||
struct TlasWithBuffer {
|
||
// Host-visible instance buffer holding RTInstance entries — same
|
||
// layout as Vulkan's VkAccelerationStructureInstanceKHR, so user
|
||
// code touching .instance.mask / .flags / .transform.matrix is
|
||
// identical across backends. Also bound as a storage SSBO so
|
||
// application compute shaders (e.g. physics-tlas-transform.comp.wgsl)
|
||
// can write the .transform field directly when
|
||
// RenderingElement3D::transformOwnedByGpu is set.
|
||
WebGPUBuffer<RTInstance, true> instanceBuffer;
|
||
// Per-instance application metadata; parallel to instanceBuffer,
|
||
// identical semantics to the Vulkan-side counterpart.
|
||
WebGPUBuffer<std::uint32_t, true> metadataBuffer;
|
||
// GPU-built TLAS data: one TLASEntry per instance, written each
|
||
// BuildTLAS by a compute pass on the JS bridge. Read by traceRay /
|
||
// rayQuery as `@group(1) @binding(0) tlas: array<TLASEntry>`.
|
||
// TLASEntry layout: 96 bytes — aabbMin (12) + maskHGoffset (4) +
|
||
// aabbMax (12) + blasHandle (4) + invTransform 3x4 mat (48) +
|
||
// customIndex (4) + _pad (12). Defined in the WGSL traversal
|
||
// library; never directly read by C++.
|
||
WebGPUBuffer<char, false> buffer;
|
||
// GPU LBVH support — see additional/dom-webgpu.js's TLAS-build
|
||
// pipeline.
|
||
//
|
||
// entryOrder: per-frame permutation array of u32, indexing into
|
||
// `buffer` (the TLASEntry[] array). Populated by the radix-sort
|
||
// pass to spatially-coherent Morton order, then consumed by the
|
||
// BVH construction + traversal passes. In Stage 1 (this
|
||
// baseline) it's the identity permutation written by
|
||
// tlasBuildMain alongside the entries.
|
||
WebGPUBuffer<char, false> entryOrder;
|
||
// mortonCodes: per-instance 32-bit Morton codes computed from the
|
||
// world-AABB centroid, used as the radix-sort key. Written by
|
||
// tlasBuildMain.
|
||
WebGPUBuffer<char, false> mortonCodes;
|
||
// bvhNodes: 2N_PADDED - 1 sweep-tree BVH nodes built per frame
|
||
// by the LBVH-build compute pass. Each node 32 bytes (aabbMin +
|
||
// pad, aabbMax + pad). N_PADDED = 65536 (hardcoded in WGSL).
|
||
// Internal nodes [0, N_PADDED-1); leaves [N_PADDED-1, 2*N_PADDED-1).
|
||
// Node i's children are 2i+1, 2i+2 (implicit perfect binary
|
||
// tree). Cap: 65536 instances per scene.
|
||
WebGPUBuffer<char, false> bvhNodes;
|
||
// tlasBins: dead, kept allocated as a 64-byte placeholder so the
|
||
// existing wgpuBuildTLAS C++ signature doesn't need a churn.
|
||
// The pre-LBVH 64-bin partition was replaced by the full BVH.
|
||
WebGPUBuffer<char, false> tlasBins;
|
||
// Sort ping-pong buffers for the radix sort. Each pass reads
|
||
// from one and writes to the other, swapping role. Layout per
|
||
// element: 1 u32 packed key = (morton16 << 16) | tlasIndex16.
|
||
// Sized for N_PADDED.
|
||
WebGPUBuffer<char, false> sortTempA;
|
||
WebGPUBuffer<char, false> sortTempB;
|
||
|
||
std::uint32_t builtInstanceCount = 0;
|
||
};
|
||
|
||
class RenderingElement3D {
|
||
public:
|
||
RTInstance instance{};
|
||
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
|
||
std::uint32_t userMetadata = 0;
|
||
// Application compute shader writes the transform field of this
|
||
// element's instanceBuffer slot directly — BuildTLAS preserves it.
|
||
bool transformOwnedByGpu = false;
|
||
|
||
static std::vector<RenderingElement3D*> elements;
|
||
inline static TlasWithBuffer tlases[Window::numFrames];
|
||
|
||
// Repopulate the TLAS for frame `index`. WebGPU path always does
|
||
// a fresh build (no refit) — the GPU build pass is cheap at the
|
||
// ~10–100 instance counts the design targets; LBVH-for-TLAS is a
|
||
// future optimization for larger scenes.
|
||
//
|
||
// BuildTLAS is now split into Upload + Build so a physics
|
||
// compute pass (e.g. physics-tlas-transform) can run between the
|
||
// CPU mirror upload and the GPU LBVH build. The compute pass
|
||
// writes the per-instance transform bytes that BuildTLAS leaves
|
||
// intact for elements flagged transformOwnedByGpu, and those
|
||
// writes have to land before the LBVH reads them. The combined
|
||
// BuildTLAS is kept as a convenience for callers that don't
|
||
// interleave a compute pass (e.g. the ctor-time first build).
|
||
static void BuildTLASUpload(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||
static void BuildTLASBuild(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||
static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||
|
||
static void Add(RenderingElement3D* e);
|
||
static void Remove(RenderingElement3D* e);
|
||
};
|
||
}
|
||
#endif // CRAFTER_GRAPHICS_WINDOW_DOM
|