Crafter.Graphics/interfaces/Crafter.Graphics-RenderingElement3D.cppm

192 lines
9.3 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
module;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#include "vulkan/vulkan.h"
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
export module Crafter.Graphics:RenderingElement3D;
import :RT;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :Mesh;
import :VulkanBuffer;
import Crafter.Math;
import :Window;
export namespace Crafter {
// Per-frame top-level acceleration structure (TLAS) state for the
// Vulkan backend: the acceleration-structure handle together with the
// buffers declared alongside it and the bookkeeping BuildTLAS needs.
// RenderingElement3D keeps one of these per frame in `tlases`.
struct TlasWithBuffer {
// NOTE(review): presumably the device address of
// `accelerationStructure`; the assignment is not visible in this
// interface file — confirm in BuildTLAS.
VkDeviceAddress address = 0;
// NOTE(review): presumably the backing storage of the acceleration
// structure itself (raw bytes, hence `char`) — confirm in BuildTLAS.
VulkanBuffer<char, false> buffer;
// Acceleration-structure handle; VK_NULL_HANDLE until one is created.
VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE;
// One VkAccelerationStructureInstanceKHR per TLAS instance. BuildTLAS
// fills it from each element's `instance` (see
// RenderingElement3D::transformOwnedByGpu for the transform exception).
// NOTE(review): the `true` template argument presumably selects a
// host-mapped buffer — confirm against VulkanBuffer.
VulkanBuffer<VkAccelerationStructureInstanceKHR, true> instanceBuffer;
// NOTE(review): presumably the scratch memory handed to the
// acceleration-structure build/refit command — confirm in BuildTLAS.
VulkanBuffer<char, false> scratchBuffer;
// Parallel to instanceBuffer, indexed by TLAS instance ID. Filled
// from each element's userMetadata during BuildTLAS. Consumers
// (e.g. ray-query collision) bind this in the descriptor heap and
// look up via rayQueryGetIntersectionInstanceIdEXT to recover
// application-side per-instance data without touching the
// Vulkan-mandated instanceCustomIndex (which renderers may already
// use for their own encoding).
VulkanBuffer<std::uint32_t, true> metadataBuffer;
// Last instance count this TLAS was built (not refit) for. When
// elements.size() matches this, BuildTLAS does an in-place refit
// (UPDATE mode) which is dramatically cheaper than a full rebuild
// — refit walks the existing BVH and updates AABBs, while rebuild
// reconstructs the topology from scratch. A change in count forces
// a fresh rebuild because the AS is sized for that primitive count.
// Starts at 0, i.e. "never built".
std::uint32_t builtInstanceCount = 0;
};
// One ray-traced scene element on the Vulkan backend. Each element
// carries the CPU-side RTInstance that BuildTLAS copies into the
// per-frame TLAS instance buffer; the static members hold the global
// element registry and the per-frame TLAS state.
class RenderingElement3D {
public:
// CPU-side instance record. Value-initialized so that a
// default-constructed element never hands indeterminate fields to
// BuildTLAS; this also matches the WebGPU/DOM declaration of the
// same member further down in this file.
RTInstance instance{};
// Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop.
// Sentinel value = not currently registered.
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
// Application-defined per-instance tag, copied verbatim into
// tlases[*].metadataBuffer at this element's TLAS instance ID
// every BuildTLAS. Crafter doesn't interpret it.
std::uint32_t userMetadata = 0;
// When true, BuildTLAS skips copying instance.transform into the
// TLAS instance buffer — the application's compute shader writes
// the transform field directly into instanceBuffer at this
// element's TLAS instance ID. Other instance fields (mask,
// customIndex, SBT offset, BLAS reference) are still copied from
// the CPU instance struct.
//
// Used to take per-frame transform updates off the CPU for bodies
// whose transforms derive from GPU-side state (physics nodes that
// already live on the GPU).
bool transformOwnedByGpu = false;
// Registry of all currently registered elements. Declared here,
// defined outside this interface. Mutate it only through Add/Remove
// so that every element's indexInElements stays in sync.
static std::vector<RenderingElement3D*> elements;
// One TLAS state per frame, Window::numFrames entries in total.
inline static TlasWithBuffer tlases[Window::numFrames];
// Records the TLAS build for one frame into `cmd`; whether it is a
// full rebuild or an in-place refit depends on
// TlasWithBuffer::builtInstanceCount (see that member's comment).
// NOTE(review): `index` presumably selects tlases[index] and must be
// < Window::numFrames — confirm against the implementation.
static void BuildTLAS(VkCommandBuffer cmd, std::uint32_t index);
// Register / unregister with `elements`. Use these instead of touching
// the vector directly: linear find+erase is O(n) and pathological at
// the body counts physics targets (millions of braces).
static void Add(RenderingElement3D* e);
static void Remove(RenderingElement3D* e);
};
}
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import :Mesh;
import :WebGPU;
import :WebGPUBuffer;
import :Window;
export namespace Crafter {
// Per-frame TLAS storage. WebGPU has no real swapchain frame count
// (Window::numFrames = 1 on DOM), so this is effectively a singleton —
// the array form is kept for API symmetry with the Vulkan side so user
// code that indexes `tlases[frameIdx]` ports unchanged.
struct TlasWithBuffer {
// Host-visible instance buffer holding RTInstance entries — same
// layout as Vulkan's VkAccelerationStructureInstanceKHR, so user
// code touching .instance.mask / .flags / .transform.matrix is
// identical across backends. Also bound as a storage SSBO so
// application compute shaders (e.g. physics-tlas-transform.comp.wgsl)
// can write the .transform field directly when
// RenderingElement3D::transformOwnedByGpu is set.
WebGPUBuffer<RTInstance, true> instanceBuffer;
// Per-instance application metadata; parallel to instanceBuffer,
// identical semantics to the Vulkan-side counterpart.
WebGPUBuffer<std::uint32_t, true> metadataBuffer;
// GPU-built TLAS data: one TLASEntry per instance, written each
// BuildTLAS by a compute pass on the JS bridge. Read by traceRay /
// rayQuery as `@group(1) @binding(0) tlas: array<TLASEntry>`.
// TLASEntry layout: 96 bytes — aabbMin (12) + maskHGoffset (4) +
// aabbMax (12) + blasHandle (4) + invTransform 3x4 mat (48) +
// customIndex (4) + _pad (12). Defined in the WGSL traversal
// library; never directly read by C++.
WebGPUBuffer<char, false> buffer;
// GPU LBVH support — see additional/dom-webgpu.js's TLAS-build
// pipeline.
//
// entryOrder: per-frame permutation array of u32, indexing into
// `buffer` (the TLASEntry[] array). Populated by the radix-sort
// pass to spatially-coherent Morton order, then consumed by the
// BVH construction + traversal passes. In Stage 1 (this
// baseline) it's the identity permutation written by
// tlasBuildMain alongside the entries.
// NOTE(review): the "Stage 1" remark and the radix-sort description
// cannot both describe the current pipeline — confirm which one is
// up to date.
WebGPUBuffer<char, false> entryOrder;
// mortonCodes: per-instance 32-bit Morton codes computed from the
// world-AABB centroid, used as the radix-sort key. Written by
// tlasBuildMain.
// NOTE(review): the sort-buffer comment below packs a 16-bit Morton
// value (morton16) into the key — confirm how the 32-bit code here
// relates to it.
WebGPUBuffer<char, false> mortonCodes;
// bvhNodes: 2N_PADDED - 1 sweep-tree BVH nodes built per frame
// by the LBVH-build compute pass. Each node 32 bytes (aabbMin +
// pad, aabbMax + pad). N_PADDED = 65536 (hardcoded in WGSL).
// Internal nodes [0, N_PADDED-1); leaves [N_PADDED-1, 2*N_PADDED-1).
// Node i's children are 2i+1, 2i+2 (implicit perfect binary
// tree). Cap: 65536 instances per scene.
WebGPUBuffer<char, false> bvhNodes;
// tlasBins: dead, kept allocated as a 64-byte placeholder so the
// existing wgpuBuildTLAS C++ signature doesn't need a churn.
// The pre-LBVH 64-bin partition was replaced by the full BVH.
WebGPUBuffer<char, false> tlasBins;
// Sort ping-pong buffers for the radix sort. Each pass reads
// from one and writes to the other, swapping role. Layout per
// element: 1 u32 packed key = (morton16 << 16) | tlasIndex16.
// Sized for N_PADDED.
WebGPUBuffer<char, false> sortTempA;
WebGPUBuffer<char, false> sortTempB;
// NOTE(review): presumably the instance count of the most recent
// build, mirroring the Vulkan-side member of the same name. The
// WebGPU path is documented as always doing a fresh build, so how
// this value is consumed is not visible here — confirm in the
// implementation. Starts at 0.
std::uint32_t builtInstanceCount = 0;
};
// One ray-traced scene element on the WebGPU/DOM backend. Member names
// mirror the Vulkan-side class declared earlier in this file.
class RenderingElement3D {
public:
// CPU-side instance record (value-initialized).
RTInstance instance{};
// Index of this element within `elements`; the max() default is
// presumably the "not registered" sentinel, as documented on the
// Vulkan-side declaration — confirm in Add/Remove.
std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
// Application-defined per-instance tag; see
// TlasWithBuffer::metadataBuffer.
std::uint32_t userMetadata = 0;
// Application compute shader writes the transform field of this
// element's instanceBuffer slot directly — BuildTLAS preserves it.
bool transformOwnedByGpu = false;
// Registry of registered elements. Declared here, defined outside
// this interface.
static std::vector<RenderingElement3D*> elements;
// One TLAS state per frame, Window::numFrames entries in total.
inline static TlasWithBuffer tlases[Window::numFrames];
// Repopulate the TLAS for frame `index`. WebGPU path always does
// a fresh build (no refit) — the GPU build pass is cheap at the
// ~10-100 instance counts the design targets; LBVH-for-TLAS is a
// future optimization for larger scenes.
// NOTE(review): the figure above previously read "~10100", which
// looks like a range whose dash was lost — confirm the intended
// numbers.
// NOTE(review): the "future optimization" remark appears stale: the
// TlasWithBuffer comments and the paragraph below already describe
// a per-frame GPU LBVH build — confirm and update.
//
// BuildTLAS is now split into Upload + Build so a physics
// compute pass (e.g. physics-tlas-transform) can run between the
// CPU mirror upload and the GPU LBVH build. The compute pass
// writes the per-instance transform bytes that BuildTLAS leaves
// intact for elements flagged transformOwnedByGpu, and those
// writes have to land before the LBVH reads them. The combined
// BuildTLAS is kept as a convenience for callers that don't
// interleave a compute pass (e.g. the ctor-time first build).
// Step 1: the CPU mirror upload.
static void BuildTLASUpload(WebGPUCommandEncoderRef cmd, std::uint32_t index);
// Step 2: the GPU LBVH build; run it after any compute pass that
// writes GPU-owned transforms.
static void BuildTLASBuild(WebGPUCommandEncoderRef cmd, std::uint32_t index);
// Convenience wrapper combining the two steps above.
static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index);
// Register / unregister an element with `elements`.
// NOTE(review): presumably these keep indexInElements in sync like
// the Vulkan-side versions — confirm in the implementation.
static void Add(RenderingElement3D* e);
static void Remove(RenderingElement3D* e);
};
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM