194 lines
8.8 KiB
C++
194 lines
8.8 KiB
C++
/*
|
|
Crafter®.Graphics
|
|
Copyright (C) 2026 Catcrafts®
|
|
catcrafts.net
|
|
*/
|
|
|
|
// DOM-mode TLAS upkeep. BuildTLAS is split in two phases so a physics
|
|
// compute pass can run between them:
|
|
// - BuildTLASUpload mirrors the CPU-side RTInstance array into the
|
|
// host-visible instance buffer (with partial-write semantics that
|
|
// preserve the transform bytes for elements flagged
|
|
// transformOwnedByGpu, see notes in the body) and uploads the
|
|
// metadata buffer.
|
|
// - BuildTLASBuild dispatches the JS-side TLAS-build compute pass —
|
|
// which consults the per-BLAS records published at Mesh::Build()
|
|
// time to produce world-space AABBs and inverse transforms in the
|
|
// format `traceRay` / `rayQuery` consume.
|
|
// The combined BuildTLAS calls both back-to-back; callers that want to
|
|
// interleave a physics tlas-transform compute pass (which writes the
|
|
// transform bytes BuildTLASUpload leaves intact) call Upload + their
|
|
// compute pass + Build manually.
|
|
|
|
module;
|
|
module Crafter.Graphics:RenderingElement3D_implWebGPU;
|
|
|
|
import :RenderingElement3D;
|
|
import :Mesh;
|
|
import :WebGPU;
|
|
import :WebGPUBuffer;
|
|
import std;
|
|
|
|
using namespace Crafter;
|
|
|
|
// Registry of all registered elements. Add() appends to it and Remove()
// swap-removes from it; each element's indexInElements is kept equal to its
// position in this vector. BuildTLASUpload copies elements[i] into instance
// slot i, so the order here is the TLAS instance order.
std::vector<RenderingElement3D*> RenderingElement3D::elements;
|
|
|
|
// Registers `e` at the end of the element list and stores the slot it was
// given in e->indexInElements, which Remove() later uses to locate it
// directly.
void RenderingElement3D::Add(RenderingElement3D* e) {
    const auto slot = static_cast<std::uint32_t>(elements.size());
    e->indexInElements = slot;
    elements.push_back(e);
}
|
|
|
|
// Unregisters `e` from the element list using swap-remove: the last element
// is moved into e's slot (and its indexInElements updated) before the list is
// shortened, so removal does not shift the other elements.
//
// Afterwards e->indexInElements is the "not registered" sentinel
// (uint32 max). Calling Remove on an element that already carries the
// sentinel is a no-op.
void RenderingElement3D::Remove(RenderingElement3D* e) {
    constexpr std::uint32_t kNotRegistered = std::numeric_limits<std::uint32_t>::max();

    const std::uint32_t idx = e->indexInElements;
    if (idx == kNotRegistered) return;

    // Guard against a stale or corrupt index. Without it an empty list makes
    // `size() - 1` wrap around (out-of-bounds read + pop_back on an empty
    // vector), and an index that points at a different element would evict
    // that element instead of `e`.
    if (idx >= elements.size() || elements[idx] != e) {
        e->indexInElements = kNotRegistered;
        return;
    }

    const std::uint32_t last = static_cast<std::uint32_t>(elements.size() - 1);
    if (idx != last) {
        // Fill the hole with the tail element and tell it about its new slot.
        elements[idx] = elements[last];
        elements[idx]->indexInElements = idx;
    }
    elements.pop_back();
    e->indexInElements = kNotRegistered;
}
|
|
|
|
// Phase 1 of the TLAS build: refreshes the CPU-side mirrors of the instance
// and metadata buffers from `elements` and flushes them to the device.
//
//   cmd   - unused by this implementation.
//   index - selects which entry of `tlases` to update.
//
// With no elements, only tlas.builtInstanceCount is cleared and nothing is
// allocated or uploaded. Otherwise, while tlas.instanceBuffer.handle is still
// 0, every TLAS-side buffer is allocated at its fixed size; after that the
// buffers are never resized.
//
// Elements flagged transformOwnedByGpu keep whatever transform is already in
// their instance slot: only their non-transform fields are written to the
// mirror and only those trailing bytes are flushed.
void RenderingElement3D::BuildTLASUpload(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t index) {
    auto& tlas = tlases[index];

    constexpr std::uint32_t kNPadded   = 65536u;  // size for instance / metadata mirrors
    constexpr std::uint32_t kLbvhMax   = 16384u;  // matches N_PADDED in lbvhBuildWgsl
    constexpr std::uint32_t kNodeCount = 2u * kNPadded - 1u;

    // Never touch more slots than the mirrors were allocated with: indexing
    // value[i] past the allocation would write outside it. Elements beyond
    // the cap are not uploaded.
    // NOTE(review): assumes Resize(n) sizes a mirror to n elements, so the
    // instance / metadata mirrors hold kNPadded entries — confirm against
    // WebGPUBuffer.
    // NOTE(review): the LBVH buffers below are sized for kLbvhMax entries,
    // which is smaller than this cap, and BuildTLASBuild passes
    // elements.size() unclamped — confirm the behaviour above 16384 elements.
    const std::uint32_t primitiveCount = static_cast<std::uint32_t>(
        std::min<std::size_t>(elements.size(), kNPadded));
    if (primitiveCount == 0) {
        tlas.builtInstanceCount = 0;
        return;
    }

    // ALL TLAS-side GPU buffers get allocated ONCE and never resized.
    // The LBVH-build shader takes the real instance count via a uniform
    // (lbvhPc.nReal) instead of arrayLength(&entries), so tlas.buffer /
    // entryOrder / mortonCodes don't need to grow when the application's
    // element count changes.
    //
    // Why this matters: an earlier version resized these per-frame on
    // primitiveCount change. The destroy+recreate cycle on the GPU buffer
    // caused subtle mid-game flicker as soon as any element was added (e.g.
    // firing a projectile) — fort braces would appear to briefly vanish in
    // patterns deterministic on the projectile's angle. Suspected
    // driver-level memory recycling without proper zero-init; the fixed-size
    // allocation sidesteps it entirely.
    if (tlas.instanceBuffer.handle == 0) {
        tlas.instanceBuffer.Resize(kNPadded);
        tlas.metadataBuffer.Resize(kNPadded);
        tlas.bvhNodes.Resize(kNodeCount * 32u);
        tlas.sortTempA.Resize(kNPadded * 4u);
        tlas.sortTempB.Resize(kNPadded * 4u);
        tlas.tlasBins.Resize(64 * 32);
        // TLAS-entry / order / morton-code buffers: sized for the LBVH cap
        // (16384). lbvhBuildMain iterates `lbvhPc.nReal` real entries; the
        // remainder stays zero / sentinel. Keep these stable across
        // element-count changes so the renderer's bind group references the
        // same buffer handle every frame.
        tlas.buffer.Resize(kLbvhMax * 144u);
        tlas.entryOrder.Resize(kLbvhMax * 4u);
        tlas.mortonCodes.Resize(kLbvhMax * 4u);
    }

    // NB: nothing below (and nothing in BuildTLASBuild) resizes tlas.buffer /
    // entryOrder / mortonCodes. Resize destroys + recreates the GPU resource
    // (and the JS-side handle), and the rayQuery dispatches that run between
    // BuildTLASUpload and BuildTLASBuild (projectile-collide, splash,
    // builder-pick) still hold the previous frame's TLAS in
    // rtState.current{Tlas,EntryOrder,Bvh}. Keeping the allocation fixed
    // keeps those handles valid even on frames where the element count
    // changes (e.g. every projectile fire).

    // Refresh the CPU mirrors.
    for (std::uint32_t i = 0; i < primitiveCount; ++i) {
        auto& dst = tlas.instanceBuffer.value[i];
        const auto& src = elements[i]->instance;
        if (elements[i]->transformOwnedByGpu) {
            // Leave dst.transform alone; update only the non-transform
            // fields.
            // NOTE(review): Remove() swap-moves the last element into a freed
            // slot, so a GPU-owned element can land on a slot whose transform
            // belonged to another element — confirm the physics pass rewrites
            // it before it is consumed.
            dst.instanceCustomIndex = src.instanceCustomIndex;
            dst.mask = src.mask;
            dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset;
            dst.flags = src.flags;
            dst.accelerationStructureReference = src.accelerationStructureReference;
        } else {
            dst = src;
        }
        tlas.metadataBuffer.value[i] = elements[i]->userMetadata;
    }

    // Upload the instance buffer with partial-write semantics: for runs of
    // CPU-driven elements (transformOwnedByGpu=false) we push the whole
    // 64-byte struct range in one writeBuffer call; for GPU-driven runs we
    // push only the trailing 16 metadata bytes per element, leaving the
    // transform field intact for the physics-tlas-transform compute shader
    // to update.
    constexpr std::uint32_t kInstSize      = sizeof(RTInstance);          // 64
    constexpr std::uint32_t kTransformSize = sizeof(RTTransformMatrix);   // 48
    constexpr std::uint32_t kMetaSize      = kInstSize - kTransformSize;  // 16

    std::uint32_t runStart = 0;
    while (runStart < primitiveCount) {
        // Extend the run while the ownership flag matches its first element.
        const bool runOwned = elements[runStart]->transformOwnedByGpu;
        std::uint32_t runEnd = runStart + 1;
        while (runEnd < primitiveCount && elements[runEnd]->transformOwnedByGpu == runOwned) {
            ++runEnd;
        }

        if (runOwned) {
            // GPU-driven run — metadata only, per element. Cannot batch
            // because the metadata bytes are non-contiguous in the instance
            // buffer (one 16-byte chunk per 64-byte slot).
            for (std::uint32_t j = runStart; j < runEnd; ++j) {
                const std::uint32_t off = j * kInstSize + kTransformSize;
                tlas.instanceBuffer.FlushDeviceRange(off, off, kMetaSize);
            }
        } else {
            // CPU-driven run — one contiguous writeBuffer.
            const std::uint32_t startOff = runStart * kInstSize;
            const std::uint32_t bytes = (runEnd - runStart) * kInstSize;
            tlas.instanceBuffer.FlushDeviceRange(startOff, startOff, bytes);
        }
        runStart = runEnd;
    }

    tlas.metadataBuffer.FlushDevice();
}
|
|
|
|
// Phase 2 of the TLAS build: hands the instance buffer and the scratch /
// output buffers of tlases[index] to WebGPU::wgpuBuildTLAS together with the
// current element count, then records that count in tlas.builtInstanceCount.
//
//   cmd   - unused by this implementation.
//   index - selects which entry of `tlases` to build.
//
// Intended to run after BuildTLASUpload for the same index, which is the
// function that allocates the buffers passed below.
void RenderingElement3D::BuildTLASBuild(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t index) {
    auto& tlas = tlases[index];
    const std::uint32_t primitiveCount = static_cast<std::uint32_t>(elements.size());

    // Nothing to dispatch when there are no elements, or when the buffers
    // were never allocated (handle still 0 because BuildTLASUpload has not
    // yet run with a non-empty element list). Dispatching in the latter case
    // would pass all-zero handles to wgpuBuildTLAS. Clearing the count here
    // keeps it correct even if the caller skipped the Upload phase.
    if (primitiveCount == 0 || tlas.instanceBuffer.handle == 0) {
        tlas.builtInstanceCount = 0;
        return;
    }

    // No per-count Resize. tlas.buffer / entryOrder / mortonCodes were
    // allocated at kLbvhMax in BuildTLASUpload's first call and stay that
    // size. The LBVH shader reads the real count from a uniform
    // (lbvhPc.nReal) wgpuBuildTLAS writes each call.
    // NOTE(review): primitiveCount is not clamped to that LBVH capacity
    // (16384 per BuildTLASUpload) — confirm how wgpuBuildTLAS behaves for
    // larger counts.
    WebGPU::wgpuBuildTLAS(tlas.instanceBuffer.handle,
                          static_cast<std::int32_t>(primitiveCount),
                          tlas.buffer.handle,
                          tlas.entryOrder.handle,
                          tlas.mortonCodes.handle,
                          tlas.tlasBins.handle,
                          tlas.bvhNodes.handle,
                          tlas.sortTempA.handle,
                          tlas.sortTempB.handle);

    tlas.builtInstanceCount = primitiveCount;
}
|
|
|
|
// Convenience entry point that runs both TLAS phases back-to-back for
// tlases[index]: first the upload of instance / metadata data, then the
// build dispatch. Callers that need to run their own work between the two
// phases call BuildTLASUpload and BuildTLASBuild themselves instead.
void RenderingElement3D::BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index) {
    // Phase 1: mirror element data into the instance / metadata buffers.
    BuildTLASUpload(cmd, index);

    // Phase 2: dispatch the TLAS build over the uploaded instances.
    BuildTLASBuild(cmd, index);
}
|