webgpu improvements
This commit is contained in:
parent
5a75571ffd
commit
8347467e1e
18 changed files with 1932 additions and 153 deletions
|
|
@ -121,6 +121,37 @@ export namespace Crafter {
|
|||
// customIndex (4) + _pad (12). Defined in the WGSL traversal
|
||||
// library; never directly read by C++.
|
||||
WebGPUBuffer<char, false> buffer;
|
||||
// GPU LBVH support — see additional/dom-webgpu.js's TLAS-build
|
||||
// pipeline.
|
||||
//
|
||||
// entryOrder: per-frame permutation array of u32, indexing into
|
||||
// `buffer` (the TLASEntry[] array). Populated by the radix-sort
|
||||
// pass in spatially coherent Morton order, then consumed by the
|
||||
// BVH construction + traversal passes. In Stage 1 (this
|
||||
// baseline) it's the identity permutation written by
|
||||
// tlasBuildMain alongside the entries.
|
||||
WebGPUBuffer<char, false> entryOrder;
|
||||
// mortonCodes: per-instance 32-bit Morton codes computed from the
|
||||
// world-AABB centroid, used as the radix-sort key. Written by
|
||||
// tlasBuildMain.
|
||||
WebGPUBuffer<char, false> mortonCodes;
|
||||
// bvhNodes: 2*N_PADDED - 1 sweep-tree BVH nodes built per frame
|
||||
// by the LBVH-build compute pass. Each node 32 bytes (aabbMin +
|
||||
// pad, aabbMax + pad). N_PADDED = 65536 (hardcoded in WGSL).
|
||||
// Internal nodes [0, N_PADDED-1); leaves [N_PADDED-1, 2*N_PADDED-1).
|
||||
// Node i's children are 2i+1, 2i+2 (implicit perfect binary
|
||||
// tree). Cap: 65536 instances per scene.
|
||||
WebGPUBuffer<char, false> bvhNodes;
|
||||
// tlasBins: dead, kept allocated as a 64-byte placeholder so the
|
||||
// existing wgpuBuildTLAS C++ signature doesn't have to change.
|
||||
// The pre-LBVH 64-bin partition was replaced by the full BVH.
|
||||
WebGPUBuffer<char, false> tlasBins;
|
||||
// Sort ping-pong buffers for the radix sort. Each pass reads
|
||||
// from one and writes to the other, swapping roles. Layout per
|
||||
// element: 1 u32 packed key = (morton16 << 16) | tlasIndex16.
|
||||
// Sized for N_PADDED.
|
||||
WebGPUBuffer<char, false> sortTempA;
|
||||
WebGPUBuffer<char, false> sortTempB;
|
||||
|
||||
std::uint32_t builtInstanceCount = 0;
|
||||
};
|
||||
|
|
@ -141,6 +172,17 @@ export namespace Crafter {
|
|||
// a fresh build (no refit) — the GPU build pass is cheap at the
|
||||
// ~10–100 instance counts the design targets; LBVH-for-TLAS is a
|
||||
// future optimization for larger scenes.
|
||||
//
|
||||
// BuildTLAS is now split into Upload + Build so a physics
|
||||
// compute pass (e.g. physics-tlas-transform) can run between the
|
||||
// CPU mirror upload and the GPU LBVH build. The compute pass
|
||||
// writes the per-instance transform bytes that BuildTLAS leaves
|
||||
// intact for elements flagged transformOwnedByGpu, and those
|
||||
// writes have to land before the LBVH reads them. The combined
|
||||
// BuildTLAS is kept as a convenience for callers that don't
|
||||
// interleave a compute pass (e.g. the ctor-time first build).
|
||||
// Upload half of the split TLAS build: the CPU mirror upload step.
// NOTE(review): `index` presumably selects which TLAS slot to update — confirm
// against the definition.
static void BuildTLASUpload(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||||
// Build half of the split TLAS build: the GPU LBVH build step. Any compute
// pass that writes GPU-owned per-instance transforms must have its writes
// land before this step reads them.
// NOTE(review): `index` presumably selects which TLAS slot to build — confirm
// against the definition.
static void BuildTLASBuild(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||||
// Combined convenience entry point for callers that do not interleave a
// compute pass between upload and build (e.g. the ctor-time first build).
// NOTE(review): presumably equivalent to BuildTLASUpload followed by
// BuildTLASBuild with the same arguments — confirm against the definition.
static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index);
|
||||
|
||||
static void Add(RenderingElement3D* e);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue