webgpu improvements

This commit is contained in:
Jorijn van der Graaf 2026-05-24 13:32:08 +02:00
commit 8347467e1e
18 changed files with 1932 additions and 153 deletions

View file

@ -35,6 +35,40 @@ namespace Crafter::WebGPU {
extern "C" std::uint32_t wgpuCreateBuffer(std::int32_t byteSize);
// Imported from the `env` module as `wgpuWriteBuffer`. Presumably uploads
// `byteSize` bytes starting at `srcPtr` into the buffer identified by
// `handle` — TODO confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuWriteBuffer")))
extern "C" void wgpuWriteBuffer(std::uint32_t handle, const void* srcPtr, std::int32_t byteSize);
// Imported from the `env` module as `wgpuWriteBufferRange`. Same shape as
// wgpuWriteBuffer plus a `dstByteOffset`; presumably writes `byteSize`
// bytes from `srcPtr` into the buffer at `handle`, starting
// `dstByteOffset` bytes into it — TODO confirm against the JS bridge
// (including any alignment requirement on the offset/size).
__attribute__((import_module("env"), import_name("wgpuWriteBufferRange")))
extern "C" void wgpuWriteBufferRange(std::uint32_t handle,
std::uint32_t dstByteOffset,
const void* srcPtr,
std::int32_t byteSize);
// Kick off a GPU→CPU readback of the first `byteSize` bytes of the
// buffer at `handle`. Returns immediately; the actual map resolves
// asynchronously. Successive Enqueues without a Poll in between are
// no-ops until the previous map resolves.
//
// `resetBytes` (must be ≥ 0) — if non-zero, the JS bridge encodes a
// clearBuffer over the first `resetBytes` bytes of the source
// buffer immediately after the copy, in the same command encoder.
// Used by Forts3D's GPU event queues to zero the atomic-add count
// for the next frame's substeps. The reset is TIED to a successful
// enqueue: if the enqueue was skipped (previous map still pending),
// the reset is skipped too — so events written by substeps during
// the missed-drain window accumulate into the next successful
// capture instead of being silently wiped.
__attribute__((import_module("env"), import_name("wgpuReadbackEnqueue")))
extern "C" void wgpuReadbackEnqueue(std::uint32_t handle,
std::int32_t byteSize,
std::int32_t resetBytes);
// Poll a previously-enqueued readback. Returns 1 and writes the
// bytes into `dstPtr` if the map resolved; returns 0 otherwise.
// NOTE(review): `byteSize` is presumably the number of bytes copied into
// `dstPtr` and should not exceed the size passed to wgpuReadbackEnqueue —
// confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuReadbackPoll")))
extern "C" std::int32_t wgpuReadbackPoll(std::uint32_t handle, void* dstPtr, std::int32_t byteSize);
// Non-consuming readiness probe for the readback on `handle`. Returns 1
// if the readback has resolved and the next Poll would succeed; returns
// 0 otherwise.
// Used to gate multi-buffer drains (header + array) so neither side
// gets consumed until both are ready — otherwise the consumed side's
// data is lost while the other side waits for its map to resolve.
__attribute__((import_module("env"), import_name("wgpuReadbackReady")))
extern "C" std::int32_t wgpuReadbackReady(std::uint32_t handle);
// Imported from the `env` module as `wgpuDestroyBuffer`. Presumably
// releases the buffer identified by `handle`; whether a pending readback
// on that handle is cancelled is not visible here — TODO confirm.
__attribute__((import_module("env"), import_name("wgpuDestroyBuffer")))
extern "C" void wgpuDestroyBuffer(std::uint32_t handle);
@ -64,15 +98,26 @@ namespace Crafter::WebGPU {
// Used by Image2DArray<RGBA8> to stack per-material albedos for one
// multi-material scene.
//
// `w` / `h` / `layerCount` size the array texture. `mipLevels` is
// presumably the length of each layer's mip chain; individual levels are
// uploaded with wgpuWriteImage2DLayer — TODO confirm against the JS bridge.
// Only the four-argument form is declared: a second extern "C" declaration
// of the same name with a different parameter list would conflict.
__attribute__((import_module("env"), import_name("wgpuCreateImage2DArray")))
extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h,
                                                std::int32_t layerCount, std::int32_t mipLevels);
// Upload a single mip level for one array layer. `level` indexes into
// the texture's mip chain (0 = base); `w` / `h` must be the dimensions
// at that level. Callers pass each level's pixels separately — mip
// generation is host-side.
// `srcPtr` / `byteSize` describe the pixel data for that level
// (presumably tightly packed — TODO confirm against the JS bridge).
__attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer")))
extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer, std::int32_t level,
                                      const void* srcPtr, std::int32_t byteSize,
                                      std::int32_t w, std::int32_t h);
// Imported from the `env` module as `wgpuCreateLinearClampSampler`.
// Returns a std::uint32_t, presumably a sampler handle. By name, a
// linear-filtered sampler with clamp addressing — NOTE(review): the exact
// filter/address modes live in the JS bridge; confirm there.
__attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler")))
extern "C" std::uint32_t wgpuCreateLinearClampSampler();
// Linear-filtered, repeat-addressed sampler with linear mipmap filtering.
// The usual choice for tiled material textures (woodBrace, panel, etc.)
// which expect UV > 1.0 to wrap.
__attribute__((import_module("env"), import_name("wgpuCreateLinearRepeatSampler")))
extern "C" std::uint32_t wgpuCreateLinearRepeatSampler();
// Imported from the `env` module as `wgpuFrameBegin`; takes no arguments
// and returns nothing. Presumably marks the start of a frame and pairs
// with wgpuFrameEnd — TODO confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuFrameBegin")))
extern "C" void wgpuFrameBegin();
__attribute__((import_module("env"), import_name("wgpuFrameEnd")))
@ -158,12 +203,56 @@ namespace Crafter::WebGPU {
std::int32_t gx, std::int32_t gy,
const void* handlesPtr, std::int32_t handlesCount);
// GPU TLAS-build dispatch. Two sequential compute passes:
//   1. tlasBuildMain — per-instance world AABB + identity permutation
//      + naive Morton (overwritten in pass 2). Outputs the flat
//      tlasBuf SSBO consumed by traceRay / rayQuery.
//   2. lbvhBuildMain — single workgroup of 1024 threads; reduces
//      scene AABB, recomputes Morton with proper normalization,
//      bitonic-sorts (morton, instance_id), writes the sorted
//      permutation into `entryOrderBufHandle`, and refits a
//      sweep-tree BVH into `bvhNodesBufHandle` bottom-up.
// Pre-LBVH bin-build is gone; `binsBufHandle` is kept in the
// signature as a placeholder so the C++ side doesn't churn.
//
// NOTE(review): `mortonBufHandle`, `sortTempABufHandle` and
// `sortTempBBufHandle` are presumably storage for the Morton codes and
// scratch space for the bitonic sort — confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuBuildTLAS")))
extern "C" void wgpuBuildTLAS(std::uint32_t instanceBufHandle,
                              std::int32_t instanceCount,
                              std::uint32_t tlasOutBufHandle,
                              std::uint32_t entryOrderBufHandle,
                              std::uint32_t mortonBufHandle,
                              std::uint32_t binsBufHandle,
                              std::uint32_t bvhNodesBufHandle,
                              std::uint32_t sortTempABufHandle,
                              std::uint32_t sortTempBBufHandle);
// ── Standalone compute pipelines ───────────────────────────────────
//
// Mirror of the native ComputeShader API: load a user-authored
// compute WGSL with arbitrary @group bindings, dispatch it at any
// point in the frame (inside or outside the UI compute pass —
// physics ticks dispatch from update lambdas, which fire outside
// the per-frame render encoder).
//
// WGSL contract:
//   @group(0) @binding(0)  — uniform PushData (optional; only if
//                            pushUniformSize > 0 at load).
//   @group(1+) @binding(N) — user bindings declared via
//                            UICustomBinding[]. When rayQuery is
//                            on, @group(1) is reserved for the RT
//                            heap and user bindings start at
//                            @group(2).
//
// wgpuLoadComputePipeline parameters:
//   wgslPtr / wgslLen           — the compute WGSL source to load.
//   pushUniformSize             — size of the PushData uniform; 0 means the
//                                 shader declares none.
//   bindingsPtr / bindingsCount — the UICustomBinding[] describing the
//                                 user bindings.
//   rayQueryFlag                — presumably non-zero turns rayQuery on
//                                 (reserving @group(1)) — TODO confirm.
// Returns a std::uint32_t, presumably the pipeline handle later passed to
// wgpuDispatchCompute — TODO confirm.
__attribute__((import_module("env"), import_name("wgpuLoadComputePipeline")))
extern "C" std::uint32_t wgpuLoadComputePipeline(
const void* wgslPtr, std::int32_t wgslLen,
std::int32_t pushUniformSize,
const void* bindingsPtr, std::int32_t bindingsCount,
std::int32_t rayQueryFlag);
// Imported from the `env` module as `wgpuDispatchCompute`. Parameter notes
// below are assumptions drawn from the names — TODO confirm against the
// JS bridge:
//   pipelineHandle             — presumably a value returned by
//                                wgpuLoadComputePipeline.
//   pushPtr / pushBytes        — presumably the PushData uniform contents.
//   handlesPtr / handlesCount  — presumably the resource handles for the
//                                user bindings, in declaration order.
//   gx / gy / gz               — presumably workgroup counts per dimension.
__attribute__((import_module("env"), import_name("wgpuDispatchCompute")))
extern "C" void wgpuDispatchCompute(
std::uint32_t pipelineHandle,
const void* pushPtr, std::int32_t pushBytes,
const void* handlesPtr, std::int32_t handlesCount,
std::int32_t gx, std::int32_t gy, std::int32_t gz);
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM