webgpu improvements

2026-05-24 13:32:08 +02:00 · 2026-05-24 13:32:08 +02:00 · 8347467e1e
commit 8347467e1e
parent 5a75571ffd
18 changed files with 1932 additions and 153 deletions
--- a/interfaces/Crafter.Graphics-WebGPU.cppm
+++ b/interfaces/Crafter.Graphics-WebGPU.cppm
@ -35,6 +35,40 @@ namespace Crafter::WebGPU {
    extern "C" std::uint32_t wgpuCreateBuffer(std::int32_t byteSize);
    __attribute__((import_module("env"), import_name("wgpuWriteBuffer")))
    extern "C" void wgpuWriteBuffer(std::uint32_t handle, const void* srcPtr, std::int32_t byteSize);
+    __attribute__((import_module("env"), import_name("wgpuWriteBufferRange")))
+    extern "C" void wgpuWriteBufferRange(std::uint32_t handle,
+                                          std::uint32_t dstByteOffset,
+                                          const void* srcPtr,
+                                          std::int32_t byteSize);
+    // Kick off a GPU→CPU readback of the first `byteSize` bytes of the
+    // buffer at `handle`. Returns immediately; the actual map resolves
+    // asynchronously. Successive Enqueues without a Poll in between
+    // are no-ops until the previous map resolves.
+    //
+    // `resetBytes` ≥ 0 — if non-zero, the JS bridge encodes a
+    // clearBuffer over the first `resetBytes` bytes of the source
+    // buffer (the one at `handle`) immediately after the copy, in the
+    // same command encoder. Used by Forts3D's GPU event queues to zero
+    // the atomic-add count for the next frame's substeps. The reset is
+    // TIED to a successful enqueue: if the enqueue was skipped (previous
+    // map still pending), the reset is skipped too — so events written
+    // by substeps during the missed-drain window accumulate into the
+    // next successful capture instead of being silently wiped.
+    __attribute__((import_module("env"), import_name("wgpuReadbackEnqueue")))
+    extern "C" void wgpuReadbackEnqueue(std::uint32_t handle,
+                                         std::int32_t byteSize,
+                                         std::int32_t resetBytes);
+    // Poll a previously-enqueued readback, consuming it on success.
+    // Returns 1 with the bytes written to `dstPtr` if resolved, else 0.
+    __attribute__((import_module("env"), import_name("wgpuReadbackPoll")))
+    extern "C" std::int32_t wgpuReadbackPoll(std::uint32_t handle, void* dstPtr, std::int32_t byteSize);
+    // Non-consuming readiness probe for `handle`. Returns 1 if the
+    // readback has resolved and the next Poll would succeed; else 0.
+    // Used to gate multi-buffer drains (header + array) so neither side
+    // gets consumed until both are ready — otherwise the consumed side's
+    // data is lost while the other side waits for its map to resolve.
+    __attribute__((import_module("env"), import_name("wgpuReadbackReady")))
+    extern "C" std::int32_t wgpuReadbackReady(std::uint32_t handle);
    __attribute__((import_module("env"), import_name("wgpuDestroyBuffer")))
    extern "C" void wgpuDestroyBuffer(std::uint32_t handle);

@ -64,15 +98,26 @@ namespace Crafter::WebGPU {
    // Used by Image2DArray<RGBA8> to stack per-material albedos for one
    // multi-material scene.
    __attribute__((import_module("env"), import_name("wgpuCreateImage2DArray")))
-    extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h, std::int32_t layerCount);
+    extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h,
+                                                    std::int32_t layerCount, std::int32_t mipLevels);
+    // Upload a single mip level for one array layer. `level` indexes into
+    // the texture's mip chain (0 = base); `w` / `h` must be the dimensions
+    // at that level. Callers pass each level's pixels separately — mip
+    // generation is host-side.
    __attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer")))
-    extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer,
+    extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer, std::int32_t level,
                                          const void* srcPtr, std::int32_t byteSize,
                                          std::int32_t w, std::int32_t h);

    __attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler")))
    extern "C" std::uint32_t wgpuCreateLinearClampSampler();

+    // Linear-filtered, repeat-addressed sampler with linear mipmap
+    // filtering. The usual choice for tiled material textures
+    // (woodBrace, panel, etc.), which expect UV > 1.0 to wrap.
+    __attribute__((import_module("env"), import_name("wgpuCreateLinearRepeatSampler")))
+    extern "C" std::uint32_t wgpuCreateLinearRepeatSampler();
+
    __attribute__((import_module("env"), import_name("wgpuFrameBegin")))
    extern "C" void wgpuFrameBegin();
    __attribute__((import_module("env"), import_name("wgpuFrameEnd")))
@ -158,12 +203,56 @@ namespace Crafter::WebGPU {
                                   std::int32_t  gx, std::int32_t gy,
                                   const void* handlesPtr, std::int32_t handlesCount);

-    // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or
-    // GPU-written), produces per-instance world-space AABBs + per-instance
-    // transform matrices in a flat tlasBuf SSBO consumed by traceRay / rayQuery.
+    // GPU TLAS-build dispatch. Two sequential compute passes:
+    //   1. tlasBuildMain — per-instance world AABB + identity permutation
+    //      + naive Morton (overwritten in pass 2). Outputs the flat TLAS
+    //      SSBO (`tlasOutBufHandle`) consumed by traceRay / rayQuery.
+    //   2. lbvhBuildMain — single workgroup of 1024 threads; reduces
+    //      scene AABB, recomputes Morton with proper normalization,
+    //      bitonic-sorts (morton, instance_id), writes the sorted
+    //      permutation into `entryOrderBufHandle`, and refits a
+    //      sweep-tree BVH into `bvhNodesBufHandle` bottom-up.
+    // Pre-LBVH bin-build is gone; `binsBufHandle` is kept in the
+    // signature as a placeholder so the C++ side doesn't churn.
    __attribute__((import_module("env"), import_name("wgpuBuildTLAS")))
    extern "C" void wgpuBuildTLAS(std::uint32_t instanceBufHandle,
                                  std::int32_t  instanceCount,
-                                  std::uint32_t tlasOutBufHandle);
+                                  std::uint32_t tlasOutBufHandle,
+                                  std::uint32_t entryOrderBufHandle,
+                                  std::uint32_t mortonBufHandle,
+                                  std::uint32_t binsBufHandle,
+                                  std::uint32_t bvhNodesBufHandle,
+                                  std::uint32_t sortTempABufHandle,
+                                  std::uint32_t sortTempBBufHandle);
+
+    // ── Standalone compute pipelines ───────────────────────────────────
+    //
+    // Mirror of the native ComputeShader API: load a user-authored
+    // compute WGSL with arbitrary @group bindings, dispatch it at any
+    // point in the frame (inside or outside the UI compute pass —
+    // physics ticks dispatch from update lambdas, which fire outside
+    // the per-frame render encoder).
+    //
+    // WGSL contract:
+    //   @group(0) @binding(0) — uniform PushData (optional; only if
+    //                            pushUniformSize > 0 at load).
+    //   @group(1+) @binding(N) — user bindings declared via
+    //                            UICustomBinding[]. When rayQuery is on
+    //                            (presumably `rayQueryFlag` != 0),
+    //                            @group(1) is reserved for the RT heap
+    //                            and user bindings start at @group(2).
+    __attribute__((import_module("env"), import_name("wgpuLoadComputePipeline")))
+    extern "C" std::uint32_t wgpuLoadComputePipeline(
+        const void* wgslPtr, std::int32_t wgslLen,
+        std::int32_t pushUniformSize,
+        const void* bindingsPtr, std::int32_t bindingsCount,
+        std::int32_t rayQueryFlag);
+
+    __attribute__((import_module("env"), import_name("wgpuDispatchCompute")))
+    extern "C" void wgpuDispatchCompute(
+        std::uint32_t pipelineHandle,
+        const void* pushPtr, std::int32_t pushBytes,
+        const void* handlesPtr, std::int32_t handlesCount,
+        std::int32_t gx, std::int32_t gy, std::int32_t gz);
 }
 #endif // CRAFTER_GRAPHICS_WINDOW_DOM