Crafter.Graphics/interfaces/Crafter.Graphics-WebGPU.cppm
catbot 321fe596a7 feat(webgpu-rt): add intersection stage, procedural hit group, AABB BLAS API
Extends the cross-backend RT type surface for procedural geometry +
any-hit on the WebGPU path:

- RTShaderGroupType::ProceduralHitGroup + RTShaderGroup::intersectionShader
  (mirror VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR).
- WebGPURTStage::Intersection for AABB intersection shaders.
- Mesh::BuildProcedural(span<RTAabb>, opaque) — the WebGPU analog of a
  VK_GEOMETRY_TYPE_AABBS_KHR geometry.
- wgpuRegisterMeshBLAS gains geomType / opaqueFlag / primCount.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-02 22:09:14 +00:00

266 lines
16 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
*/
// JS bridge declarations for the DOM-mode WebGPU backend. Each function
// corresponds to one entry in `additional/dom-webgpu.js`. Handles are
// opaque uint32 cookies into the JS-side handle tables.
export module Crafter.Graphics:WebGPU;
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
// Exported handle aliases for WebGPU-side resources. Each one is a plain
// std::uint32_t: an opaque cookie into the JS-side handle tables.
// Because these are aliases rather than distinct types, the compiler will
// NOT reject a buffer handle passed where a texture or sampler handle is
// expected — callers have to keep the kinds straight themselves.
export namespace Crafter {
using WebGPUBufferRef = std::uint32_t;
using WebGPUTextureRef = std::uint32_t;
using WebGPUSamplerRef = std::uint32_t;
using WebGPUCommandEncoderRef = std::uint32_t; // unused as a real handle; just a marker type for portability
}
namespace Crafter::WebGPU {
// Canvas size queries, imported from the JS "env" module. Both take no
// arguments and return a signed 32-bit value.
// NOTE(review): presumably the canvas width / height in pixels; whether
// that means CSS pixels or device pixels is not visible here — confirm
// against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuGetCanvasWidth")))
extern "C" std::int32_t wgpuGetCanvasWidth();
__attribute__((import_module("env"), import_name("wgpuGetCanvasHeight")))
extern "C" std::int32_t wgpuGetCanvasHeight();
// Surface size queries, imported from the JS "env" module. Both take no
// arguments and return a signed 32-bit value.
// NOTE(review): presumably the size of the WebGPU presentation surface;
// how (or whether) this differs from the canvas size getters is not
// visible in this file — confirm against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuSurfaceWidth")))
extern "C" std::int32_t wgpuSurfaceWidth();
__attribute__((import_module("env"), import_name("wgpuSurfaceHeight")))
extern "C" std::int32_t wgpuSurfaceHeight();
// Backend initialisation entry point on the JS bridge. Takes no arguments
// and returns nothing, so success / failure cannot be reported through
// this call itself.
// NOTE(review): presumably must run before any other wgpu* import, and
// presumably relies on the JS side having already acquired the device —
// confirm both against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuInit")))
extern "C" void wgpuInit();
// Create a buffer of `byteSize` bytes on the JS side and return its opaque
// uint32 handle. No usage flags are passed, so the usage set is decided by
// the JS bridge.
// NOTE(review): the failure value is not documented here; presumably 0,
// matching wgpuRegisterMeshBLAS — confirm. `byteSize` is signed; negative
// values are presumably invalid.
__attribute__((import_module("env"), import_name("wgpuCreateBuffer")))
extern "C" std::uint32_t wgpuCreateBuffer(std::int32_t byteSize);
// Upload `byteSize` bytes starting at `srcPtr` into the buffer identified
// by `handle`. There is no destination-offset parameter; use
// wgpuWriteBufferRange to target a sub-range.
// NOTE(review): presumably writes at destination offset 0 and reads
// `srcPtr` as an address in the module's linear memory — confirm.
__attribute__((import_module("env"), import_name("wgpuWriteBuffer")))
extern "C" void wgpuWriteBuffer(std::uint32_t handle, const void* srcPtr, std::int32_t byteSize);
// Ranged variant of wgpuWriteBuffer: upload `byteSize` bytes from `srcPtr`
// into the buffer at `handle`, starting `dstByteOffset` bytes into the
// destination. Note the offset is unsigned while the size is signed.
// NOTE(review): WebGPU's queue.writeBuffer requires 4-byte-aligned offset
// and size; whether the bridge enforces or pads this is not visible here.
__attribute__((import_module("env"), import_name("wgpuWriteBufferRange")))
extern "C" void wgpuWriteBufferRange(std::uint32_t handle,
std::uint32_t dstByteOffset,
const void* srcPtr,
std::int32_t byteSize);
// Kick off a GPU→CPU readback for the entire `byteSize`-byte prefix
// of the buffer at `handle`. Returns immediately; the actual map
// resolves asynchronously. Successive Enqueues without a Poll in
// between are no-ops until the previous map resolves.
//
// `resetBytes` (≥ 0): when non-zero, the JS bridge encodes a
// clearBuffer over the first `resetBytes` bytes of the source
// buffer immediately after the copy, in the same command encoder.
// Used by Forts3D's GPU event queues to zero the atomic-add count
// for the next frame's substeps. The reset is TIED to a successful
// enqueue: if the enqueue was skipped (previous map still pending),
// the reset is skipped too — so events written by substeps during
// the missed-drain window accumulate into the next successful
// capture instead of being silently wiped.
//
// The call returns void, so a skipped enqueue is not reported to the
// caller; use wgpuReadbackReady / wgpuReadbackPoll to observe progress.
__attribute__((import_module("env"), import_name("wgpuReadbackEnqueue")))
extern "C" void wgpuReadbackEnqueue(std::uint32_t handle,
std::int32_t byteSize,
std::int32_t resetBytes);
// Poll a previously-enqueued readback. Returns 1 and writes the
// bytes into `dstPtr` if the map resolved; returns 0 otherwise.
// This is the consuming call — use wgpuReadbackReady for a check that
// leaves the pending data in place.
// NOTE(review): `byteSize` presumably must not exceed the size passed to
// wgpuReadbackEnqueue, and `dstPtr` must have room for it — confirm.
__attribute__((import_module("env"), import_name("wgpuReadbackPoll")))
extern "C" std::int32_t wgpuReadbackPoll(std::uint32_t handle, void* dstPtr, std::int32_t byteSize);
// Non-consuming readiness probe for the readback on buffer `handle`.
// Returns 1 if the readback has resolved and the next Poll would
// succeed; returns 0 otherwise.
// Used to gate multi-buffer drains (header + array) so neither side
// gets consumed until both are ready — otherwise the consumed side's
// data is lost while the other side waits for its map to resolve.
__attribute__((import_module("env"), import_name("wgpuReadbackReady")))
extern "C" std::int32_t wgpuReadbackReady(std::uint32_t handle);
// Release the buffer identified by `handle` on the JS side.
// NOTE(review): behaviour for an unknown / already-destroyed handle, or
// for a buffer with a readback still pending, is not visible here —
// confirm against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuDestroyBuffer")))
extern "C" void wgpuDestroyBuffer(std::uint32_t handle);
// Create a `w` × `h` atlas texture and return its opaque uint32 handle.
// The atlas path uses r8unorm with sub-region writes (see
// wgpuWriteAtlasRegion), unlike the rgba8unorm wgpuCreateImage2D path.
// NOTE(review): failure value is not documented here; presumably 0.
__attribute__((import_module("env"), import_name("wgpuCreateAtlasTexture")))
extern "C" std::uint32_t wgpuCreateAtlasTexture(std::int32_t w, std::int32_t h);
// Upload a sub-rectangle of pixel data into the atlas texture `handle`.
// NOTE(review): parameter meanings below are read off the names, not
// verified against the JS side — confirm:
//   srcPtr, srcW, srcH  — source image and its dimensions
//   srcBytesPerRow      — stride of one source row, in bytes
//   dstX, dstY          — top-left of the destination region in the atlas
//   copyW, copyH        — extent of the region to copy
__attribute__((import_module("env"), import_name("wgpuWriteAtlasRegion")))
extern "C" void wgpuWriteAtlasRegion(std::uint32_t handle, const void* srcPtr,
std::int32_t srcW, std::int32_t srcH,
std::int32_t srcBytesPerRow,
std::int32_t dstX, std::int32_t dstY,
std::int32_t copyW, std::int32_t copyH);
// Release the texture identified by `handle` on the JS side.
// NOTE(review): this is the only texture-destroy import in this file, so
// it presumably serves atlas, Image2D and Image2DArray textures alike —
// confirm against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuDestroyTexture")))
extern "C" void wgpuDestroyTexture(std::uint32_t handle);
// General-purpose rgba8unorm 2D texture for material albedo etc.
// Separate from the atlas path because atlas uses r8unorm + sub-region
// writes; this one takes the whole image in one shot.
// Takes the image dimensions `w` × `h` and returns an opaque uint32
// texture handle.
__attribute__((import_module("env"), import_name("wgpuCreateImage2D")))
extern "C" std::uint32_t wgpuCreateImage2D(std::int32_t w, std::int32_t h);
// Upload the pixel data at `srcPtr` (`byteSize` bytes) into the texture
// `handle` created by wgpuCreateImage2D; `w` / `h` describe the image.
// NOTE(review): for rgba8unorm `byteSize` is presumably w * h * 4 with
// tightly packed rows, and `w` / `h` presumably must match the size given
// at creation — confirm.
__attribute__((import_module("env"), import_name("wgpuWriteImage2D")))
extern "C" void wgpuWriteImage2D(std::uint32_t handle, const void* srcPtr,
std::int32_t byteSize,
std::int32_t w, std::int32_t h);
// 2D texture array — `layerCount` rgba8unorm layers of identical (w × h).
// Sampled via `texture_2d_array<f32>` in WGSL (UICustomBindingKind 3).
// Used by Image2DArray<RGBA8> to stack per-material albedos for one
// multi-material scene.
// `mipLevels` sets the length of the mip chain; individual levels are
// uploaded with wgpuWriteImage2DLayer. Returns an opaque uint32 handle.
// NOTE(review): presumably `mipLevels` must be >= 1 — confirm.
__attribute__((import_module("env"), import_name("wgpuCreateImage2DArray")))
extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h,
std::int32_t layerCount, std::int32_t mipLevels);
// Upload a single mip level for one array layer. `level` indexes into
// the texture's mip chain (0 = base); `w` / `h` must be the dimensions
// at that level. Callers pass each level's pixels separately — mip
// generation is host-side.
// `layer` selects the array layer; `srcPtr` / `byteSize` carry that
// level's pixel data.
__attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer")))
extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer, std::int32_t level,
const void* srcPtr, std::int32_t byteSize,
std::int32_t w, std::int32_t h);
// Create a sampler and return its opaque uint32 handle.
// NOTE(review): going by the name, linear-filtered with clamp-to-edge
// addressing; mipmap filtering mode is not visible here — confirm against
// `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler")))
extern "C" std::uint32_t wgpuCreateLinearClampSampler();
// Linear-filtered, repeat-addressed sampler with mipmap linear-filter.
// The usual choice for tiled material textures (woodBrace, panel, etc.)
// which expect UV > 1.0 to wrap.
// Returns an opaque uint32 sampler handle.
__attribute__((import_module("env"), import_name("wgpuCreateLinearRepeatSampler")))
extern "C" std::uint32_t wgpuCreateLinearRepeatSampler();
// Frame bracket on the JS bridge: wgpuFrameBegin / wgpuFrameEnd take no
// arguments and return nothing.
// NOTE(review): presumably these open and submit the per-frame render
// encoder that the built-in wgpuDispatch* UI passes record into —
// confirm against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuFrameBegin")))
extern "C" void wgpuFrameBegin();
__attribute__((import_module("env"), import_name("wgpuFrameEnd")))
extern "C" void wgpuFrameEnd();
// Built-in dispatches for quad and circle items. Both share one
// signature: `itemsHandle` is the buffer handle holding the items,
// `headerPtr` points at a header block read by the bridge, and
// (`gx`, `gy`) size the dispatch grid.
// NOTE(review): the header layout and size are not visible here, and
// (`gx`, `gy`) are presumably workgroup counts — confirm against
// `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuDispatchQuads")))
extern "C" void wgpuDispatchQuads(std::uint32_t itemsHandle, const void* headerPtr,
std::int32_t gx, std::int32_t gy);
__attribute__((import_module("env"), import_name("wgpuDispatchCircles")))
extern "C" void wgpuDispatchCircles(std::uint32_t itemsHandle, const void* headerPtr,
std::int32_t gx, std::int32_t gy);
// Built-in dispatches for image and text items. Same leading parameters
// as the quad / circle dispatches (`itemsHandle`, `headerPtr`, `gx`,
// `gy`), plus the texture to sample (`texHandle` for images,
// `atlasHandle` for text) and the sampler handle `sampHandle`.
// NOTE(review): `atlasHandle` is presumably a wgpuCreateAtlasTexture
// handle and `texHandle` a wgpuCreateImage2D handle — confirm.
__attribute__((import_module("env"), import_name("wgpuDispatchImages")))
extern "C" void wgpuDispatchImages(std::uint32_t itemsHandle, const void* headerPtr,
std::int32_t gx, std::int32_t gy,
std::uint32_t texHandle, std::uint32_t sampHandle);
__attribute__((import_module("env"), import_name("wgpuDispatchText")))
extern "C" void wgpuDispatchText(std::uint32_t itemsHandle, const void* headerPtr,
std::int32_t gx, std::int32_t gy,
std::uint32_t atlasHandle, std::uint32_t sampHandle);
// ─── custom user-authored compute shaders ───────────────────────────
// Load a user-authored WGSL compute shader (`wgslPtr` / `wgslLen`) and
// return an opaque uint32 pipeline handle for wgpuDispatchCustom.
// `bindingsPtr` / `bindingsCount` describe the shader's bindings.
// rayQueryFlag = 1 swaps group(1) from the UI ping-pong pair to the RT
// data heaps (TLAS, BVH, meshRecs, verts, idx, primRemap, outImage) and
// prepends a WGSL prelude exposing the rayQuery* API. Shaders that set
// this MUST NOT declare their own @group(1) bindings.
// NOTE(review): the binding records are presumably UICustomBinding-shaped
// as in wgpuLoadRTPipeline, and `wgslLen` presumably a byte length —
// confirm.
__attribute__((import_module("env"), import_name("wgpuLoadCustomShader")))
extern "C" std::uint32_t wgpuLoadCustomShader(const void* wgslPtr, std::int32_t wgslLen,
const void* bindingsPtr, std::int32_t bindingsCount,
std::int32_t rayQueryFlag);
// Dispatch a pipeline loaded with wgpuLoadCustomShader.
//   pushPtr / pushBytes       — push data blob handed to the shader
//   handlesPtr / handlesCount — resource handles for the shader's bindings
//   gx, gy, gz                — dispatch grid size
// NOTE(review): `handles[]` presumably follows the binding order declared
// at load time (as documented for wgpuDispatchRT), and (gx, gy, gz) are
// presumably workgroup counts — confirm.
__attribute__((import_module("env"), import_name("wgpuDispatchCustom")))
extern "C" void wgpuDispatchCustom(std::uint32_t pipelineHandle,
const void* pushPtr, std::int32_t pushBytes,
const void* handlesPtr, std::int32_t handlesCount,
std::int32_t gx, std::int32_t gy, std::int32_t gz);
// ─── software raytracing ───────────────────────────────────────────
//
// Mesh::Build forwards vertex / index / BVH-node / primRemap arrays
// to the JS bridge, which queue.writeBuffers them into the global
// RT mesh heaps (growing if needed) and records the per-mesh offsets
// under a freshly-allocated u32 handle. The handle is what user code
// stores in RTInstance::accelerationStructureReference; the WebGPU
// TLAS-build compute shader resolves it back to root AABB + heap
// offsets at dispatch time. Returns 0 on failure.
// The six leading floats (`minX`..`maxZ`) are the mesh's bounding box.
// The optional `attribsPtr` / `attribsByteCount` carry per-vertex
// attribute payload (normals, UVs, etc. — layout is example-defined)
// that gets appended to a global attribs heap and exposed to RT
// closest-hit shaders as `vertexAttribs : array<u32>` at
// @group(1) @binding(7). Pass (nullptr, 0) for positions-only meshes.
// `geomType` selects the primitive kind: 0 = triangles (the
// verticesPtr/indicesPtr streams), 1 = AABBs (VK_GEOMETRY_TYPE_AABBS) —
// then verticesPtr holds 2 vec3 per primitive [min, max], indexCount is
// 0, and an intersection shader supplies the hit. `opaqueFlag` is the
// geometry's opaque bit (0 lets any-hit run). `primCount` is the
// triangle / AABB primitive count.
// Note that only the attribute payload is sized in bytes
// (`attribsByteCount`); every other length is an element count.
// NOTE(review): element sizes for the vertex / index / BVH-node /
// primRemap streams are not stated here — confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS")))
extern "C" std::uint32_t wgpuRegisterMeshBLAS(
float minX, float minY, float minZ,
float maxX, float maxY, float maxZ,
const void* verticesPtr, std::int32_t vertexCount,
const void* indicesPtr, std::int32_t indexCount,
const void* bvhNodesPtr, std::int32_t bvhNodeCount,
const void* primRemapPtr, std::int32_t primRemapCount,
const void* attribsPtr, std::int32_t attribsByteCount,
std::int32_t geomType, std::int32_t opaqueFlag, std::int32_t primCount);
// RT pipeline build. The library composes WGSL by concatenating the
// traversal library, generated hit-group switches, and the user-
// supplied raygen / miss / closesthit / anyhit bodies. `bindings` is
// UICustomBinding-shaped (8 bytes each) declaring extra @group(2)+
// resources the user's closest-hit / miss / raygen WGSL references.
// Pass (nullptr, 0) for a pipeline with no user-declared bindings.
// Returns an opaque pipeline handle.
// `wgslPtr` / `wgslLen` carry the already-composed WGSL source.
// NOTE(review): the stage list above omits intersection shaders, which
// the wgpuRegisterMeshBLAS comment says supply hits for AABB geometry —
// this list may be stale; confirm and update.
__attribute__((import_module("env"), import_name("wgpuLoadRTPipeline")))
extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen,
const void* bindingsPtr, std::int32_t bindingsCount);
// Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched
// over a (gx, gy) tile grid; the library writes the push data (camera,
// payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS
// + global mesh heap, and runs one workgroup per 8x8 screen tile.
// `handles[]` carries resolved WebGPU resource handles for every user
// binding declared at pipeline-load time, in the same order. Pass
// (nullptr, 0) for a pipeline with no user bindings.
// `pipelineHandle` comes from wgpuLoadRTPipeline; `tlasBufHandle` is the
// buffer holding the TLAS to trace against.
// NOTE(review): `instanceCount` is presumably the number of TLAS
// instances and `maxDepth` presumably the trace recursion limit —
// neither is documented here; confirm against the JS bridge.
__attribute__((import_module("env"), import_name("wgpuDispatchRT")))
extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle,
const void* pushPtr, std::int32_t pushBytes,
std::uint32_t tlasBufHandle,
std::int32_t instanceCount,
std::int32_t gx, std::int32_t gy,
const void* handlesPtr, std::int32_t handlesCount,
std::int32_t maxDepth);
// GPU TLAS-build dispatch. Two sequential compute passes:
// 1. tlasBuildMain — per-instance world AABB + identity permutation
// + naive Morton (overwritten in pass 2). Outputs the flat
// tlasBuf SSBO consumed by traceRay / rayQuery.
// 2. lbvhBuildMain — single workgroup of 1024 threads; reduces
// scene AABB, recomputes Morton with proper normalization,
// bitonic-sorts (morton, instance_id), writes the sorted
// permutation into `entryOrderBufHandle`, and refits a
// sweep-tree BVH into `bvhNodesBufHandle` bottom-up.
// Pre-LBVH bin-build is gone; `binsBufHandle` is kept in the
// signature as a placeholder so the C++ side doesn't churn.
// All buffer arguments are handles to caller-provided buffers;
// `instanceBufHandle` / `instanceCount` describe the input instances and
// `tlasOutBufHandle` receives the flat TLAS.
// NOTE(review): `mortonBufHandle` presumably holds the Morton codes and
// `sortTempABufHandle` / `sortTempBBufHandle` presumably scratch space
// for the sort; required sizes, and any instance-count limit implied by
// the single 1024-thread workgroup, are not stated here — confirm.
__attribute__((import_module("env"), import_name("wgpuBuildTLAS")))
extern "C" void wgpuBuildTLAS(std::uint32_t instanceBufHandle,
std::int32_t instanceCount,
std::uint32_t tlasOutBufHandle,
std::uint32_t entryOrderBufHandle,
std::uint32_t mortonBufHandle,
std::uint32_t binsBufHandle,
std::uint32_t bvhNodesBufHandle,
std::uint32_t sortTempABufHandle,
std::uint32_t sortTempBBufHandle);
// ── Standalone compute pipelines ───────────────────────────────────
//
// Mirror of the native ComputeShader API: load a user-authored
// compute WGSL with arbitrary @group bindings, dispatch it at any
// point in the frame (inside or outside the UI compute pass —
// physics ticks dispatch from update lambdas, which fire outside
// the per-frame render encoder).
//
// WGSL contract:
// @group(0) @binding(0) — uniform PushData (optional; only if
// pushUniformSize > 0 at load).
// @group(1+) @binding(N) — user bindings declared via
// UICustomBinding[]. When rayQuery is
// on, @group(1) is reserved for the RT
// heap and user bindings start at
// @group(2).
//
// Parameters: `wgslPtr` / `wgslLen` are the shader source,
// `pushUniformSize` the size of the PushData uniform (0 = none),
// `bindingsPtr` / `bindingsCount` the UICustomBinding[] array, and
// `rayQueryFlag` turns the rayQuery mode on. Returns an opaque uint32
// pipeline handle for wgpuDispatchCompute.
__attribute__((import_module("env"), import_name("wgpuLoadComputePipeline")))
extern "C" std::uint32_t wgpuLoadComputePipeline(
const void* wgslPtr, std::int32_t wgslLen,
std::int32_t pushUniformSize,
const void* bindingsPtr, std::int32_t bindingsCount,
std::int32_t rayQueryFlag);
// Dispatch a pipeline loaded with wgpuLoadComputePipeline.
//   pushPtr / pushBytes       — data for the PushData uniform
//   handlesPtr / handlesCount — resource handles for the user bindings
//   gx, gy, gz                — dispatch grid size
// NOTE(review): `pushBytes` presumably must not exceed the
// `pushUniformSize` given at load, `handles[]` presumably follows the
// declared binding order, and (gx, gy, gz) are presumably workgroup
// counts — confirm against `additional/dom-webgpu.js`.
__attribute__((import_module("env"), import_name("wgpuDispatchCompute")))
extern "C" void wgpuDispatchCompute(
std::uint32_t pipelineHandle,
const void* pushPtr, std::int32_t pushBytes,
const void* handlesPtr, std::int32_t handlesCount,
std::int32_t gx, std::int32_t gy, std::int32_t gz);
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM