Crafter.Graphics/additional/dom-webgpu.js

1837 lines
76 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Crafter.Graphics WebGPU bridge — DOM mode UI compute pipeline.
Surface model (high-level, deliberately not 1:1 with WebGPU):
- JS owns the GPUDevice/queue/compute pipelines/bind-group cache.
- C++ owns slot allocation and per-frame logic; it calls into ~15 imports.
- Standard UI shaders are embedded as WGSL strings at the bottom of this
file and compiled once at startup.
Ping-pong output strategy (per Decision 2 in plan):
- Two rgba8unorm storage textures sized to the canvas.
- Each Dispatch reads `prev` (sampled), writes `out` (storage, write-only).
- JS swaps the two between dispatches, so item-order overdraw works.
- At frame end, the current `out` is blitted to the canvas via
copyTextureToTexture (canvas configured as rgba8unorm to match).
This file is loaded as <script type="module">. Top-level await blocks
runtime.js's _start() until adapter + device are resolved, so by the time
main() runs every import here is fully wired.
*/
// ─── env stubs (assigned synchronously, BEFORE any async work) ────────────
// The wasm module's import-link step needs every declared wgpu* import to
// resolve to a Function. If init below throws, the stubs stay in place so
// the wasm still links — and the call site gets a clear error at runtime
// instead of "import object field X is not a Function" at link time.
window.crafter_webbuild_env = window.crafter_webbuild_env || {};
window.crafter_webbuild_env.table = window.crafter_webbuild_env.table
|| new WebAssembly.Table({ initial: 4, element: "anyfunc" });
let initError = null;
function stub(name) {
return (...args) => {
const msg = `[crafter-wgpu] ${name}() called but WebGPU init failed: ${initError?.message ?? "(no error captured)"}`;
console.error(msg);
throw new Error(msg);
};
}
{
const e = window.crafter_webbuild_env;
for (const n of [
"wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
"wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
"wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
"wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
"wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
"wgpuLoadCustomShader", "wgpuDispatchCustom",
"wgpuRegisterMeshBLAS", "wgpuLoadRTPipeline", "wgpuDispatchRT", "wgpuBuildTLAS",
]) {
// Read-write ints don't need a stub-throw; return 0 for the size queries.
e[n] = n.endsWith("Width") || n.endsWith("Height")
? () => 0
: (n === "wgpuRegisterMeshBLAS" ? () => 0 : stub(n));
}
}
// ─── canvas + device init (runs before _start) ───────────────────────────
// Wrapped in an async IIFE assigned to window.crafter_webbuild_env_ready so
// the runtime.js shim can `await` it explicitly before calling _start().
// Sibling <script type="module"> top-level awaits are NOT reliably
// serialized in Firefox (verified 2026-05), so we can't depend on this
// file's TLA to block runtime.js by itself.
window.crafter_webbuild_env_ready = (async () => {
try {
if (!navigator.gpu) {
document.body.innerHTML = "<p style=\"font-family:sans-serif;padding:24px\">"
+ "WebGPU not available in this browser. Try Chrome 121+ / Firefox 141+ / Safari 26+.</p>";
initError = new Error("WebGPU unavailable");
throw initError;
}
const canvas = document.createElement("canvas");
canvas.id = "crafter-canvas";
canvas.style.cssText = "position:fixed;inset:0;width:100vw;height:100vh;display:block;";
document.body.style.margin = "0";
document.body.appendChild(canvas);
function syncCanvasSize() {
// Match canvas pixel size to its CSS pixel size 1:1 so MouseEvent
// clientX/clientY (CSS pixels) and the wasm-side window.width/.height
// share the same coordinate space. (HiDPI sharpness is a v2 concern
// — would need DPR on the GPU side AND a scaling step in the C++
// Window/Event glue so layout/hit-testing/dispatch counts stay
// consistent.)
const w = window.innerWidth;
const h = window.innerHeight;
if (canvas.width !== w) canvas.width = w;
if (canvas.height !== h) canvas.height = h;
return { w, h };
}
syncCanvasSize();
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
initError = new Error("navigator.gpu.requestAdapter() returned null (no compatible adapter)");
console.error("[crafter-wgpu]", initError.message);
throw initError;
}
const device = await adapter.requestDevice();
const queue = device.queue;
const ctx = canvas.getContext("webgpu");
const canvasFormat = "rgba8unorm"; // match storage textures, skip swizzle blit
ctx.configure({ device, format: canvasFormat, alphaMode: "opaque",
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_DST });
device.lost.then((info) => {
console.error("[crafter-wgpu] device lost:", info.message);
state.gpuLost = true;
});
// ─── handle tables ─────────────────────────────────────────────────────
const buffers = new Map(); // handle → GPUBuffer
const textures = new Map(); // handle → GPUTexture
const textureViews = new Map(); // handle → GPUTextureView (mirrors textures key for the view)
const samplers = new Map(); // handle → GPUSampler
let nextHandle = 1;
function newHandle() { return nextHandle++; }
// ─── ping-pong storage textures ────────────────────────────────────────
const state = {
pingTex: null, pingView: null,
pongTex: null, pongView: null,
outIsPing: true, // current "out" target
width: 0, height: 0,
encoder: null,
pass: null,
headerRing: null, // GPUBuffer; uniform header writes ring through this
headerRingSize: 0,
headerRingOffset: 0,
bindGroupCache: new Map(), // key → GPUBindGroup
gpuLost: false,
};
function recreatePingPong(w, h) {
const usage = GPUTextureUsage.STORAGE_BINDING
| GPUTextureUsage.TEXTURE_BINDING
| GPUTextureUsage.COPY_SRC
| GPUTextureUsage.COPY_DST; // COPY_DST so we can clear it
if (state.pingTex) state.pingTex.destroy();
if (state.pongTex) state.pongTex.destroy();
state.pingTex = device.createTexture({ size: [w, h], format: "rgba8unorm", usage });
state.pongTex = device.createTexture({ size: [w, h], format: "rgba8unorm", usage });
state.pingView = state.pingTex.createView();
state.pongView = state.pongTex.createView();
state.width = w; state.height = h;
state.outIsPing = true;
state.bindGroupCache.clear();
}
function ensureSized() {
const { w, h } = syncCanvasSize();
if (w !== state.width || h !== state.height) {
recreatePingPong(w, h);
// Notify the wasm side that the surface size changed so it can
// fire onResize through Window. The wasm export is added by
// Crafter.Graphics-Window.cpp.
const onResize = wasmExports && wasmExports.__crafterDom_resize;
if (onResize) onResize(1, w, h);
}
}
// Header ring buffer: 256-byte-aligned slots holding UIDispatchHeader (48
// bytes of meaningful data, padded to 256). Wraps at frame boundary.
const HEADER_ALIGN = 256;
const HEADER_RING_SLOTS = 64;
state.headerRingSize = HEADER_ALIGN * HEADER_RING_SLOTS;
state.headerRing = device.createBuffer({
size: state.headerRingSize,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
// ─── pipelines ─────────────────────────────────────────────────────────
const wgslShared = String.raw`
struct UIDispatchHeader {
outImage: u32,
itemBuffer: u32,
surfaceW: u32,
surfaceH: u32,
clipX: f32,
clipY: f32,
clipW: f32,
clipH: f32,
itemCount: u32,
frameIdx: u32,
flags: u32,
_pad: u32,
};
@group(0) @binding(0) var<uniform> hdr : UIDispatchHeader;
@group(1) @binding(0) var outTex : texture_storage_2d<rgba8unorm, write>;
@group(1) @binding(1) var prevTex : texture_2d<f32>;
fn uiResolvePixel(coord: vec2<u32>) -> bool {
if (coord.x >= hdr.surfaceW || coord.y >= hdr.surfaceH) { return false; }
let fx = f32(coord.x); let fy = f32(coord.y);
if (fx < hdr.clipX || fy < hdr.clipY) { return false; }
if (fx >= hdr.clipX + hdr.clipW) { return false; }
if (fy >= hdr.clipY + hdr.clipH) { return false; }
return true;
}
fn uiBlendOver(dst: vec4<f32>, src: vec4<f32>) -> vec4<f32> {
let a = clamp(src.a, 0.0, 1.0);
let rgb = mix(dst.rgb, src.rgb, vec3<f32>(a));
let outA = a + dst.a * (1.0 - a);
return vec4<f32>(rgb, outA);
}
fn uiSdRoundRect(p: vec2<f32>, halfSize: vec2<f32>, r4: vec4<f32>) -> f32 {
var r: vec4<f32> = r4;
// Pick radius for the quadrant p is in. r order: (TL, TR, BR, BL).
let rx = select(r.x, r.z, p.x > 0.0);
let ry = select(r.w, r.y, p.x > 0.0);
let radius = select(ry, rx, p.y > 0.0);
let q = abs(p) - halfSize + vec2<f32>(radius);
return min(max(q.x, q.y), 0.0) + length(max(q, vec2<f32>(0.0))) - radius;
}
`;
const wgslQuads = wgslShared + String.raw`
struct QuadItem {
rect: vec4<f32>,
color: vec4<f32>,
corners: vec4<f32>,
outline: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<QuadItem>;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let halfSize = it.rect.zw * 0.5;
let p = sp - (it.rect.xy + halfSize);
let d = uiSdRoundRect(p, halfSize, it.corners);
let bodyA = clamp(0.5 - d, 0.0, 1.0);
if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
if (it.outline.x > 0.0) {
let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
let outlineA = clamp(0.5 - t, 0.0, 1.0);
src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
max(src.a, outlineA));
}
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslCircles = wgslShared + String.raw`
struct CircleItem {
centerRadius: vec4<f32>,
color: vec4<f32>,
outline: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<CircleItem>;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let center = it.centerRadius.xy;
let radius = it.centerRadius.z;
let d = length(sp - center) - radius;
let bodyA = clamp(0.5 - d, 0.0, 1.0);
if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
if (it.outline.x > 0.0) {
let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
let outlineA = clamp(0.5 - t, 0.0, 1.0);
src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
max(src.a, outlineA));
}
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslImages = wgslShared + String.raw`
struct ImageItem {
rect: vec4<f32>,
uv: vec4<f32>,
tint: vec4<f32>,
slots: vec4<u32>,
};
@group(2) @binding(0) var<storage, read> items : array<ImageItem>;
@group(3) @binding(0) var imgTex : texture_2d<f32>;
@group(3) @binding(1) var imgSampler : sampler;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let t = (sp - lo) / it.rect.zw;
let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
let sample = textureSampleLevel(imgTex, imgSampler, uv, 0.0);
let src = sample * it.tint;
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslText = wgslShared + String.raw`
struct GlyphItem {
rect: vec4<f32>,
uv: vec4<f32>,
color: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<GlyphItem>;
@group(3) @binding(0) var atlasTex : texture_2d<f32>;
@group(3) @binding(1) var atlasSampler : sampler;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let t = (sp - lo) / it.rect.zw;
let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
// stb_truetype SDF: pixel value ~128 is the edge. Treat alpha as
// the smoothed step around that midpoint.
let sdf = textureSampleLevel(atlasTex, atlasSampler, uv, 0.0).r;
let alpha = clamp((sdf - 0.5) * 8.0 + 0.5, 0.0, 1.0);
if (alpha <= 0.0) { continue; }
let src = vec4<f32>(it.color.rgb, it.color.a * alpha);
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
function makePipeline(label, wgsl, hasGroup3) {
const mod = device.createShaderModule({ label, code: wgsl });
// Layout: group 0 uniform header, group 1 (out storage + prev sampled),
// group 2 storage items SSBO, optional group 3 (texture + sampler).
const bgl0 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
]});
const bgl1 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, texture: { sampleType: "float", viewDimension: "2d" } },
]});
const bgl2 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]});
const layouts = [bgl0, bgl1, bgl2];
let bgl3 = null;
if (hasGroup3) {
bgl3 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, texture: { sampleType: "float", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, sampler: { type: "filtering" } },
]});
layouts.push(bgl3);
}
const pl = device.createPipelineLayout({ bindGroupLayouts: layouts });
const pipeline = device.createComputePipeline({
layout: pl,
compute: { module: mod, entryPoint: "main" },
});
return { pipeline, bgl0, bgl1, bgl2, bgl3 };
}
const pipeQuads = makePipeline("ui-quads", wgslQuads, false);
const pipeCircles = makePipeline("ui-circles", wgslCircles, false);
const pipeImages = makePipeline("ui-images", wgslImages, true);
const pipeText = makePipeline("ui-text", wgslText, true);
// Bind groups for group 0 (header uniform with dynamic offset) — one per
// pipeline, references the same ring buffer.
function makeHdrBG(pipe) {
return device.createBindGroup({
layout: pipe.bgl0,
entries: [{ binding: 0, resource: { buffer: state.headerRing, offset: 0, size: 48 } }],
});
}
const hdrBG = {
quads: makeHdrBG(pipeQuads),
circles: makeHdrBG(pipeCircles),
images: makeHdrBG(pipeImages),
text: makeHdrBG(pipeText),
};
// Group 1 changes between dispatches because `out` and `prev` swap on the
// ping-pong. Cached by current ping-pong direction and texture size; the
// stored bind group is reusable across all pipelines that share a
// layout-compatible bgl1 (all standard pipelines and custom shaders do,
// since they declare identical group-1 entries per the contract).
function getGroup1BG(bgl1) {
const key = `g1/${state.outIsPing ? 1 : 0}/${state.width}x${state.height}`;
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const outView = state.outIsPing ? state.pingView : state.pongView;
const prevView = state.outIsPing ? state.pongView : state.pingView;
bg = device.createBindGroup({
layout: bgl1,
entries: [
{ binding: 0, resource: outView },
{ binding: 1, resource: prevView },
],
});
state.bindGroupCache.set(key, bg);
return bg;
}
function getGroup2BG(pipe, itemsHandle) {
const key = `items/${pipe === pipeQuads ? "q" : pipe === pipeCircles ? "c" : pipe === pipeImages ? "i" : "t"}/${itemsHandle}`;
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const buf = buffers.get(itemsHandle);
if (!buf) throw new Error(`getGroup2BG: unknown items buffer ${itemsHandle}`);
bg = device.createBindGroup({
layout: pipe.bgl2,
entries: [{ binding: 0, resource: { buffer: buf } }],
});
state.bindGroupCache.set(key, bg);
return bg;
}
function getGroup3BG(pipe, texHandle, sampHandle) {
const key = `t3/${texHandle}/${sampHandle}/${pipe === pipeImages ? "i" : "x"}`;
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const tex = textureViews.get(texHandle);
const sam = samplers.get(sampHandle);
if (!tex || !sam) throw new Error(`getGroup3BG: unknown view ${texHandle} / sampler ${sampHandle}`);
bg = device.createBindGroup({
layout: pipe.bgl3,
entries: [
{ binding: 0, resource: tex },
{ binding: 1, resource: sam },
],
});
state.bindGroupCache.set(key, bg);
return bg;
}
// ─── wasm import surface ───────────────────────────────────────────────
let wasmExports = null;
// Crafter.Build's runtime.js exposes the wasi instance on
// window.crafter_wasi after instantiation. We grab the exports lazily so
// every import-side function works regardless of call order. memU8 /
// memF32 / memU32 always re-derive the typed-array view because the
// wasm memory's backing ArrayBuffer is detached and replaced whenever
// the wasm grows its memory; caching a typed array would alias to
// freed memory after a grow.
function getExports() {
if (wasmExports) return wasmExports;
const wasi = window.crafter_wasi;
if (!wasi || !wasi.instance) {
throw new Error("[crafter-wgpu] wasm exports not available yet (called too early)");
}
wasmExports = wasi.instance.exports;
return wasmExports;
}
function memU8() { return new Uint8Array(getExports().memory.buffer); }
function memF32() { return new Float32Array(getExports().memory.buffer); }
function memU32() { return new Uint32Array(getExports().memory.buffer); }
// Stubs were assigned at the top of this file; we now overwrite them with
// real implementations now that init has succeeded.
const env = window.crafter_webbuild_env;
env.wgpuGetCanvasWidth = () => canvas.width;
env.wgpuGetCanvasHeight = () => canvas.height;
env.wgpuCreateBuffer = (byteSize) => {
const h = newHandle();
const buf = device.createBuffer({
size: Math.max(16, byteSize),
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
buffers.set(h, buf);
return h;
};
env.wgpuWriteBuffer = (handle, srcPtr, byteSize) => {
state.writeBufferCount = (state.writeBufferCount || 0) + 1;
state.lastWriteHandle = handle;
state.lastWriteSize = byteSize;
const buf = buffers.get(handle);
if (!buf) return;
// writeBuffer requires a multiple of 4 bytes and an aligned offset.
const aligned = (byteSize + 3) & ~3;
queue.writeBuffer(buf, 0, memU8().buffer, srcPtr, aligned);
};
env.wgpuDestroyBuffer = (handle) => {
const buf = buffers.get(handle);
if (buf) { buf.destroy(); buffers.delete(handle); }
// Invalidate any cached bind group that referenced this handle.
for (const k of state.bindGroupCache.keys()) {
if (k.startsWith("items/") && k.endsWith("/" + handle)) {
state.bindGroupCache.delete(k);
}
}
};
env.wgpuCreateAtlasTexture = (w, h) => {
const handle = newHandle();
const tex = device.createTexture({
size: [w, h],
format: "r8unorm",
usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
});
textures.set(handle, tex);
textureViews.set(handle, tex.createView());
return handle;
};
env.wgpuWriteAtlasRegion = (handle, srcPtr, srcW, srcH, srcBytesPerRow, dstX, dstY, copyW, copyH) => {
const tex = textures.get(handle);
if (!tex) return;
// For r8unorm, 1 byte per pixel; writeTexture requires bytesPerRow >= 256
// OR == width if width*1 % 256 === 0 — for arbitrary widths we need to
// re-pack into a 256-aligned staging buffer.
const alignedBPR = Math.max(256, (srcBytesPerRow + 255) & ~255);
if (alignedBPR === srcBytesPerRow) {
const bytes = memU8().subarray(srcPtr + dstY * srcBytesPerRow + dstX,
srcPtr + (dstY + copyH) * srcBytesPerRow);
queue.writeTexture(
{ texture: tex, origin: { x: dstX, y: dstY } },
bytes,
{ bytesPerRow: srcBytesPerRow, rowsPerImage: copyH },
{ width: copyW, height: copyH }
);
} else {
// Repack copyW × copyH starting at (dstX, dstY) in the source.
const staging = new Uint8Array(alignedBPR * copyH);
const src = memU8();
for (let y = 0; y < copyH; y++) {
const srcRow = (dstY + y) * srcBytesPerRow + dstX;
staging.set(src.subarray(srcPtr + srcRow, srcPtr + srcRow + copyW),
y * alignedBPR);
}
queue.writeTexture(
{ texture: tex, origin: { x: dstX, y: dstY } },
staging,
{ bytesPerRow: alignedBPR, rowsPerImage: copyH },
{ width: copyW, height: copyH }
);
}
};
env.wgpuDestroyTexture = (handle) => {
const tex = textures.get(handle);
if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
};
env.wgpuCreateLinearClampSampler = () => {
const handle = newHandle();
samplers.set(handle, device.createSampler({
magFilter: "linear", minFilter: "linear",
addressModeU: "clamp-to-edge", addressModeV: "clamp-to-edge",
}));
return handle;
};
// ─── per-frame ──────────────────────────────────────────────────────────
env.wgpuFrameBegin = () => {
state.frameBeginCount = (state.frameBeginCount || 0) + 1;
if (state.gpuLost) return;
ensureSized();
state.encoder = device.createCommandEncoder();
state.outIsPing = true; // reset so each frame starts on the same target
state.headerRingOffset = 0;
// DON'T clearBuffer the header ring here. queue.writeBuffer ops from
// writeHeader() are enqueued BEFORE this command buffer's submit,
// so an encoded clearBuffer would wipe them — the dispatches would
// then read all-zero headers and uiResolvePixel would reject every
// pixel (surfaceW=0).
clearStorageTexture(state.encoder, state.outIsPing ? state.pingTex : state.pongTex,
state.width, state.height);
state.pass = state.encoder.beginComputePass();
};
let zeroBuffer = null;
let zeroBufferSize = 0;
function clearStorageTexture(encoder, tex, w, h) {
const bpr = (w * 4 + 255) & ~255;
const need = bpr * h;
if (!zeroBuffer || zeroBufferSize < need) {
if (zeroBuffer) zeroBuffer.destroy();
zeroBuffer = device.createBuffer({ size: need, usage: GPUBufferUsage.COPY_SRC, mappedAtCreation: true });
new Uint8Array(zeroBuffer.getMappedRange()).fill(0);
zeroBuffer.unmap();
zeroBufferSize = need;
}
encoder.copyBufferToTexture(
{ buffer: zeroBuffer, bytesPerRow: bpr, rowsPerImage: h },
{ texture: tex },
{ width: w, height: h, depthOrArrayLayers: 1 }
);
}
env.wgpuFrameEnd = () => {
state.frameEndCount = (state.frameEndCount || 0) + 1;
if (state.gpuLost || !state.encoder) return;
state.pass.end();
state.pass = null;
// Blit last-written ping-pong texture → canvas. After N dispatches,
// state.outIsPing points at the NEXT write target, so the latest
// content lives in the OPPOSITE texture.
const finalTex = state.outIsPing ? state.pongTex : state.pingTex;
const canvasTex = ctx.getCurrentTexture();
state.encoder.copyTextureToTexture(
{ texture: finalTex },
{ texture: canvasTex },
{ width: state.width, height: state.height, depthOrArrayLayers: 1 }
);
queue.submit([state.encoder.finish()]);
state.encoder = null;
};
// Write a 48-byte UIDispatchHeader into the ring buffer at the current
// offset (which is incremented and 256-aligned). Returns the dynamic
// offset to pass to setBindGroup.
function writeHeader(headerPtr) {
const offset = state.headerRingOffset;
if (offset + HEADER_ALIGN > state.headerRingSize) {
// Ring is small enough that overrun in one frame means too many
// dispatches. Soft-wrap; correctness already requires the ring
// be large enough.
state.headerRingOffset = 0;
}
queue.writeBuffer(state.headerRing, state.headerRingOffset,
memU8().buffer, headerPtr, 48);
state.headerRingOffset += HEADER_ALIGN;
return offset;
}
function dispatchStandard(pipe, hdrBindGroup, headerPtr, gx, gy, itemsHandle, group3) {
if (!state.pass) return;
const off = writeHeader(headerPtr);
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, hdrBindGroup, [off]);
state.pass.setBindGroup(1, getGroup1BG(pipe.bgl1));
state.pass.setBindGroup(2, getGroup2BG(pipe, itemsHandle));
if (group3) state.pass.setBindGroup(3, group3);
state.pass.dispatchWorkgroups(gx, gy, 1);
// Flip ping-pong: the texture we just wrote becomes next dispatch's prev.
state.outIsPing = !state.outIsPing;
}
env.wgpuDispatchQuads = (itemsHandle, headerPtr, gx, gy) => {
state.dispatchQuadsCount = (state.dispatchQuadsCount || 0) + 1;
dispatchStandard(pipeQuads, hdrBG.quads, headerPtr, gx, gy, itemsHandle, null);
};
env.wgpuDispatchCircles = (itemsHandle, headerPtr, gx, gy) => {
dispatchStandard(pipeCircles, hdrBG.circles, headerPtr, gx, gy, itemsHandle, null);
};
env.wgpuDispatchImages = (itemsHandle, headerPtr, gx, gy, texHandle, sampHandle) => {
const g3 = getGroup3BG(pipeImages, texHandle, sampHandle);
dispatchStandard(pipeImages, hdrBG.images, headerPtr, gx, gy, itemsHandle, g3);
};
env.wgpuDispatchText = (itemsHandle, headerPtr, gx, gy, atlasHandle, sampHandle) => {
const g3 = getGroup3BG(pipeText, atlasHandle, sampHandle);
dispatchStandard(pipeText, hdrBG.text, headerPtr, gx, gy, itemsHandle, g3);
};
// ─── custom user-authored shaders ─────────────────────────────────────
//
// Bind-group contract (mirrors :WebGPUComputeShader.cppm):
// group 0 binding 0 — uniform UIDispatchHeader (dynamic offset, 48b)
// group 1 binding 0 — texture_storage_2d<rgba8unorm, write> out
// group 1 binding 1 — texture_2d<f32> prev
// group 2+ — user-declared (UICustomBinding entries)
//
// Each UICustomBinding entry on the wasm side is 8 bytes:
// u8 group, u8 binding, u8 kind, u8 pad, u32 pushOffset
// kind: 0 = read-only-storage SSBO, 1 = sampled tex 2d, 2 = filtering sampler.
const customPipelines = new Map(); // handle → { pipeline, bgls, hdrBG, byGroup }
env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQueryFlag) => {
if (!rtState.vertHeap && rayQueryFlag) rtInit();
const userWgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
// For rayQuery-capable shaders, prepend the RT prelude + ray-query
// library. The user shader can declare its own group 0 / 2+ bindings
// but MUST NOT redeclare group(1) — that's reserved for RT data.
const wgsl = rayQueryFlag
? (rtWgslTypes + rtWgslMegakernelBindings + rtWgslRayQueryLib + "\n" + userWgsl)
: userWgsl;
const bindings = [];
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
for (let i = 0; i < bindingsCount; i++) {
bindings.push({
group: dv.getUint8(i*8 + 0),
binding: dv.getUint8(i*8 + 1),
kind: dv.getUint8(i*8 + 2),
pushOffset: dv.getUint32(i*8 + 4, true),
});
}
// Group bindings by @group(N) for layout creation.
const byGroup = new Map();
for (const b of bindings) {
if (b.group < 2) {
console.error(`[crafter-wgpu] custom shader: @group(${b.group}) reserved; use groups >= 2`);
return 0;
}
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
byGroup.get(b.group).push(b);
}
// Group 0 = header uniform (same for both paths).
// Group 1 = ping-pong out+prev OR RT data (TLAS, BVH, meshRecs, verts,
// idx, primRemap, outImage) when rayQuery flag is on.
const bgls = [
device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
]}),
rayQueryFlag
? device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
]})
: device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE,
texture: { sampleType: "float", viewDimension: "2d" } },
]}),
];
// Sorted custom groups. Pad any gaps with empty bgls (WebGPU pipeline
// layouts require a contiguous array of GPUBindGroupLayout per group
// index up to the highest used).
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
for (let g = 2; g <= highest; g++) {
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
return e;
});
bgls.push(device.createBindGroupLayout({ entries }));
} else {
bgls.push(device.createBindGroupLayout({ entries: [] }));
}
}
let pipeline;
try {
const mod = device.createShaderModule({ code: wgsl });
const layout = device.createPipelineLayout({ bindGroupLayouts: bgls });
pipeline = device.createComputePipeline({ layout, compute: { module: mod, entryPoint: "main" } });
} catch (e) {
console.error("[crafter-wgpu] custom shader compile failed:", e);
if (rayQueryFlag) console.error("[crafter-wgpu] WGSL was:\n", wgsl);
return 0;
}
const hdrBG = device.createBindGroup({
layout: bgls[0],
entries: [{ binding: 0, resource: { buffer: state.headerRing, offset: 0, size: 48 } }],
});
const handle = newHandle();
customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups, rayQueryCapable: !!rayQueryFlag });
return handle;
};
env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handlesCount,
gx, gy, gz) => {
state.dispatchCustomCount = (state.dispatchCustomCount || 0) + 1;
if (!state.pass) return;
const pipe = customPipelines.get(pipelineHandle);
if (!pipe) {
console.error("[crafter-wgpu] wgpuDispatchCustom: unknown pipeline", pipelineHandle);
return;
}
// Write header (first 48 bytes of push).
const off = writeHeader(pushPtr);
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, pipe.hdrBG, [off]);
// Group 1: rayQuery-capable shaders get the RT data heaps + the most
// recently built TLAS; everyone else gets the standard ping-pong pair.
if (pipe.rayQueryCapable) {
const tlasBuf = buffers.get(rtState.currentTlas);
if (!tlasBuf) {
console.error("[crafter-wgpu] rayQuery dispatch but no TLAS built yet");
return;
}
const outView = state.outIsPing ? state.pingView : state.pongView;
const rtBG = device.createBindGroup({
layout: pipe.bgls[1],
entries: [
{ binding: 0, resource: { buffer: tlasBuf } },
{ binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
{ binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
],
});
state.pass.setBindGroup(1, rtBG);
} else {
state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1]));
}
// Walk bindings in declaration order and assemble bind groups.
// handles[] from wasm is in the SAME order as customBindings, so we
// pick up indices by walking byGroup in the same sorted order.
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
let handleIdx = 0;
for (const g of pipe.sortedGroups) {
const entries = pipe.byGroup.get(g).map(b => {
const h = handles[handleIdx++];
let resource;
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
state.pass.setBindGroup(g, bg);
}
state.pass.dispatchWorkgroups(gx, gy, gz);
state.outIsPing = !state.outIsPing;
};
// Debug accessor for browser-console diagnostics.
window.crafter_wgpu_state = state;
window.crafter_wgpu_device = device;
window.crafter_wgpu_canvasCtx = ctx;
window.crafter_wgpu_debug = () => ({
width: state.width, height: state.height,
outIsPing: state.outIsPing,
encoderActive: !!state.encoder,
passActive: !!state.pass,
bgCacheSize: state.bindGroupCache.size,
bufferHandles: buffers.size,
textureHandles: textures.size,
samplerHandles: samplers.size,
headerRingOffset: state.headerRingOffset,
frameBeginCount: state.frameBeginCount || 0,
frameEndCount: state.frameEndCount || 0,
dispatchQuadsCount: state.dispatchQuadsCount || 0,
writeBufferCount: state.writeBufferCount || 0,
lastWriteHandle: state.lastWriteHandle,
lastWriteSize: state.lastWriteSize,
});
window.crafter_wgpu_bufferKeys = () => [...buffers.keys()];
// Read back the first QuadItem from a registered buffer to verify the
// GPU sees what the CPU wrote.
window.crafter_wgpu_readBuffer = async (handle, byteSize = 64) => {
const buf = buffers.get(handle);
if (!buf) return "no buffer for handle " + handle;
const read = device.createBuffer({ size: 256, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST });
const enc = device.createCommandEncoder();
enc.copyBufferToBuffer(buf, 0, read, 0, byteSize);
device.queue.submit([enc.finish()]);
await read.mapAsync(GPUMapMode.READ);
const data = new Float32Array(read.getMappedRange().slice(0, byteSize));
read.unmap();
return [...data];
};
// Surface size getters (the wasm side may query these on Resize events).
env.wgpuSurfaceWidth = () => state.width || canvas.width;
env.wgpuSurfaceHeight = () => state.height || canvas.height;
// One-shot init: forces ping-pong allocation at current canvas size so
// any Buffer/Texture creation before the first frame works against a
// concrete size. Called by Crafter::Device::Initialize on the wasm side.
env.wgpuInit = () => {
const { w, h } = syncCanvasSize();
recreatePingPong(w, h);
};
// Resize listener — wires up to the same `resize` event dom-env.js
// listens to. We trigger sizing on next frame begin; no work here.
window.addEventListener("resize", () => { /* ensureSized in wgpuFrameBegin */ });
// ─────────────────────────────────────────────────────────────────────
// ── Software raytracing subsystem ────────────────────────────────────
// ─────────────────────────────────────────────────────────────────────
//
// WebGPU has no hardware RT. The library emulates DXR/VKRT semantics in
// compute: a megakernel raygen pipeline traverses a CPU-built BLAS BVH +
// GPU-built TLAS, dispatches user closesthit / anyhit / miss via a
// generated `switch`. The same traversal kernel is also exposed as a
// rayQuery* function set for regular compute shaders (see the
// rayQueryCapable path under wgpuLoadCustomShader).
//
// The four mesh data streams live in *shared* GPU heaps; each Mesh::Build
// appends to them and gets a u32 handle back. The handle is what the
// application stores in RTInstance::accelerationStructureReference.
// ── WGSL library: shared types + constants (no bindings) ─────────────
// Used by both the megakernel pipeline (which adds group(0..1) bindings)
// and the TLAS-build pipeline (which only uses group(2)). Keeping bindings
// out of the shared block avoids inflating storage-buffer count past the
// 8-per-stage baseline limit on pipelines that don't actually use them.
const rtWgslTypes = String.raw`
struct RTDispatchHeader {
surfaceW: u32,
surfaceH: u32,
instanceCount: u32,
flags: u32,
};
struct RayDesc {
origin: vec3<f32>,
tMin: f32,
direction: vec3<f32>,
tMax: f32,
};
struct HitInfo {
t: f32,
instanceId: u32,
primitiveId: u32,
hitGroupIndex: u32,
attribs: vec2<f32>,
objectRayOrigin: vec3<f32>,
objectRayDirection: vec3<f32>,
objectToWorldR0: vec4<f32>,
objectToWorldR1: vec4<f32>,
objectToWorldR2: vec4<f32>,
customIndex: u32,
};
// Matches Crafter::BVHNode in interfaces/Crafter.Graphics-Mesh.cppm.
struct BVHNode {
aabbMin: vec3<f32>,
firstChildOrPrim: u32,
aabbMax: vec3<f32>,
primCount: u32,
};
// Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
struct MeshRecord {
rootAabbMin: vec3<f32>,
vertexOffset: u32,
rootAabbMax: vec3<f32>,
indexOffset: u32,
bvhOffset: u32,
primRemapOffset: u32,
triangleCount: u32,
_pad: u32,
};
// Per-instance TLAS record built by the TLAS-build compute pass.
struct TLASEntry {
aabbMin: vec3<f32>,
maskHGOffset: u32,
aabbMax: vec3<f32>,
blasMeshIdx: u32,
objectToWorldR0: vec4<f32>,
objectToWorldR1: vec4<f32>,
objectToWorldR2: vec4<f32>,
worldToObjectR0: vec4<f32>,
worldToObjectR1: vec4<f32>,
worldToObjectR2: vec4<f32>,
customIndex: u32,
instanceFlags: u32,
_pad0: u32,
_pad1: u32,
};
// ── Ray flag mirror of VkGeometryInstanceFlagBitsKHR + DXR ray flags ──
const RT_FLAG_OPAQUE: u32 = 0x1u;
const RT_FLAG_NO_OPAQUE: u32 = 0x2u;
const RT_FLAG_TERMINATE_ON_FIRST_HIT: u32 = 0x4u;
const RT_FLAG_SKIP_CLOSEST_HIT: u32 = 0x8u;
const RT_FLAG_CULL_BACK_FACING_TRIANGLES: u32 = 0x10u;
const RT_FLAG_CULL_FRONT_FACING_TRIANGLES: u32 = 0x20u;
const RT_FLAG_CULL_OPAQUE: u32 = 0x40u;
const RT_FLAG_CULL_NO_OPAQUE: u32 = 0x80u;
const RT_FLAG_SKIP_TRIANGLES: u32 = 0x100u;
const RT_FLAG_SKIP_AABBS: u32 = 0x200u;
const RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE: u32 = 0x1u;
const RT_INSTANCE_TRIANGLE_FLIP_FACING: u32 = 0x2u;
const RT_INSTANCE_FORCE_OPAQUE: u32 = 0x4u;
const RT_INSTANCE_FORCE_NO_OPAQUE: u32 = 0x8u;
const RT_ANYHIT_ACCEPT: u32 = 0u;
const RT_ANYHIT_IGNORE: u32 = 1u;
const RT_ANYHIT_END_SEARCH: u32 = 2u;
const RT_INTERSECTION_NONE: u32 = 0u;
const RT_INTERSECTION_TRIANGLE: u32 = 1u;
`;
// Megakernel-only bindings. Concatenated after rtWgslTypes for the
// raygen pipeline; the TLAS-build pipeline omits these because it doesn't
// touch them — declaring them would push it past 8 storage buffers per
// stage on the WebGPU baseline.
const rtWgslMegakernelBindings = String.raw`
@group(0) @binding(0) var<uniform> hdr : RTDispatchHeader;
@group(1) @binding(0) var<storage,read> tlasEntries : array<TLASEntry>;
@group(1) @binding(1) var<storage,read> bvhNodes : array<BVHNode>;
@group(1) @binding(2) var<storage,read> meshRecords : array<MeshRecord>;
@group(1) @binding(3) var<storage,read> vertices : array<f32>;
@group(1) @binding(4) var<storage,read> indices : array<u32>;
@group(1) @binding(5) var<storage,read> primRemap : array<u32>;
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
`;
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
// Injected after the user-supplied closesthit/anyhit/miss sources +
// mega-switch dispatchers (which PipelineRTWebGPU emits). User raygen
// sources sit after this block so they can call traceRay.
const rtWgslHelpers = String.raw`
fn _rtFetchTri(meshRec: MeshRecord, triIndex: u32) -> array<vec3<f32>, 3> {
let baseIdx = meshRec.indexOffset + triIndex * 3u;
let i0 = indices[baseIdx + 0u];
let i1 = indices[baseIdx + 1u];
let i2 = indices[baseIdx + 2u];
let baseV = meshRec.vertexOffset;
let v0i = (baseV + i0) * 3u;
let v1i = (baseV + i1) * 3u;
let v2i = (baseV + i2) * 3u;
return array<vec3<f32>, 3>(
vec3<f32>(vertices[v0i + 0u], vertices[v0i + 1u], vertices[v0i + 2u]),
vec3<f32>(vertices[v1i + 0u], vertices[v1i + 1u], vertices[v1i + 2u]),
vec3<f32>(vertices[v2i + 0u], vertices[v2i + 1u], vertices[v2i + 2u]),
);
}
fn _rtAabb(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax: f32) -> bool {
let t0 = (mn - ro) * invRd;
let t1 = (mx - ro) * invRd;
let tmin = min(t0, t1);
let tmax = max(t0, t1);
let tEnter = max(max(tmin.x, tmin.y), tmin.z);
let tExit = min(min(tmax.x, tmax.y), tmax.z);
return tExit >= max(tEnter, 0.0) && tEnter <= tMax;
}
struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 };
fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>,
tMin: f32, tMax: f32) -> _RtTriHit {
var r: _RtTriHit;
r.hit = false;
let e1 = p1 - p0;
let e2 = p2 - p0;
let h = cross(rd, e2);
let a = dot(e1, h);
if (abs(a) < 1e-8) { return r; }
let f = 1.0 / a;
let s = ro - p0;
let u = f * dot(s, h);
if (u < 0.0 || u > 1.0) { return r; }
let q = cross(s, e1);
let v = f * dot(rd, q);
if (v < 0.0 || u + v > 1.0) { return r; }
let t = f * dot(e2, q);
if (t < tMin || t > tMax) { return r; }
r.hit = true; r.t = t; r.u = u; r.v = v;
return r;
}
// Iterative stack-based BLAS traversal. Returns true if traversal was
// terminated by an END_SEARCH from anyhit (caller should stop entirely).
fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
instanceId: u32, hitGroupBase: u32,
bestHit: ptr<function, HitInfo>,
bestT: ptr<function, f32>,
payload: ptr<function, Payload>) -> bool {
let invD = vec3<f32>(1.0) / rayObj.direction;
var stack: array<u32, 32>;
var sp: u32 = 0u;
var nodeRel: u32 = 0u;
loop {
let abs = meshRec.bvhOffset + nodeRel;
let node = bvhNodes[abs];
if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
if (node.primCount > 0u) {
for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
let verts = _rtFetchTri(meshRec, triIndex);
let tr = _rtTri(rayObj.origin, rayObj.direction,
verts[0], verts[1], verts[2],
rayObj.tMin, *bestT);
if (!tr.hit) { continue; }
let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
let facing = dot(geomNormal, rayObj.direction);
if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; }
if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
var candidate: HitInfo;
candidate.t = tr.t;
candidate.instanceId = instanceId;
candidate.primitiveId = triIndex;
candidate.hitGroupIndex = hitGroupBase;
candidate.attribs = vec2<f32>(tr.u, tr.v);
candidate.objectRayOrigin = rayObj.origin;
candidate.objectRayDirection = rayObj.direction;
let opaque = (flags & RT_FLAG_OPAQUE) != 0u
|| (flags & RT_FLAG_NO_OPAQUE) == 0u; // default opaque
if (opaque) {
*bestHit = candidate;
*bestT = tr.t;
if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
} else {
let r = runAnyHit(hitGroupBase, rayObj, candidate, payload);
if (r == RT_ANYHIT_END_SEARCH) {
*bestHit = candidate;
*bestT = tr.t;
return true;
}
if (r == RT_ANYHIT_ACCEPT) {
*bestHit = candidate;
*bestT = tr.t;
if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
}
}
}
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
// inner node — push right, descend left
let left = node.firstChildOrPrim;
let right = left + 1u;
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
nodeRel = left;
}
return false;
}
fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
sbtRecordOffset: u32, sbtRecordStride: u32,
bestHit: ptr<function, HitInfo>,
bestT: ptr<function, f32>,
payload: ptr<function, Payload>) -> bool {
let invD = vec3<f32>(1.0) / rayWorld.direction;
let n = hdr.instanceCount;
for (var i: u32 = 0u; i < n; i = i + 1u) {
let inst = tlasEntries[i];
let instanceMask = inst.maskHGOffset & 0xFFu;
if ((instanceMask & cullMask) == 0u) { continue; }
if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }
// Transform ray to object space.
let r0 = inst.worldToObjectR0;
let r1 = inst.worldToObjectR1;
let r2 = inst.worldToObjectR2;
var rayObj: RayDesc;
rayObj.origin = vec3<f32>(
dot(r0.xyz, rayWorld.origin) + r0.w,
dot(r1.xyz, rayWorld.origin) + r1.w,
dot(r2.xyz, rayWorld.origin) + r2.w,
);
rayObj.direction = vec3<f32>(
dot(r0.xyz, rayWorld.direction),
dot(r1.xyz, rayWorld.direction),
dot(r2.xyz, rayWorld.direction),
);
rayObj.tMin = rayWorld.tMin;
rayObj.tMax = *bestT;
var effective = flags;
let iflags = inst.instanceFlags;
if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
}
if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
}
if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
}
let hitGroupOffset = inst.maskHGOffset >> 8u;
let hitGroupBase = sbtRecordOffset + hitGroupOffset;
let meshRec = meshRecords[inst.blasMeshIdx];
let pre = *bestT;
let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
bestHit, bestT, payload);
if (endSearch) { return true; }
if ((*bestT) < pre) {
// record world-space object-to-world for the closest-hit shader
(*bestHit).objectToWorldR0 = inst.objectToWorldR0;
(*bestHit).objectToWorldR1 = inst.objectToWorldR1;
(*bestHit).objectToWorldR2 = inst.objectToWorldR2;
(*bestHit).customIndex = inst.customIndex;
if ((effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
}
}
return false;
}
fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
rayOrigin: vec3<f32>, rayTMin: f32,
rayDir: vec3<f32>, rayTMax: f32,
payload: ptr<function, Payload>) {
var ray: RayDesc;
ray.origin = rayOrigin;
ray.direction = rayDir;
ray.tMin = rayTMin;
ray.tMax = rayTMax;
var bestHit: HitInfo;
bestHit.t = rayTMax;
var bestT = rayTMax;
let ended = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
sbtRecordOffset, sbtRecordStride,
&bestHit, &bestT, payload);
if (bestT < rayTMax) {
if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) == 0u) {
runClosestHit(bestHit.hitGroupIndex, ray, bestHit, payload);
}
} else {
runMiss(missIndex, ray, payload);
}
}
`;
// ── WGSL library: rayQuery API for non-megakernel compute shaders ────
//
// Mirrors GL_EXT_ray_query semantics that 3DForts's physics shaders use
// (projectile-collide, splash, builder-pick). User WGSL:
// var rq: RayQuery;
// rayQueryInitialize(&rq, 0u, flags, mask, origin, tMin, dir, tMax);
// while (rayQueryProceed(&rq)) {} // run traversal to completion
// if (rayQueryGetCommittedIntersectionType(&rq) != RT_INTERSECTION_NONE) {
// let t = rayQueryGetCommittedT(&rq);
// ...
// }
//
// v1 simplification: traversal force-opaques every hit (no anyhit). The
// user can still test for triangle vs miss and read t/instance/bary.
// Anyhit-style candidate inspection is a future extension.
const rtWgslRayQueryLib = String.raw`
struct RayQuery {
ray: RayDesc,
flags: u32,
cullMask: u32,
committedType: u32,
committedT: f32,
committedInstanceId: u32,
committedInstanceCustomIndex: u32,
committedPrimitiveIndex: u32,
committedBarycentrics: vec2<f32>,
committedObjectRayOrigin: vec3<f32>,
committedObjectRayDirection: vec3<f32>,
committedObjectToWorldR0: vec4<f32>,
committedObjectToWorldR1: vec4<f32>,
committedObjectToWorldR2: vec4<f32>,
committedWorldToObjectR0: vec4<f32>,
committedWorldToObjectR1: vec4<f32>,
committedWorldToObjectR2: vec4<f32>,
done: u32,
};
fn _rqTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
instanceId: u32, customIndex: u32,
inst: TLASEntry,
rq: ptr<function, RayQuery>) {
let invD = vec3<f32>(1.0) / rayObj.direction;
var stack: array<u32, 32>;
var sp: u32 = 0u;
var nodeRel: u32 = 0u;
loop {
let abs = meshRec.bvhOffset + nodeRel;
let node = bvhNodes[abs];
if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, (*rq).committedT)) {
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
if (node.primCount > 0u) {
for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
let verts = _rtFetchTri(meshRec, triIndex);
let tr = _rtTri(rayObj.origin, rayObj.direction,
verts[0], verts[1], verts[2],
rayObj.tMin, (*rq).committedT);
if (!tr.hit) { continue; }
let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
let facing = dot(geomNormal, rayObj.direction);
if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; }
if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
(*rq).committedType = RT_INTERSECTION_TRIANGLE;
(*rq).committedT = tr.t;
(*rq).committedInstanceId = instanceId;
(*rq).committedInstanceCustomIndex = customIndex;
(*rq).committedPrimitiveIndex = triIndex;
(*rq).committedBarycentrics = vec2<f32>(tr.u, tr.v);
(*rq).committedObjectRayOrigin = rayObj.origin;
(*rq).committedObjectRayDirection = rayObj.direction;
(*rq).committedObjectToWorldR0 = inst.objectToWorldR0;
(*rq).committedObjectToWorldR1 = inst.objectToWorldR1;
(*rq).committedObjectToWorldR2 = inst.objectToWorldR2;
(*rq).committedWorldToObjectR0 = inst.worldToObjectR0;
(*rq).committedWorldToObjectR1 = inst.worldToObjectR1;
(*rq).committedWorldToObjectR2 = inst.worldToObjectR2;
}
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
let left = node.firstChildOrPrim;
let right = left + 1u;
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
nodeRel = left;
}
}
fn _rqTraverseTlas(rq: ptr<function, RayQuery>) {
let rayWorld = (*rq).ray;
let invD = vec3<f32>(1.0) / rayWorld.direction;
let n = hdr.instanceCount;
let cullMask = (*rq).cullMask;
let rayFlags = (*rq).flags;
for (var i: u32 = 0u; i < n; i = i + 1u) {
let inst = tlasEntries[i];
let instanceMask = inst.maskHGOffset & 0xFFu;
if ((instanceMask & cullMask) == 0u) { continue; }
if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, (*rq).committedT)) { continue; }
let r0 = inst.worldToObjectR0;
let r1 = inst.worldToObjectR1;
let r2 = inst.worldToObjectR2;
var rayObj: RayDesc;
rayObj.origin = vec3<f32>(
dot(r0.xyz, rayWorld.origin) + r0.w,
dot(r1.xyz, rayWorld.origin) + r1.w,
dot(r2.xyz, rayWorld.origin) + r2.w,
);
rayObj.direction = vec3<f32>(
dot(r0.xyz, rayWorld.direction),
dot(r1.xyz, rayWorld.direction),
dot(r2.xyz, rayWorld.direction),
);
rayObj.tMin = rayWorld.tMin;
rayObj.tMax = (*rq).committedT;
var effective = rayFlags;
let iflags = inst.instanceFlags;
if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
}
let meshRec = meshRecords[inst.blasMeshIdx];
_rqTraverseBlas(rayObj, effective, meshRec, i, inst.customIndex, inst, rq);
if ((rayFlags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u
&& (*rq).committedType != RT_INTERSECTION_NONE) {
return;
}
}
}
fn rayQueryInitialize(rq: ptr<function, RayQuery>, tlasIdx: u32, flags: u32, cullMask: u32,
origin: vec3<f32>, tMin: f32, direction: vec3<f32>, tMax: f32) {
(*rq).ray.origin = origin;
(*rq).ray.tMin = tMin;
(*rq).ray.direction = direction;
(*rq).ray.tMax = tMax;
(*rq).flags = flags;
(*rq).cullMask = cullMask & 0xFFu;
(*rq).committedType = RT_INTERSECTION_NONE;
(*rq).committedT = tMax;
(*rq).done = 0u;
}
fn rayQueryProceed(rq: ptr<function, RayQuery>) -> bool {
if ((*rq).done != 0u) { return false; }
_rqTraverseTlas(rq);
(*rq).done = 1u;
return false;
}
fn rayQueryTerminate(rq: ptr<function, RayQuery>) {
(*rq).done = 1u;
}
fn rayQueryGetCommittedIntersectionType(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedType; }
fn rayQueryGetCommittedT(rq: ptr<function, RayQuery>) -> f32 { return (*rq).committedT; }
fn rayQueryGetCommittedInstanceId(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedInstanceId; }
fn rayQueryGetCommittedInstanceCustomIndex(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedInstanceCustomIndex; }
fn rayQueryGetCommittedPrimitiveIndex(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedPrimitiveIndex; }
fn rayQueryGetCommittedBarycentrics(rq: ptr<function, RayQuery>) -> vec2<f32> { return (*rq).committedBarycentrics; }
fn rayQueryGetCommittedObjectRayOrigin(rq: ptr<function, RayQuery>) -> vec3<f32> { return (*rq).committedObjectRayOrigin; }
fn rayQueryGetCommittedObjectRayDirection(rq: ptr<function, RayQuery>) -> vec3<f32> { return (*rq).committedObjectRayDirection; }
fn rayQueryGetCommittedWorldPosition(rq: ptr<function, RayQuery>) -> vec3<f32> {
return (*rq).ray.origin + (*rq).ray.direction * (*rq).committedT;
}
`;
// ── Internal compute pipeline: builds TLASEntry[] from the RTInstance[]
// + the meshRecords table. One thread per instance.
// Uses only rtWgslTypes (no megakernel bindings) so it stays well under
// the 8-storage-buffer-per-stage baseline limit.
const tlasBuildWgsl = rtWgslTypes /* needs MeshRecord + TLASEntry */ + String.raw`
struct RTInstance {
transformR0: vec4<f32>,
transformR1: vec4<f32>,
transformR2: vec4<f32>,
customIndexMask: u32, // customIndex: low 24, mask: high 8
sbtFlags: u32, // sbtOffset: low 24, flags: high 8
accelStructureRef: vec2<u32>,
};
@group(0) @binding(0) var<storage, read> inInstances : array<RTInstance>;
@group(0) @binding(1) var<storage, read> inMeshes : array<MeshRecord>;
@group(0) @binding(2) var<storage, read_write> outEntries : array<TLASEntry>;
fn _invMat3(c0: vec3<f32>, c1: vec3<f32>, c2: vec3<f32>) -> array<vec3<f32>, 3> {
let m00 = c1.y*c2.z - c1.z*c2.y;
let m01 = c1.z*c2.x - c1.x*c2.z;
let m02 = c1.x*c2.y - c1.y*c2.x;
let det = c0.x*m00 + c0.y*m01 + c0.z*m02;
let inv = 1.0 / det;
return array<vec3<f32>, 3>(
vec3<f32>(m00, c0.z*c2.y - c0.y*c2.z, c0.y*c1.z - c0.z*c1.y) * inv,
vec3<f32>(m01, c0.x*c2.z - c0.z*c2.x, c0.z*c1.x - c0.x*c1.z) * inv,
vec3<f32>(m02, c0.y*c2.x - c0.x*c2.y, c0.x*c1.y - c0.y*c1.x) * inv,
);
}
@compute @workgroup_size(64, 1, 1)
fn tlasBuildMain(@builtin(global_invocation_id) gid: vec3<u32>) {
let i = gid.x;
if (i >= arrayLength(&inInstances)) { return; }
let inst = inInstances[i];
let meshIdx = inst.accelStructureRef.x;
let custom = inst.customIndexMask & 0xFFFFFFu;
let mask = (inst.customIndexMask >> 24u) & 0xFFu;
let sbtOffset = inst.sbtFlags & 0xFFFFFFu;
let iflags = (inst.sbtFlags >> 24u) & 0xFFu;
let rec = inMeshes[meshIdx];
// 8 corners → world AABB.
var worldMin = vec3<f32>(1e30);
var worldMax = vec3<f32>(-1e30);
for (var c: u32 = 0u; c < 8u; c = c + 1u) {
let corner = vec3<f32>(
select(rec.rootAabbMin.x, rec.rootAabbMax.x, (c & 1u) != 0u),
select(rec.rootAabbMin.y, rec.rootAabbMax.y, (c & 2u) != 0u),
select(rec.rootAabbMin.z, rec.rootAabbMax.z, (c & 4u) != 0u),
);
let wc = vec3<f32>(
dot(inst.transformR0.xyz, corner) + inst.transformR0.w,
dot(inst.transformR1.xyz, corner) + inst.transformR1.w,
dot(inst.transformR2.xyz, corner) + inst.transformR2.w,
);
worldMin = min(worldMin, wc);
worldMax = max(worldMax, wc);
}
// Inverse 3x4 affine.
let inv = _invMat3(inst.transformR0.xyz, inst.transformR1.xyz, inst.transformR2.xyz);
let T = vec3<f32>(inst.transformR0.w, inst.transformR1.w, inst.transformR2.w);
let invT = vec3<f32>(-dot(inv[0], T), -dot(inv[1], T), -dot(inv[2], T));
var e: TLASEntry;
e.aabbMin = worldMin;
e.aabbMax = worldMax;
e.maskHGOffset = mask | (sbtOffset << 8u);
e.blasMeshIdx = meshIdx;
e.objectToWorldR0 = inst.transformR0;
e.objectToWorldR1 = inst.transformR1;
e.objectToWorldR2 = inst.transformR2;
e.worldToObjectR0 = vec4<f32>(inv[0], invT.x);
e.worldToObjectR1 = vec4<f32>(inv[1], invT.y);
e.worldToObjectR2 = vec4<f32>(inv[2], invT.z);
e.customIndex = custom;
e.instanceFlags = iflags;
outEntries[i] = e;
}
`;
// ── RT runtime state ──────────────────────────────────────────────────
// Mesh heaps grow geometrically; each Mesh::Build appends + records its
// offsets in meshRecordsCpu/Buffer. nextMeshHandle is what gets returned
// to C++ as RTInstance::accelerationStructureReference.
const RT_HEAP_INITIAL_BYTES = 64 * 1024;
function makeRtHeap() {
return {
gpu: device.createBuffer({
size: RT_HEAP_INITIAL_BYTES,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
}),
capacity: RT_HEAP_INITIAL_BYTES,
cursor: 0,
};
}
function rtHeapEnsure(h, neededBytes) {
if (h.cursor + neededBytes <= h.capacity) return;
let cap = h.capacity;
while (h.cursor + neededBytes > cap) cap *= 2;
const ng = device.createBuffer({
size: cap,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
});
const enc = device.createCommandEncoder();
if (h.cursor > 0) enc.copyBufferToBuffer(h.gpu, 0, ng, 0, h.cursor);
queue.submit([enc.finish()]);
h.gpu.destroy();
h.gpu = ng;
h.capacity = cap;
// Invalidate cached bind groups that referenced the heap.
rtState.bindGroupCache.clear();
}
const rtState = {
vertHeap: null, // f32 stream (3 floats per vertex)
indexHeap: null, // u32 stream
bvhHeap: null, // BVHNode stream (32 bytes per node)
primRemapHeap: null, // u32 stream
meshRecordsBuffer: null, // GPUBuffer of MeshRecord[]
meshRecordsCapacity: 0,
nextMeshHandle: 1,
rtHeader: null, // uniform buffer for RTDispatchHeader (256 B aligned)
bindGroupCache: new Map(), // key → bind group
tlasBuildPipeline: null,
tlasBuildBgl: null,
// Latest TLAS buffer handle from wgpuBuildTLAS, used by rayQuery-capable
// compute shaders at dispatch time.
currentTlas: 0,
currentTlasInstanceCount: 0,
};
function rtInit() {
rtState.vertHeap = makeRtHeap();
rtState.indexHeap = makeRtHeap();
rtState.bvhHeap = makeRtHeap();
rtState.primRemapHeap = makeRtHeap();
rtState.meshRecordsCapacity = 16;
rtState.meshRecordsBuffer = device.createBuffer({
size: rtState.meshRecordsCapacity * 48,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
rtState.rtHeader = device.createBuffer({
size: 256,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
// TLAS-build compute pipeline. Only group(0) is used (3 SSBOs).
const mod = device.createShaderModule({ code: tlasBuildWgsl, label: "rt-tlas-build" });
const tlasBuildBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
]});
rtState.tlasBuildBgl = tlasBuildBgl;
rtState.tlasBuildPipeline = device.createComputePipeline({
layout: device.createPipelineLayout({ bindGroupLayouts: [tlasBuildBgl] }),
compute: { module: mod, entryPoint: "tlasBuildMain" },
});
}
function rtMeshRecordsEnsure(meshCount) {
if (meshCount <= rtState.meshRecordsCapacity) return;
let cap = rtState.meshRecordsCapacity;
while (cap < meshCount) cap *= 2;
const ng = device.createBuffer({
size: cap * 48,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
const enc = device.createCommandEncoder();
enc.copyBufferToBuffer(rtState.meshRecordsBuffer, 0, ng, 0,
rtState.meshRecordsCapacity * 48);
queue.submit([enc.finish()]);
rtState.meshRecordsBuffer.destroy();
rtState.meshRecordsBuffer = ng;
rtState.meshRecordsCapacity = cap;
rtState.bindGroupCache.clear();
}
env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
verticesPtr, vertexCount,
indicesPtr, indexCount,
bvhNodesPtr, bvhNodeCount,
primRemapPtr, primRemapCount) => {
if (!rtState.vertHeap) rtInit();
const vBytes = vertexCount * 12;
const iBytes = indexCount * 4;
const nBytes = bvhNodeCount * 32;
const rBytes = primRemapCount * 4;
rtHeapEnsure(rtState.vertHeap, vBytes);
rtHeapEnsure(rtState.indexHeap, iBytes);
rtHeapEnsure(rtState.bvhHeap, nBytes);
rtHeapEnsure(rtState.primRemapHeap, rBytes);
const vOff = rtState.vertHeap.cursor / 12; // in vec3 units
const iOff = rtState.indexHeap.cursor / 4; // in u32 units
const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units
const rOff = rtState.primRemapHeap.cursor / 4;
// queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
// already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
queue.writeBuffer(rtState.vertHeap.gpu, rtState.vertHeap.cursor,
memU8().buffer, verticesPtr, vBytes);
queue.writeBuffer(rtState.indexHeap.gpu, rtState.indexHeap.cursor,
memU8().buffer, indicesPtr, iBytes);
queue.writeBuffer(rtState.bvhHeap.gpu, rtState.bvhHeap.cursor,
memU8().buffer, bvhNodesPtr, nBytes);
queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
memU8().buffer, primRemapPtr, rBytes);
rtState.vertHeap.cursor += vBytes;
rtState.indexHeap.cursor += iBytes;
rtState.bvhHeap.cursor += nBytes;
rtState.primRemapHeap.cursor += rBytes;
const handle = rtState.nextMeshHandle++;
rtMeshRecordsEnsure(handle + 1);
// Build the MeshRecord (48 bytes) and write it.
const rec = new ArrayBuffer(48);
const f32 = new Float32Array(rec);
const u32 = new Uint32Array(rec);
f32[0] = minX; f32[1] = minY; f32[2] = minZ;
u32[3] = vOff;
f32[4] = maxX; f32[5] = maxY; f32[6] = maxZ;
u32[7] = iOff;
u32[8] = nOff;
u32[9] = rOff;
u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
u32[11] = 0;
queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
return handle;
};
env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
if (!rtState.tlasBuildPipeline) return;
const inst = buffers.get(instanceBufHandle);
const out = buffers.get(tlasOutBufHandle);
if (!inst || !out) {
console.error("[crafter-wgpu] wgpuBuildTLAS: unknown buffer handle");
return;
}
const bg = device.createBindGroup({
layout: rtState.tlasBuildBgl,
entries: [
{ binding: 0, resource: { buffer: inst } },
{ binding: 1, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 2, resource: { buffer: out } },
],
});
if (state.pass) {
// Mid-frame — close the user's compute pass, run our build pass
// on the same encoder, then reopen.
state.pass.end();
state.pass = null;
}
const enc = state.encoder || device.createCommandEncoder();
const ownEncoder = !state.encoder;
const pass = enc.beginComputePass({ label: "tlas-build" });
pass.setPipeline(rtState.tlasBuildPipeline);
pass.setBindGroup(0, bg);
const groups = Math.ceil(instanceCount / 64);
pass.dispatchWorkgroups(groups, 1, 1);
pass.end();
if (ownEncoder) {
queue.submit([enc.finish()]);
} else {
state.pass = state.encoder.beginComputePass();
}
// Publish so rayQuery-capable compute pipelines pick up the latest TLAS
// without each dispatch having to thread the handle explicitly.
rtState.currentTlas = tlasOutBufHandle;
rtState.currentTlasInstanceCount = instanceCount;
};
// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
// switches + raygen + @compute entry) with the library prelude/helpers.
const rtPipelines = new Map(); // handle → { pipeline, bgls }
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
if (!rtState.vertHeap) rtInit();
const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
// Insert helpers at the marker; prepend prelude.
const marker = "// @CRAFTER_RT_LIBRARY_HELPERS_HERE";
let beforeHelpers = userPart;
let afterHelpers = "";
const mi = userPart.indexOf(marker);
if (mi >= 0) {
beforeHelpers = userPart.substring(0, mi);
afterHelpers = userPart.substring(mi + marker.length);
}
const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
let pipeline;
try {
const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
// RTDispatchHeader is 16 bytes; bind exactly that.
const headerBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
buffer: { type: "uniform", minBindingSize: 16 } },
]});
const dataBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
]});
pipeline = device.createComputePipeline({
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
compute: { module: mod, entryPoint: "main" },
});
const handle = newHandle();
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
return handle;
} catch (e) {
console.error("[crafter-wgpu] RT pipeline compile failed:", e);
console.error("[crafter-wgpu] WGSL was:\n", fullWgsl);
return 0;
}
};
env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
tlasBufHandle, instanceCount, gx, gy) => {
if (!state.pass) return;
const pipe = rtPipelines.get(pipelineHandle);
const tlas = buffers.get(tlasBufHandle);
if (!pipe || !tlas) {
console.error("[crafter-wgpu] wgpuDispatchRT: unknown pipeline or tlas");
return;
}
// Write RT header from push data (first 16 bytes). Surface dims + instance count + flags.
const hdr32 = new Uint32Array(4);
hdr32[0] = state.width;
hdr32[1] = state.height;
hdr32[2] = instanceCount;
hdr32[3] = 0;
queue.writeBuffer(rtState.rtHeader, 0, hdr32);
const headerBg = device.createBindGroup({
layout: pipe.headerBgl,
entries: [{ binding: 0, resource: { buffer: rtState.rtHeader, offset: 0, size: 16 } }],
});
const outView = state.outIsPing ? state.pingView : state.pongView;
const dataBg = device.createBindGroup({
layout: pipe.dataBgl,
entries: [
{ binding: 0, resource: { buffer: tlas } },
{ binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
{ binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
],
});
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, headerBg);
state.pass.setBindGroup(1, dataBg);
state.pass.dispatchWorkgroups(gx, gy, 1);
state.outIsPing = !state.outIsPing;
};
console.log("[crafter-wgpu] init complete; env handlers wired");
} catch (e) {
// Capture any throw so the stub error messages name the real cause
// instead of "(no error captured)". Re-throw so runtime.js's catch
// also logs it.
initError = e instanceof Error ? e : new Error(String(e));
console.error("[crafter-wgpu] init failed:", initError);
throw initError;
}
})(); // end window.crafter_webbuild_env_ready