Crafter.Graphics/additional/dom-webgpu.js

2037 lines
86 KiB
JavaScript
Raw Normal View History

2026-05-18 04:58:52 +02:00
/*
Crafter.Graphics WebGPU bridge DOM mode UI compute pipeline.
Surface model (high-level, deliberately not 1:1 with WebGPU):
- JS owns the GPUDevice/queue/compute pipelines/bind-group cache.
- C++ owns slot allocation and per-frame logic; it calls into ~15 imports.
- Standard UI shaders are embedded as WGSL strings at the bottom of this
file and compiled once at startup.
Ping-pong output strategy (per Decision 2 in plan):
- Two rgba8unorm storage textures sized to the canvas.
- Each Dispatch reads `prev` (sampled), writes `out` (storage, write-only).
- JS swaps the two between dispatches, so item-order overdraw works.
- At frame end, the current `out` is blitted to the canvas via
copyTextureToTexture (canvas configured as rgba8unorm to match).
This file is loaded as <script type="module">. Top-level await blocks
runtime.js's _start() until adapter + device are resolved, so by the time
main() runs every import here is fully wired.
*/
// ─── env stubs (assigned synchronously, BEFORE any async work) ────────────
// The wasm module's import-link step needs every declared wgpu* import to
// resolve to a Function. If init below throws, the stubs stay in place so
// the wasm still links — and the call site gets a clear error at runtime
// instead of "import object field X is not a Function" at link time.
window.crafter_webbuild_env = window.crafter_webbuild_env || {};
window.crafter_webbuild_env.table = window.crafter_webbuild_env.table
|| new WebAssembly.Table({ initial: 4, element: "anyfunc" });
let initError = null;
function stub(name) {
return (...args) => {
const msg = `[crafter-wgpu] ${name}() called but WebGPU init failed: ${initError?.message ?? "(no error captured)"}`;
console.error(msg);
throw new Error(msg);
};
}
{
const e = window.crafter_webbuild_env;
for (const n of [
"wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
"wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
"wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
2026-05-19 00:27:09 +02:00
"wgpuCreateImage2D", "wgpuWriteImage2D",
"wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
2026-05-18 04:58:52 +02:00
"wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
"wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
2026-05-18 18:43:30 +02:00
"wgpuLoadCustomShader", "wgpuDispatchCustom",
"wgpuRegisterMeshBLAS", "wgpuLoadRTPipeline", "wgpuDispatchRT", "wgpuBuildTLAS",
2026-05-18 04:58:52 +02:00
]) {
// Read-write ints don't need a stub-throw; return 0 for the size queries.
2026-05-18 18:43:30 +02:00
e[n] = n.endsWith("Width") || n.endsWith("Height")
? () => 0
: (n === "wgpuRegisterMeshBLAS" ? () => 0 : stub(n));
2026-05-18 04:58:52 +02:00
}
}
// ─── canvas + device init (runs before _start) ───────────────────────────
// Wrapped in an async IIFE assigned to window.crafter_webbuild_env_ready so
// the runtime.js shim can `await` it explicitly before calling _start().
// Sibling <script type="module"> top-level awaits are NOT reliably
// serialized in Firefox (verified 2026-05), so we can't depend on this
// file's TLA to block runtime.js by itself.
window.crafter_webbuild_env_ready = (async () => {
try {
if (!navigator.gpu) {
document.body.innerHTML = "<p style=\"font-family:sans-serif;padding:24px\">"
+ "WebGPU not available in this browser. Try Chrome 121+ / Firefox 141+ / Safari 26+.</p>";
initError = new Error("WebGPU unavailable");
throw initError;
}
const canvas = document.createElement("canvas");
canvas.id = "crafter-canvas";
canvas.style.cssText = "position:fixed;inset:0;width:100vw;height:100vh;display:block;";
document.body.style.margin = "0";
document.body.appendChild(canvas);
function syncCanvasSize() {
// Match canvas pixel size to its CSS pixel size 1:1 so MouseEvent
// clientX/clientY (CSS pixels) and the wasm-side window.width/.height
// share the same coordinate space. (HiDPI sharpness is a v2 concern
// — would need DPR on the GPU side AND a scaling step in the C++
// Window/Event glue so layout/hit-testing/dispatch counts stay
// consistent.)
const w = window.innerWidth;
const h = window.innerHeight;
if (canvas.width !== w) canvas.width = w;
if (canvas.height !== h) canvas.height = h;
return { w, h };
}
syncCanvasSize();
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
initError = new Error("navigator.gpu.requestAdapter() returned null (no compatible adapter)");
console.error("[crafter-wgpu]", initError.message);
throw initError;
}
const device = await adapter.requestDevice();
const queue = device.queue;
const ctx = canvas.getContext("webgpu");
const canvasFormat = "rgba8unorm"; // match storage textures, skip swizzle blit
ctx.configure({ device, format: canvasFormat, alphaMode: "opaque",
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_DST });
device.lost.then((info) => {
console.error("[crafter-wgpu] device lost:", info.message);
state.gpuLost = true;
});
// ─── handle tables ─────────────────────────────────────────────────────
const buffers = new Map(); // handle → GPUBuffer
const textures = new Map(); // handle → GPUTexture
const textureViews = new Map(); // handle → GPUTextureView (mirrors textures key for the view)
const samplers = new Map(); // handle → GPUSampler
let nextHandle = 1;
function newHandle() { return nextHandle++; }
// ─── ping-pong storage textures ────────────────────────────────────────
const state = {
pingTex: null, pingView: null,
pongTex: null, pongView: null,
outIsPing: true, // current "out" target
width: 0, height: 0,
encoder: null,
pass: null,
headerRing: null, // GPUBuffer; uniform header writes ring through this
headerRingSize: 0,
headerRingOffset: 0,
bindGroupCache: new Map(), // key → GPUBindGroup
gpuLost: false,
};
function recreatePingPong(w, h) {
const usage = GPUTextureUsage.STORAGE_BINDING
| GPUTextureUsage.TEXTURE_BINDING
| GPUTextureUsage.COPY_SRC
| GPUTextureUsage.COPY_DST; // COPY_DST so we can clear it
if (state.pingTex) state.pingTex.destroy();
if (state.pongTex) state.pongTex.destroy();
state.pingTex = device.createTexture({ size: [w, h], format: "rgba8unorm", usage });
state.pongTex = device.createTexture({ size: [w, h], format: "rgba8unorm", usage });
state.pingView = state.pingTex.createView();
state.pongView = state.pongTex.createView();
state.width = w; state.height = h;
state.outIsPing = true;
state.bindGroupCache.clear();
}
function ensureSized() {
const { w, h } = syncCanvasSize();
if (w !== state.width || h !== state.height) {
recreatePingPong(w, h);
// Notify the wasm side that the surface size changed so it can
// fire onResize through Window. The wasm export is added by
// Crafter.Graphics-Window.cpp.
const onResize = wasmExports && wasmExports.__crafterDom_resize;
if (onResize) onResize(1, w, h);
}
}
// Header ring buffer: 256-byte-aligned slots holding UIDispatchHeader (48
// bytes of meaningful data, padded to 256). Wraps at frame boundary.
const HEADER_ALIGN = 256;
const HEADER_RING_SLOTS = 64;
state.headerRingSize = HEADER_ALIGN * HEADER_RING_SLOTS;
state.headerRing = device.createBuffer({
size: state.headerRingSize,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
// ─── pipelines ─────────────────────────────────────────────────────────
const wgslShared = String.raw`
struct UIDispatchHeader {
outImage: u32,
itemBuffer: u32,
surfaceW: u32,
surfaceH: u32,
clipX: f32,
clipY: f32,
clipW: f32,
clipH: f32,
itemCount: u32,
frameIdx: u32,
flags: u32,
_pad: u32,
};
@group(0) @binding(0) var<uniform> hdr : UIDispatchHeader;
@group(1) @binding(0) var outTex : texture_storage_2d<rgba8unorm, write>;
@group(1) @binding(1) var prevTex : texture_2d<f32>;
fn uiResolvePixel(coord: vec2<u32>) -> bool {
if (coord.x >= hdr.surfaceW || coord.y >= hdr.surfaceH) { return false; }
let fx = f32(coord.x); let fy = f32(coord.y);
if (fx < hdr.clipX || fy < hdr.clipY) { return false; }
if (fx >= hdr.clipX + hdr.clipW) { return false; }
if (fy >= hdr.clipY + hdr.clipH) { return false; }
return true;
}
fn uiBlendOver(dst: vec4<f32>, src: vec4<f32>) -> vec4<f32> {
let a = clamp(src.a, 0.0, 1.0);
let rgb = mix(dst.rgb, src.rgb, vec3<f32>(a));
let outA = a + dst.a * (1.0 - a);
return vec4<f32>(rgb, outA);
}
fn uiSdRoundRect(p: vec2<f32>, halfSize: vec2<f32>, r4: vec4<f32>) -> f32 {
var r: vec4<f32> = r4;
// Pick radius for the quadrant p is in. r order: (TL, TR, BR, BL).
let rx = select(r.x, r.z, p.x > 0.0);
let ry = select(r.w, r.y, p.x > 0.0);
let radius = select(ry, rx, p.y > 0.0);
let q = abs(p) - halfSize + vec2<f32>(radius);
return min(max(q.x, q.y), 0.0) + length(max(q, vec2<f32>(0.0))) - radius;
}
`;
const wgslQuads = wgslShared + String.raw`
struct QuadItem {
rect: vec4<f32>,
color: vec4<f32>,
corners: vec4<f32>,
outline: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<QuadItem>;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let halfSize = it.rect.zw * 0.5;
let p = sp - (it.rect.xy + halfSize);
let d = uiSdRoundRect(p, halfSize, it.corners);
let bodyA = clamp(0.5 - d, 0.0, 1.0);
if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
if (it.outline.x > 0.0) {
let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
let outlineA = clamp(0.5 - t, 0.0, 1.0);
src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
max(src.a, outlineA));
}
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslCircles = wgslShared + String.raw`
struct CircleItem {
centerRadius: vec4<f32>,
color: vec4<f32>,
outline: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<CircleItem>;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let center = it.centerRadius.xy;
let radius = it.centerRadius.z;
let d = length(sp - center) - radius;
let bodyA = clamp(0.5 - d, 0.0, 1.0);
if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
if (it.outline.x > 0.0) {
let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
let outlineA = clamp(0.5 - t, 0.0, 1.0);
src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
max(src.a, outlineA));
}
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslImages = wgslShared + String.raw`
struct ImageItem {
rect: vec4<f32>,
uv: vec4<f32>,
tint: vec4<f32>,
slots: vec4<u32>,
};
@group(2) @binding(0) var<storage, read> items : array<ImageItem>;
@group(3) @binding(0) var imgTex : texture_2d<f32>;
@group(3) @binding(1) var imgSampler : sampler;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let t = (sp - lo) / it.rect.zw;
let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
let sample = textureSampleLevel(imgTex, imgSampler, uv, 0.0);
let src = sample * it.tint;
if (src.a <= 0.0) { continue; }
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
const wgslText = wgslShared + String.raw`
struct GlyphItem {
rect: vec4<f32>,
uv: vec4<f32>,
color: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<GlyphItem>;
@group(3) @binding(0) var atlasTex : texture_2d<f32>;
@group(3) @binding(1) var atlasSampler : sampler;
@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
if (!uiResolvePixel(gid.xy)) { return; }
let coord = vec2<i32>(i32(gid.x), i32(gid.y));
var dst = textureLoad(prevTex, coord, 0);
let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
let it = items[i];
let lo = it.rect.xy;
let hi = it.rect.xy + it.rect.zw;
if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
let t = (sp - lo) / it.rect.zw;
let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
// stb_truetype SDF: pixel value ~128 is the edge. Treat alpha as
// the smoothed step around that midpoint.
let sdf = textureSampleLevel(atlasTex, atlasSampler, uv, 0.0).r;
let alpha = clamp((sdf - 0.5) * 8.0 + 0.5, 0.0, 1.0);
if (alpha <= 0.0) { continue; }
let src = vec4<f32>(it.color.rgb, it.color.a * alpha);
dst = uiBlendOver(dst, src);
}
textureStore(outTex, coord, dst);
}
`;
function makePipeline(label, wgsl, hasGroup3) {
const mod = device.createShaderModule({ label, code: wgsl });
// Layout: group 0 uniform header, group 1 (out storage + prev sampled),
// group 2 storage items SSBO, optional group 3 (texture + sampler).
const bgl0 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
]});
const bgl1 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, texture: { sampleType: "float", viewDimension: "2d" } },
]});
const bgl2 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]});
const layouts = [bgl0, bgl1, bgl2];
let bgl3 = null;
if (hasGroup3) {
bgl3 = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, texture: { sampleType: "float", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, sampler: { type: "filtering" } },
]});
layouts.push(bgl3);
}
const pl = device.createPipelineLayout({ bindGroupLayouts: layouts });
const pipeline = device.createComputePipeline({
layout: pl,
compute: { module: mod, entryPoint: "main" },
});
return { pipeline, bgl0, bgl1, bgl2, bgl3 };
}
const pipeQuads = makePipeline("ui-quads", wgslQuads, false);
const pipeCircles = makePipeline("ui-circles", wgslCircles, false);
const pipeImages = makePipeline("ui-images", wgslImages, true);
const pipeText = makePipeline("ui-text", wgslText, true);
// Bind groups for group 0 (header uniform with dynamic offset) — one per
// pipeline, references the same ring buffer.
function makeHdrBG(pipe) {
return device.createBindGroup({
layout: pipe.bgl0,
entries: [{ binding: 0, resource: { buffer: state.headerRing, offset: 0, size: 48 } }],
});
}
const hdrBG = {
quads: makeHdrBG(pipeQuads),
circles: makeHdrBG(pipeCircles),
images: makeHdrBG(pipeImages),
text: makeHdrBG(pipeText),
};
// Group 1 changes between dispatches because `out` and `prev` swap on the
2026-05-18 05:39:17 +02:00
// ping-pong. Cached by current ping-pong direction and texture size; the
// stored bind group is reusable across all pipelines that share a
// layout-compatible bgl1 (all standard pipelines and custom shaders do,
// since they declare identical group-1 entries per the contract).
function getGroup1BG(bgl1) {
const key = `g1/${state.outIsPing ? 1 : 0}/${state.width}x${state.height}`;
2026-05-18 04:58:52 +02:00
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const outView = state.outIsPing ? state.pingView : state.pongView;
const prevView = state.outIsPing ? state.pongView : state.pingView;
bg = device.createBindGroup({
2026-05-18 05:39:17 +02:00
layout: bgl1,
2026-05-18 04:58:52 +02:00
entries: [
{ binding: 0, resource: outView },
{ binding: 1, resource: prevView },
],
});
state.bindGroupCache.set(key, bg);
return bg;
}
function getGroup2BG(pipe, itemsHandle) {
const key = `items/${pipe === pipeQuads ? "q" : pipe === pipeCircles ? "c" : pipe === pipeImages ? "i" : "t"}/${itemsHandle}`;
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const buf = buffers.get(itemsHandle);
if (!buf) throw new Error(`getGroup2BG: unknown items buffer ${itemsHandle}`);
bg = device.createBindGroup({
layout: pipe.bgl2,
entries: [{ binding: 0, resource: { buffer: buf } }],
});
state.bindGroupCache.set(key, bg);
return bg;
}
function getGroup3BG(pipe, texHandle, sampHandle) {
const key = `t3/${texHandle}/${sampHandle}/${pipe === pipeImages ? "i" : "x"}`;
let bg = state.bindGroupCache.get(key);
if (bg) return bg;
const tex = textureViews.get(texHandle);
const sam = samplers.get(sampHandle);
if (!tex || !sam) throw new Error(`getGroup3BG: unknown view ${texHandle} / sampler ${sampHandle}`);
bg = device.createBindGroup({
layout: pipe.bgl3,
entries: [
{ binding: 0, resource: tex },
{ binding: 1, resource: sam },
],
});
state.bindGroupCache.set(key, bg);
return bg;
}
// ─── wasm import surface ───────────────────────────────────────────────
let wasmExports = null;
// Crafter.Build's runtime.js exposes the wasi instance on
// window.crafter_wasi after instantiation. We grab the exports lazily so
// every import-side function works regardless of call order. memU8 /
// memF32 / memU32 always re-derive the typed-array view because the
// wasm memory's backing ArrayBuffer is detached and replaced whenever
// the wasm grows its memory; caching a typed array would alias to
// freed memory after a grow.
function getExports() {
if (wasmExports) return wasmExports;
const wasi = window.crafter_wasi;
if (!wasi || !wasi.instance) {
throw new Error("[crafter-wgpu] wasm exports not available yet (called too early)");
}
wasmExports = wasi.instance.exports;
return wasmExports;
}
function memU8() { return new Uint8Array(getExports().memory.buffer); }
function memF32() { return new Float32Array(getExports().memory.buffer); }
function memU32() { return new Uint32Array(getExports().memory.buffer); }
// Stubs were assigned at the top of this file; we now overwrite them with
// real implementations now that init has succeeded.
const env = window.crafter_webbuild_env;
env.wgpuGetCanvasWidth = () => canvas.width;
env.wgpuGetCanvasHeight = () => canvas.height;
env.wgpuCreateBuffer = (byteSize) => {
const h = newHandle();
const buf = device.createBuffer({
size: Math.max(16, byteSize),
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
buffers.set(h, buf);
return h;
};
env.wgpuWriteBuffer = (handle, srcPtr, byteSize) => {
state.writeBufferCount = (state.writeBufferCount || 0) + 1;
state.lastWriteHandle = handle;
state.lastWriteSize = byteSize;
const buf = buffers.get(handle);
if (!buf) return;
// writeBuffer requires a multiple of 4 bytes and an aligned offset.
const aligned = (byteSize + 3) & ~3;
queue.writeBuffer(buf, 0, memU8().buffer, srcPtr, aligned);
};
env.wgpuDestroyBuffer = (handle) => {
const buf = buffers.get(handle);
if (buf) { buf.destroy(); buffers.delete(handle); }
// Invalidate any cached bind group that referenced this handle.
for (const k of state.bindGroupCache.keys()) {
if (k.startsWith("items/") && k.endsWith("/" + handle)) {
state.bindGroupCache.delete(k);
}
}
};
env.wgpuCreateAtlasTexture = (w, h) => {
const handle = newHandle();
const tex = device.createTexture({
size: [w, h],
format: "r8unorm",
usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
});
textures.set(handle, tex);
textureViews.set(handle, tex.createView());
return handle;
};
env.wgpuWriteAtlasRegion = (handle, srcPtr, srcW, srcH, srcBytesPerRow, dstX, dstY, copyW, copyH) => {
const tex = textures.get(handle);
if (!tex) return;
// For r8unorm, 1 byte per pixel; writeTexture requires bytesPerRow >= 256
// OR == width if width*1 % 256 === 0 — for arbitrary widths we need to
// re-pack into a 256-aligned staging buffer.
const alignedBPR = Math.max(256, (srcBytesPerRow + 255) & ~255);
if (alignedBPR === srcBytesPerRow) {
const bytes = memU8().subarray(srcPtr + dstY * srcBytesPerRow + dstX,
srcPtr + (dstY + copyH) * srcBytesPerRow);
queue.writeTexture(
{ texture: tex, origin: { x: dstX, y: dstY } },
bytes,
{ bytesPerRow: srcBytesPerRow, rowsPerImage: copyH },
{ width: copyW, height: copyH }
);
} else {
// Repack copyW × copyH starting at (dstX, dstY) in the source.
const staging = new Uint8Array(alignedBPR * copyH);
const src = memU8();
for (let y = 0; y < copyH; y++) {
const srcRow = (dstY + y) * srcBytesPerRow + dstX;
staging.set(src.subarray(srcPtr + srcRow, srcPtr + srcRow + copyW),
y * alignedBPR);
}
queue.writeTexture(
{ texture: tex, origin: { x: dstX, y: dstY } },
staging,
{ bytesPerRow: alignedBPR, rowsPerImage: copyH },
{ width: copyW, height: copyH }
);
}
};
env.wgpuDestroyTexture = (handle) => {
const tex = textures.get(handle);
if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
};
2026-05-19 00:27:09 +02:00
// General-purpose 2D rgba8unorm texture, used by Image2D<RGBA8>. Distinct
// from the atlas path (r8unorm, sub-region writes) — this one's a one-shot
// upload of a whole image, sized to the pixel data the caller hands over.
env.wgpuCreateImage2D = (w, h) => {
const handle = newHandle();
const tex = device.createTexture({
size: [w, h],
format: "rgba8unorm",
usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
});
textures.set(handle, tex);
textureViews.set(handle, tex.createView());
return handle;
};
// 2D texture array — N layers of identical (w × h) rgba8unorm. Used by
// Image2DArray<RGBA8> to back one material albedo per layer; shaders
// sample with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
env.wgpuCreateImage2DArray = (w, h, layerCount) => {
const handle = newHandle();
const tex = device.createTexture({
size: [w, h, layerCount],
dimension: "2d",
format: "rgba8unorm",
usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
});
textures.set(handle, tex);
textureViews.set(handle, tex.createView({
dimension: "2d-array",
arrayLayerCount: layerCount,
}));
return handle;
};
env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => {
const tex = textures.get(handle);
if (!tex) return;
const srcBPR = w * 4;
const alignedBPR = (srcBPR + 255) & ~255;
if (alignedBPR === srcBPR) {
queue.writeTexture(
{ texture: tex, origin: [0, 0, layer] },
memU8().subarray(srcPtr, srcPtr + byteSize),
{ bytesPerRow: srcBPR, rowsPerImage: h },
{ width: w, height: h, depthOrArrayLayers: 1 }
);
} else {
const staging = new Uint8Array(alignedBPR * h);
const src = memU8();
for (let y = 0; y < h; y++) {
staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR),
y * alignedBPR);
}
queue.writeTexture(
{ texture: tex, origin: [0, 0, layer] },
staging,
{ bytesPerRow: alignedBPR, rowsPerImage: h },
{ width: w, height: h, depthOrArrayLayers: 1 }
);
}
};
env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => {
const tex = textures.get(handle);
if (!tex) return;
// queue.writeTexture wants bytesPerRow as a multiple of 256, OR == width*bpp
// when the source is contiguous. RGBA8 = 4 bpp, so bytesPerRow = w*4.
const srcBPR = w * 4;
const alignedBPR = (srcBPR + 255) & ~255;
if (alignedBPR === srcBPR) {
// Already aligned (w * 4 is a multiple of 256 → w is a multiple of 64).
queue.writeTexture(
{ texture: tex },
memU8().subarray(srcPtr, srcPtr + byteSize),
{ bytesPerRow: srcBPR, rowsPerImage: h },
{ width: w, height: h }
);
} else {
// Repack into a 256-aligned staging buffer. One alloc per Update,
// freed when the function returns — fine for asset-load time use.
const staging = new Uint8Array(alignedBPR * h);
const src = memU8();
for (let y = 0; y < h; y++) {
staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR),
y * alignedBPR);
}
queue.writeTexture(
{ texture: tex },
staging,
{ bytesPerRow: alignedBPR, rowsPerImage: h },
{ width: w, height: h }
);
}
};
2026-05-18 04:58:52 +02:00
env.wgpuCreateLinearClampSampler = () => {
const handle = newHandle();
samplers.set(handle, device.createSampler({
magFilter: "linear", minFilter: "linear",
addressModeU: "clamp-to-edge", addressModeV: "clamp-to-edge",
}));
return handle;
};
// ─── per-frame ──────────────────────────────────────────────────────────
env.wgpuFrameBegin = () => {
state.frameBeginCount = (state.frameBeginCount || 0) + 1;
if (state.gpuLost) return;
ensureSized();
state.encoder = device.createCommandEncoder();
state.outIsPing = true; // reset so each frame starts on the same target
state.headerRingOffset = 0;
// DON'T clearBuffer the header ring here. queue.writeBuffer ops from
// writeHeader() are enqueued BEFORE this command buffer's submit,
// so an encoded clearBuffer would wipe them — the dispatches would
// then read all-zero headers and uiResolvePixel would reject every
// pixel (surfaceW=0).
clearStorageTexture(state.encoder, state.outIsPing ? state.pingTex : state.pongTex,
state.width, state.height);
state.pass = state.encoder.beginComputePass();
};
let zeroBuffer = null;
let zeroBufferSize = 0;
function clearStorageTexture(encoder, tex, w, h) {
const bpr = (w * 4 + 255) & ~255;
const need = bpr * h;
if (!zeroBuffer || zeroBufferSize < need) {
if (zeroBuffer) zeroBuffer.destroy();
zeroBuffer = device.createBuffer({ size: need, usage: GPUBufferUsage.COPY_SRC, mappedAtCreation: true });
new Uint8Array(zeroBuffer.getMappedRange()).fill(0);
zeroBuffer.unmap();
zeroBufferSize = need;
}
encoder.copyBufferToTexture(
{ buffer: zeroBuffer, bytesPerRow: bpr, rowsPerImage: h },
{ texture: tex },
{ width: w, height: h, depthOrArrayLayers: 1 }
);
}
env.wgpuFrameEnd = () => {
state.frameEndCount = (state.frameEndCount || 0) + 1;
if (state.gpuLost || !state.encoder) return;
state.pass.end();
state.pass = null;
// Blit last-written ping-pong texture → canvas. After N dispatches,
// state.outIsPing points at the NEXT write target, so the latest
// content lives in the OPPOSITE texture.
const finalTex = state.outIsPing ? state.pongTex : state.pingTex;
const canvasTex = ctx.getCurrentTexture();
state.encoder.copyTextureToTexture(
{ texture: finalTex },
{ texture: canvasTex },
{ width: state.width, height: state.height, depthOrArrayLayers: 1 }
);
queue.submit([state.encoder.finish()]);
state.encoder = null;
};
// Write a 48-byte UIDispatchHeader into the ring buffer at the current
// offset (which is incremented and 256-aligned). Returns the dynamic
// offset to pass to setBindGroup.
function writeHeader(headerPtr) {
const offset = state.headerRingOffset;
if (offset + HEADER_ALIGN > state.headerRingSize) {
// Ring is small enough that overrun in one frame means too many
// dispatches. Soft-wrap; correctness already requires the ring
// be large enough.
state.headerRingOffset = 0;
}
queue.writeBuffer(state.headerRing, state.headerRingOffset,
memU8().buffer, headerPtr, 48);
state.headerRingOffset += HEADER_ALIGN;
return offset;
}
function dispatchStandard(pipe, hdrBindGroup, headerPtr, gx, gy, itemsHandle, group3) {
if (!state.pass) return;
const off = writeHeader(headerPtr);
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, hdrBindGroup, [off]);
2026-05-18 05:39:17 +02:00
state.pass.setBindGroup(1, getGroup1BG(pipe.bgl1));
2026-05-18 04:58:52 +02:00
state.pass.setBindGroup(2, getGroup2BG(pipe, itemsHandle));
if (group3) state.pass.setBindGroup(3, group3);
state.pass.dispatchWorkgroups(gx, gy, 1);
// Flip ping-pong: the texture we just wrote becomes next dispatch's prev.
state.outIsPing = !state.outIsPing;
}
env.wgpuDispatchQuads = (itemsHandle, headerPtr, gx, gy) => {
state.dispatchQuadsCount = (state.dispatchQuadsCount || 0) + 1;
dispatchStandard(pipeQuads, hdrBG.quads, headerPtr, gx, gy, itemsHandle, null);
};
env.wgpuDispatchCircles = (itemsHandle, headerPtr, gx, gy) => {
dispatchStandard(pipeCircles, hdrBG.circles, headerPtr, gx, gy, itemsHandle, null);
};
env.wgpuDispatchImages = (itemsHandle, headerPtr, gx, gy, texHandle, sampHandle) => {
const g3 = getGroup3BG(pipeImages, texHandle, sampHandle);
dispatchStandard(pipeImages, hdrBG.images, headerPtr, gx, gy, itemsHandle, g3);
};
env.wgpuDispatchText = (itemsHandle, headerPtr, gx, gy, atlasHandle, sampHandle) => {
const g3 = getGroup3BG(pipeText, atlasHandle, sampHandle);
dispatchStandard(pipeText, hdrBG.text, headerPtr, gx, gy, itemsHandle, g3);
};
2026-05-18 05:39:17 +02:00
// ─── custom user-authored shaders ─────────────────────────────────────
//
// Bind-group contract (mirrors :WebGPUComputeShader.cppm):
// group 0 binding 0 — uniform UIDispatchHeader (dynamic offset, 48b)
// group 1 binding 0 — texture_storage_2d<rgba8unorm, write> out
// group 1 binding 1 — texture_2d<f32> prev
// group 2+ — user-declared (UICustomBinding entries)
//
// Each UICustomBinding entry on the wasm side is 8 bytes:
// u8 group, u8 binding, u8 kind, u8 pad, u32 pushOffset
// kind: 0 = read-only-storage SSBO, 1 = sampled tex 2d, 2 = filtering sampler.
const customPipelines = new Map(); // handle → { pipeline, bgls, hdrBG, byGroup }
2026-05-18 18:43:30 +02:00
env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQueryFlag) => {
if (!rtState.vertHeap && rayQueryFlag) rtInit();
const userWgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
// For rayQuery-capable shaders, prepend the RT prelude + ray-query
// library. The user shader can declare its own group 0 / 2+ bindings
// but MUST NOT redeclare group(1) — that's reserved for RT data.
const wgsl = rayQueryFlag
? (rtWgslTypes + rtWgslMegakernelBindings + rtWgslRayQueryLib + "\n" + userWgsl)
: userWgsl;
2026-05-18 05:39:17 +02:00
const bindings = [];
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
for (let i = 0; i < bindingsCount; i++) {
bindings.push({
group: dv.getUint8(i*8 + 0),
binding: dv.getUint8(i*8 + 1),
kind: dv.getUint8(i*8 + 2),
pushOffset: dv.getUint32(i*8 + 4, true),
});
}
// Group bindings by @group(N) for layout creation.
const byGroup = new Map();
for (const b of bindings) {
if (b.group < 2) {
console.error(`[crafter-wgpu] custom shader: @group(${b.group}) reserved; use groups >= 2`);
return 0;
}
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
byGroup.get(b.group).push(b);
}
2026-05-18 18:43:30 +02:00
// Group 0 = header uniform (same for both paths).
// Group 1 = ping-pong out+prev OR RT data (TLAS, BVH, meshRecs, verts,
// idx, primRemap, outImage) when rayQuery flag is on.
2026-05-18 05:39:17 +02:00
const bgls = [
device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
]}),
2026-05-18 18:43:30 +02:00
rayQueryFlag
? device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
2026-05-19 00:27:09 +02:00
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
2026-05-18 18:43:30 +02:00
]})
: device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE,
texture: { sampleType: "float", viewDimension: "2d" } },
]}),
2026-05-18 05:39:17 +02:00
];
// Sorted custom groups. Pad any gaps with empty bgls (WebGPU pipeline
// layouts require a contiguous array of GPUBindGroupLayout per group
// index up to the highest used).
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
for (let g = 2; g <= highest; g++) {
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
2026-05-19 00:27:09 +02:00
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
2026-05-18 05:39:17 +02:00
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
2026-05-19 00:27:09 +02:00
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
2026-05-18 05:39:17 +02:00
return e;
});
bgls.push(device.createBindGroupLayout({ entries }));
} else {
bgls.push(device.createBindGroupLayout({ entries: [] }));
}
}
let pipeline;
try {
const mod = device.createShaderModule({ code: wgsl });
const layout = device.createPipelineLayout({ bindGroupLayouts: bgls });
pipeline = device.createComputePipeline({ layout, compute: { module: mod, entryPoint: "main" } });
} catch (e) {
console.error("[crafter-wgpu] custom shader compile failed:", e);
2026-05-18 18:43:30 +02:00
if (rayQueryFlag) console.error("[crafter-wgpu] WGSL was:\n", wgsl);
2026-05-18 05:39:17 +02:00
return 0;
}
const hdrBG = device.createBindGroup({
layout: bgls[0],
entries: [{ binding: 0, resource: { buffer: state.headerRing, offset: 0, size: 48 } }],
});
const handle = newHandle();
2026-05-18 18:43:30 +02:00
customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups, rayQueryCapable: !!rayQueryFlag });
2026-05-18 05:39:17 +02:00
return handle;
};
env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handlesCount,
gx, gy, gz) => {
state.dispatchCustomCount = (state.dispatchCustomCount || 0) + 1;
if (!state.pass) return;
const pipe = customPipelines.get(pipelineHandle);
if (!pipe) {
console.error("[crafter-wgpu] wgpuDispatchCustom: unknown pipeline", pipelineHandle);
return;
}
// Write header (first 48 bytes of push).
const off = writeHeader(pushPtr);
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, pipe.hdrBG, [off]);
2026-05-18 18:43:30 +02:00
// Group 1: rayQuery-capable shaders get the RT data heaps + the most
// recently built TLAS; everyone else gets the standard ping-pong pair.
if (pipe.rayQueryCapable) {
const tlasBuf = buffers.get(rtState.currentTlas);
if (!tlasBuf) {
console.error("[crafter-wgpu] rayQuery dispatch but no TLAS built yet");
return;
}
const outView = state.outIsPing ? state.pingView : state.pongView;
const rtBG = device.createBindGroup({
layout: pipe.bgls[1],
entries: [
{ binding: 0, resource: { buffer: tlasBuf } },
{ binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
{ binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
2026-05-19 00:27:09 +02:00
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
2026-05-18 18:43:30 +02:00
],
});
state.pass.setBindGroup(1, rtBG);
} else {
state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1]));
}
2026-05-18 05:39:17 +02:00
// Walk bindings in declaration order and assemble bind groups.
// handles[] from wasm is in the SAME order as customBindings, so we
// pick up indices by walking byGroup in the same sorted order.
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
let handleIdx = 0;
for (const g of pipe.sortedGroups) {
const entries = pipe.byGroup.get(g).map(b => {
const h = handles[handleIdx++];
let resource;
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
2026-05-19 00:27:09 +02:00
else if (b.kind === 3) resource = textureViews.get(h);
2026-05-18 05:39:17 +02:00
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
state.pass.setBindGroup(g, bg);
}
state.pass.dispatchWorkgroups(gx, gy, gz);
state.outIsPing = !state.outIsPing;
};
2026-05-18 04:58:52 +02:00
// Debug accessor for browser-console diagnostics.
window.crafter_wgpu_state = state;
window.crafter_wgpu_device = device;
window.crafter_wgpu_canvasCtx = ctx;
window.crafter_wgpu_debug = () => ({
width: state.width, height: state.height,
outIsPing: state.outIsPing,
encoderActive: !!state.encoder,
passActive: !!state.pass,
bgCacheSize: state.bindGroupCache.size,
bufferHandles: buffers.size,
textureHandles: textures.size,
samplerHandles: samplers.size,
headerRingOffset: state.headerRingOffset,
frameBeginCount: state.frameBeginCount || 0,
frameEndCount: state.frameEndCount || 0,
dispatchQuadsCount: state.dispatchQuadsCount || 0,
writeBufferCount: state.writeBufferCount || 0,
lastWriteHandle: state.lastWriteHandle,
lastWriteSize: state.lastWriteSize,
});
2026-05-18 05:39:17 +02:00
window.crafter_wgpu_bufferKeys = () => [...buffers.keys()];
2026-05-18 04:58:52 +02:00
// Read back the first QuadItem from a registered buffer to verify the
// GPU sees what the CPU wrote.
window.crafter_wgpu_readBuffer = async (handle, byteSize = 64) => {
const buf = buffers.get(handle);
if (!buf) return "no buffer for handle " + handle;
const read = device.createBuffer({ size: 256, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST });
const enc = device.createCommandEncoder();
enc.copyBufferToBuffer(buf, 0, read, 0, byteSize);
device.queue.submit([enc.finish()]);
await read.mapAsync(GPUMapMode.READ);
const data = new Float32Array(read.getMappedRange().slice(0, byteSize));
read.unmap();
return [...data];
};
// Surface size getters (the wasm side may query these on Resize events).
env.wgpuSurfaceWidth = () => state.width || canvas.width;
env.wgpuSurfaceHeight = () => state.height || canvas.height;
// One-shot init: forces ping-pong allocation at current canvas size so
// any Buffer/Texture creation before the first frame works against a
// concrete size. Called by Crafter::Device::Initialize on the wasm side.
env.wgpuInit = () => {
const { w, h } = syncCanvasSize();
recreatePingPong(w, h);
};
// Resize listener — wires up to the same `resize` event dom-env.js
// listens to. We trigger sizing on next frame begin; no work here.
window.addEventListener("resize", () => { /* ensureSized in wgpuFrameBegin */ });
2026-05-18 18:43:30 +02:00
// ─────────────────────────────────────────────────────────────────────
// ── Software raytracing subsystem ────────────────────────────────────
// ─────────────────────────────────────────────────────────────────────
//
// WebGPU has no hardware RT. The library emulates DXR/VKRT semantics in
// compute: a megakernel raygen pipeline traverses a CPU-built BLAS BVH +
// GPU-built TLAS, dispatches user closesthit / anyhit / miss via a
// generated `switch`. The same traversal kernel is also exposed as a
// rayQuery* function set for regular compute shaders (see the
// rayQueryCapable path under wgpuLoadCustomShader).
//
// The four mesh data streams live in *shared* GPU heaps; each Mesh::Build
// appends to them and gets a u32 handle back. The handle is what the
// application stores in RTInstance::accelerationStructureReference.
// ── WGSL library: shared types + constants (no bindings) ─────────────
// Used by both the megakernel pipeline (which adds group(0..1) bindings)
// and the TLAS-build pipeline (which only uses group(2)). Keeping bindings
// out of the shared block avoids inflating storage-buffer count past the
// 8-per-stage baseline limit on pipelines that don't actually use them.
const rtWgslTypes = String.raw`
struct RTDispatchHeader {
surfaceW: u32,
surfaceH: u32,
instanceCount: u32,
flags: u32,
};
struct RayDesc {
origin: vec3<f32>,
tMin: f32,
direction: vec3<f32>,
tMax: f32,
};
struct HitInfo {
t: f32,
instanceId: u32,
primitiveId: u32,
hitGroupIndex: u32,
attribs: vec2<f32>,
objectRayOrigin: vec3<f32>,
objectRayDirection: vec3<f32>,
objectToWorldR0: vec4<f32>,
objectToWorldR1: vec4<f32>,
objectToWorldR2: vec4<f32>,
customIndex: u32,
};
// Matches Crafter::BVHNode in interfaces/Crafter.Graphics-Mesh.cppm.
struct BVHNode {
aabbMin: vec3<f32>,
firstChildOrPrim: u32,
aabbMax: vec3<f32>,
primCount: u32,
};
// Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
2026-05-19 00:27:09 +02:00
// attribsOffset is the per-mesh base index (in u32 words) into the
// vertexAttribs heap; meshes registered without per-vertex attribs leave
// it 0 (the heap entries at that range are also 0 / never touched). The
// per-vertex stride lives in the user's WGSL — the library doesn't store
// it because the layout is example-defined (Sponza uses 8 u32 / vertex
// for VertexNormalTangentUVPacked).
2026-05-18 18:43:30 +02:00
struct MeshRecord {
rootAabbMin: vec3<f32>,
vertexOffset: u32,
rootAabbMax: vec3<f32>,
indexOffset: u32,
bvhOffset: u32,
primRemapOffset: u32,
triangleCount: u32,
2026-05-19 00:27:09 +02:00
attribsOffset: u32,
2026-05-18 18:43:30 +02:00
};
// Per-instance TLAS record built by the TLAS-build compute pass.
struct TLASEntry {
aabbMin: vec3<f32>,
maskHGOffset: u32,
aabbMax: vec3<f32>,
blasMeshIdx: u32,
objectToWorldR0: vec4<f32>,
objectToWorldR1: vec4<f32>,
objectToWorldR2: vec4<f32>,
worldToObjectR0: vec4<f32>,
worldToObjectR1: vec4<f32>,
worldToObjectR2: vec4<f32>,
customIndex: u32,
instanceFlags: u32,
_pad0: u32,
_pad1: u32,
};
// ── Ray flag mirror of VkGeometryInstanceFlagBitsKHR + DXR ray flags ──
const RT_FLAG_OPAQUE: u32 = 0x1u;
const RT_FLAG_NO_OPAQUE: u32 = 0x2u;
const RT_FLAG_TERMINATE_ON_FIRST_HIT: u32 = 0x4u;
const RT_FLAG_SKIP_CLOSEST_HIT: u32 = 0x8u;
const RT_FLAG_CULL_BACK_FACING_TRIANGLES: u32 = 0x10u;
const RT_FLAG_CULL_FRONT_FACING_TRIANGLES: u32 = 0x20u;
const RT_FLAG_CULL_OPAQUE: u32 = 0x40u;
const RT_FLAG_CULL_NO_OPAQUE: u32 = 0x80u;
const RT_FLAG_SKIP_TRIANGLES: u32 = 0x100u;
const RT_FLAG_SKIP_AABBS: u32 = 0x200u;
const RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE: u32 = 0x1u;
const RT_INSTANCE_TRIANGLE_FLIP_FACING: u32 = 0x2u;
const RT_INSTANCE_FORCE_OPAQUE: u32 = 0x4u;
const RT_INSTANCE_FORCE_NO_OPAQUE: u32 = 0x8u;
const RT_ANYHIT_ACCEPT: u32 = 0u;
const RT_ANYHIT_IGNORE: u32 = 1u;
const RT_ANYHIT_END_SEARCH: u32 = 2u;
const RT_INTERSECTION_NONE: u32 = 0u;
const RT_INTERSECTION_TRIANGLE: u32 = 1u;
`;
// Megakernel-only bindings. Concatenated after rtWgslTypes for the
// raygen pipeline; the TLAS-build pipeline omits these because it doesn't
// touch them — declaring them would push it past 8 storage buffers per
// stage on the WebGPU baseline.
const rtWgslMegakernelBindings = String.raw`
@group(0) @binding(0) var<uniform> hdr : RTDispatchHeader;
@group(1) @binding(0) var<storage,read> tlasEntries : array<TLASEntry>;
@group(1) @binding(1) var<storage,read> bvhNodes : array<BVHNode>;
@group(1) @binding(2) var<storage,read> meshRecords : array<MeshRecord>;
@group(1) @binding(3) var<storage,read> vertices : array<f32>;
@group(1) @binding(4) var<storage,read> indices : array<u32>;
@group(1) @binding(5) var<storage,read> primRemap : array<u32>;
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
2026-05-19 00:27:09 +02:00
@group(1) @binding(7) var<storage,read> vertexAttribs : array<u32>;
2026-05-18 18:43:30 +02:00
`;
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
// Injected after the user-supplied closesthit/anyhit/miss sources +
// mega-switch dispatchers (which PipelineRTWebGPU emits). User raygen
// sources sit after this block so they can call traceRay.
const rtWgslHelpers = String.raw`
fn _rtFetchTri(meshRec: MeshRecord, triIndex: u32) -> array<vec3<f32>, 3> {
let baseIdx = meshRec.indexOffset + triIndex * 3u;
let i0 = indices[baseIdx + 0u];
let i1 = indices[baseIdx + 1u];
let i2 = indices[baseIdx + 2u];
let baseV = meshRec.vertexOffset;
let v0i = (baseV + i0) * 3u;
let v1i = (baseV + i1) * 3u;
let v2i = (baseV + i2) * 3u;
return array<vec3<f32>, 3>(
vec3<f32>(vertices[v0i + 0u], vertices[v0i + 1u], vertices[v0i + 2u]),
vec3<f32>(vertices[v1i + 0u], vertices[v1i + 1u], vertices[v1i + 2u]),
vec3<f32>(vertices[v2i + 0u], vertices[v2i + 1u], vertices[v2i + 2u]),
);
}
fn _rtAabb(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax: f32) -> bool {
let t0 = (mn - ro) * invRd;
let t1 = (mx - ro) * invRd;
let tmin = min(t0, t1);
let tmax = max(t0, t1);
let tEnter = max(max(tmin.x, tmin.y), tmin.z);
let tExit = min(min(tmax.x, tmax.y), tmax.z);
return tExit >= max(tEnter, 0.0) && tEnter <= tMax;
}
struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 };
fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>,
tMin: f32, tMax: f32) -> _RtTriHit {
var r: _RtTriHit;
r.hit = false;
let e1 = p1 - p0;
let e2 = p2 - p0;
let h = cross(rd, e2);
let a = dot(e1, h);
if (abs(a) < 1e-8) { return r; }
let f = 1.0 / a;
let s = ro - p0;
let u = f * dot(s, h);
if (u < 0.0 || u > 1.0) { return r; }
let q = cross(s, e1);
let v = f * dot(rd, q);
if (v < 0.0 || u + v > 1.0) { return r; }
let t = f * dot(e2, q);
if (t < tMin || t > tMax) { return r; }
r.hit = true; r.t = t; r.u = u; r.v = v;
return r;
}
// Iterative stack-based BLAS traversal. Returns true if traversal was
// terminated by an END_SEARCH from anyhit (caller should stop entirely).
fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
instanceId: u32, hitGroupBase: u32,
bestHit: ptr<function, HitInfo>,
bestT: ptr<function, f32>,
payload: ptr<function, Payload>) -> bool {
let invD = vec3<f32>(1.0) / rayObj.direction;
var stack: array<u32, 32>;
var sp: u32 = 0u;
var nodeRel: u32 = 0u;
loop {
let abs = meshRec.bvhOffset + nodeRel;
let node = bvhNodes[abs];
if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
if (node.primCount > 0u) {
for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
let verts = _rtFetchTri(meshRec, triIndex);
let tr = _rtTri(rayObj.origin, rayObj.direction,
verts[0], verts[1], verts[2],
rayObj.tMin, *bestT);
if (!tr.hit) { continue; }
let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
let facing = dot(geomNormal, rayObj.direction);
if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; }
if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
var candidate: HitInfo;
candidate.t = tr.t;
candidate.instanceId = instanceId;
candidate.primitiveId = triIndex;
candidate.hitGroupIndex = hitGroupBase;
candidate.attribs = vec2<f32>(tr.u, tr.v);
candidate.objectRayOrigin = rayObj.origin;
candidate.objectRayDirection = rayObj.direction;
let opaque = (flags & RT_FLAG_OPAQUE) != 0u
|| (flags & RT_FLAG_NO_OPAQUE) == 0u; // default opaque
if (opaque) {
*bestHit = candidate;
*bestT = tr.t;
if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
} else {
let r = runAnyHit(hitGroupBase, rayObj, candidate, payload);
if (r == RT_ANYHIT_END_SEARCH) {
*bestHit = candidate;
*bestT = tr.t;
return true;
}
if (r == RT_ANYHIT_ACCEPT) {
*bestHit = candidate;
*bestT = tr.t;
if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
}
}
}
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
// inner node — push right, descend left
let left = node.firstChildOrPrim;
let right = left + 1u;
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
nodeRel = left;
}
return false;
}
fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
sbtRecordOffset: u32, sbtRecordStride: u32,
bestHit: ptr<function, HitInfo>,
bestT: ptr<function, f32>,
payload: ptr<function, Payload>) -> bool {
let invD = vec3<f32>(1.0) / rayWorld.direction;
let n = hdr.instanceCount;
for (var i: u32 = 0u; i < n; i = i + 1u) {
let inst = tlasEntries[i];
let instanceMask = inst.maskHGOffset & 0xFFu;
if ((instanceMask & cullMask) == 0u) { continue; }
if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }
// Transform ray to object space.
let r0 = inst.worldToObjectR0;
let r1 = inst.worldToObjectR1;
let r2 = inst.worldToObjectR2;
var rayObj: RayDesc;
rayObj.origin = vec3<f32>(
dot(r0.xyz, rayWorld.origin) + r0.w,
dot(r1.xyz, rayWorld.origin) + r1.w,
dot(r2.xyz, rayWorld.origin) + r2.w,
);
rayObj.direction = vec3<f32>(
dot(r0.xyz, rayWorld.direction),
dot(r1.xyz, rayWorld.direction),
dot(r2.xyz, rayWorld.direction),
);
rayObj.tMin = rayWorld.tMin;
rayObj.tMax = *bestT;
var effective = flags;
let iflags = inst.instanceFlags;
if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
}
if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
}
if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
}
let hitGroupOffset = inst.maskHGOffset >> 8u;
let hitGroupBase = sbtRecordOffset + hitGroupOffset;
let meshRec = meshRecords[inst.blasMeshIdx];
let pre = *bestT;
let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
bestHit, bestT, payload);
if (endSearch) { return true; }
if ((*bestT) < pre) {
// record world-space object-to-world for the closest-hit shader
(*bestHit).objectToWorldR0 = inst.objectToWorldR0;
(*bestHit).objectToWorldR1 = inst.objectToWorldR1;
(*bestHit).objectToWorldR2 = inst.objectToWorldR2;
(*bestHit).customIndex = inst.customIndex;
if ((effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
}
}
return false;
}
fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
rayOrigin: vec3<f32>, rayTMin: f32,
rayDir: vec3<f32>, rayTMax: f32,
payload: ptr<function, Payload>) {
var ray: RayDesc;
ray.origin = rayOrigin;
ray.direction = rayDir;
ray.tMin = rayTMin;
ray.tMax = rayTMax;
var bestHit: HitInfo;
bestHit.t = rayTMax;
var bestT = rayTMax;
let ended = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
sbtRecordOffset, sbtRecordStride,
&bestHit, &bestT, payload);
if (bestT < rayTMax) {
if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) == 0u) {
runClosestHit(bestHit.hitGroupIndex, ray, bestHit, payload);
}
} else {
runMiss(missIndex, ray, payload);
}
}
`;
// ── WGSL library: rayQuery API for non-megakernel compute shaders ────
//
// Mirrors GL_EXT_ray_query semantics that 3DForts's physics shaders use
// (projectile-collide, splash, builder-pick). User WGSL:
// var rq: RayQuery;
// rayQueryInitialize(&rq, 0u, flags, mask, origin, tMin, dir, tMax);
// while (rayQueryProceed(&rq)) {} // run traversal to completion
// if (rayQueryGetCommittedIntersectionType(&rq) != RT_INTERSECTION_NONE) {
// let t = rayQueryGetCommittedT(&rq);
// ...
// }
//
// v1 simplification: traversal force-opaques every hit (no anyhit). The
// user can still test for triangle vs miss and read t/instance/bary.
// Anyhit-style candidate inspection is a future extension.
const rtWgslRayQueryLib = String.raw`
struct RayQuery {
ray: RayDesc,
flags: u32,
cullMask: u32,
committedType: u32,
committedT: f32,
committedInstanceId: u32,
committedInstanceCustomIndex: u32,
committedPrimitiveIndex: u32,
committedBarycentrics: vec2<f32>,
committedObjectRayOrigin: vec3<f32>,
committedObjectRayDirection: vec3<f32>,
committedObjectToWorldR0: vec4<f32>,
committedObjectToWorldR1: vec4<f32>,
committedObjectToWorldR2: vec4<f32>,
committedWorldToObjectR0: vec4<f32>,
committedWorldToObjectR1: vec4<f32>,
committedWorldToObjectR2: vec4<f32>,
done: u32,
};
fn _rqTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
instanceId: u32, customIndex: u32,
inst: TLASEntry,
rq: ptr<function, RayQuery>) {
let invD = vec3<f32>(1.0) / rayObj.direction;
var stack: array<u32, 32>;
var sp: u32 = 0u;
var nodeRel: u32 = 0u;
loop {
let abs = meshRec.bvhOffset + nodeRel;
let node = bvhNodes[abs];
if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, (*rq).committedT)) {
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
if (node.primCount > 0u) {
for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
let verts = _rtFetchTri(meshRec, triIndex);
let tr = _rtTri(rayObj.origin, rayObj.direction,
verts[0], verts[1], verts[2],
rayObj.tMin, (*rq).committedT);
if (!tr.hit) { continue; }
let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
let facing = dot(geomNormal, rayObj.direction);
if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; }
if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
(*rq).committedType = RT_INTERSECTION_TRIANGLE;
(*rq).committedT = tr.t;
(*rq).committedInstanceId = instanceId;
(*rq).committedInstanceCustomIndex = customIndex;
(*rq).committedPrimitiveIndex = triIndex;
(*rq).committedBarycentrics = vec2<f32>(tr.u, tr.v);
(*rq).committedObjectRayOrigin = rayObj.origin;
(*rq).committedObjectRayDirection = rayObj.direction;
(*rq).committedObjectToWorldR0 = inst.objectToWorldR0;
(*rq).committedObjectToWorldR1 = inst.objectToWorldR1;
(*rq).committedObjectToWorldR2 = inst.objectToWorldR2;
(*rq).committedWorldToObjectR0 = inst.worldToObjectR0;
(*rq).committedWorldToObjectR1 = inst.worldToObjectR1;
(*rq).committedWorldToObjectR2 = inst.worldToObjectR2;
}
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
let left = node.firstChildOrPrim;
let right = left + 1u;
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
nodeRel = left;
}
}
fn _rqTraverseTlas(rq: ptr<function, RayQuery>) {
let rayWorld = (*rq).ray;
let invD = vec3<f32>(1.0) / rayWorld.direction;
let n = hdr.instanceCount;
let cullMask = (*rq).cullMask;
let rayFlags = (*rq).flags;
for (var i: u32 = 0u; i < n; i = i + 1u) {
let inst = tlasEntries[i];
let instanceMask = inst.maskHGOffset & 0xFFu;
if ((instanceMask & cullMask) == 0u) { continue; }
if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, (*rq).committedT)) { continue; }
let r0 = inst.worldToObjectR0;
let r1 = inst.worldToObjectR1;
let r2 = inst.worldToObjectR2;
var rayObj: RayDesc;
rayObj.origin = vec3<f32>(
dot(r0.xyz, rayWorld.origin) + r0.w,
dot(r1.xyz, rayWorld.origin) + r1.w,
dot(r2.xyz, rayWorld.origin) + r2.w,
);
rayObj.direction = vec3<f32>(
dot(r0.xyz, rayWorld.direction),
dot(r1.xyz, rayWorld.direction),
dot(r2.xyz, rayWorld.direction),
);
rayObj.tMin = rayWorld.tMin;
rayObj.tMax = (*rq).committedT;
var effective = rayFlags;
let iflags = inst.instanceFlags;
if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
}
let meshRec = meshRecords[inst.blasMeshIdx];
_rqTraverseBlas(rayObj, effective, meshRec, i, inst.customIndex, inst, rq);
if ((rayFlags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u
&& (*rq).committedType != RT_INTERSECTION_NONE) {
return;
}
}
}
fn rayQueryInitialize(rq: ptr<function, RayQuery>, tlasIdx: u32, flags: u32, cullMask: u32,
origin: vec3<f32>, tMin: f32, direction: vec3<f32>, tMax: f32) {
(*rq).ray.origin = origin;
(*rq).ray.tMin = tMin;
(*rq).ray.direction = direction;
(*rq).ray.tMax = tMax;
(*rq).flags = flags;
(*rq).cullMask = cullMask & 0xFFu;
(*rq).committedType = RT_INTERSECTION_NONE;
(*rq).committedT = tMax;
(*rq).done = 0u;
}
fn rayQueryProceed(rq: ptr<function, RayQuery>) -> bool {
if ((*rq).done != 0u) { return false; }
_rqTraverseTlas(rq);
(*rq).done = 1u;
return false;
}
fn rayQueryTerminate(rq: ptr<function, RayQuery>) {
(*rq).done = 1u;
}
fn rayQueryGetCommittedIntersectionType(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedType; }
fn rayQueryGetCommittedT(rq: ptr<function, RayQuery>) -> f32 { return (*rq).committedT; }
fn rayQueryGetCommittedInstanceId(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedInstanceId; }
fn rayQueryGetCommittedInstanceCustomIndex(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedInstanceCustomIndex; }
fn rayQueryGetCommittedPrimitiveIndex(rq: ptr<function, RayQuery>) -> u32 { return (*rq).committedPrimitiveIndex; }
fn rayQueryGetCommittedBarycentrics(rq: ptr<function, RayQuery>) -> vec2<f32> { return (*rq).committedBarycentrics; }
fn rayQueryGetCommittedObjectRayOrigin(rq: ptr<function, RayQuery>) -> vec3<f32> { return (*rq).committedObjectRayOrigin; }
fn rayQueryGetCommittedObjectRayDirection(rq: ptr<function, RayQuery>) -> vec3<f32> { return (*rq).committedObjectRayDirection; }
fn rayQueryGetCommittedWorldPosition(rq: ptr<function, RayQuery>) -> vec3<f32> {
return (*rq).ray.origin + (*rq).ray.direction * (*rq).committedT;
}
`;
// ── Internal compute pipeline: builds TLASEntry[] from the RTInstance[]
// + the meshRecords table. One thread per instance.
// Uses only rtWgslTypes (no megakernel bindings) so it stays well under
// the 8-storage-buffer-per-stage baseline limit.
const tlasBuildWgsl = rtWgslTypes /* needs MeshRecord + TLASEntry */ + String.raw`
struct RTInstance {
transformR0: vec4<f32>,
transformR1: vec4<f32>,
transformR2: vec4<f32>,
customIndexMask: u32, // customIndex: low 24, mask: high 8
sbtFlags: u32, // sbtOffset: low 24, flags: high 8
accelStructureRef: vec2<u32>,
};
@group(0) @binding(0) var<storage, read> inInstances : array<RTInstance>;
@group(0) @binding(1) var<storage, read> inMeshes : array<MeshRecord>;
@group(0) @binding(2) var<storage, read_write> outEntries : array<TLASEntry>;
fn _invMat3(c0: vec3<f32>, c1: vec3<f32>, c2: vec3<f32>) -> array<vec3<f32>, 3> {
let m00 = c1.y*c2.z - c1.z*c2.y;
let m01 = c1.z*c2.x - c1.x*c2.z;
let m02 = c1.x*c2.y - c1.y*c2.x;
let det = c0.x*m00 + c0.y*m01 + c0.z*m02;
let inv = 1.0 / det;
return array<vec3<f32>, 3>(
vec3<f32>(m00, c0.z*c2.y - c0.y*c2.z, c0.y*c1.z - c0.z*c1.y) * inv,
vec3<f32>(m01, c0.x*c2.z - c0.z*c2.x, c0.z*c1.x - c0.x*c1.z) * inv,
vec3<f32>(m02, c0.y*c2.x - c0.x*c2.y, c0.x*c1.y - c0.y*c1.x) * inv,
);
}
@compute @workgroup_size(64, 1, 1)
fn tlasBuildMain(@builtin(global_invocation_id) gid: vec3<u32>) {
let i = gid.x;
if (i >= arrayLength(&inInstances)) { return; }
let inst = inInstances[i];
let meshIdx = inst.accelStructureRef.x;
let custom = inst.customIndexMask & 0xFFFFFFu;
let mask = (inst.customIndexMask >> 24u) & 0xFFu;
let sbtOffset = inst.sbtFlags & 0xFFFFFFu;
let iflags = (inst.sbtFlags >> 24u) & 0xFFu;
let rec = inMeshes[meshIdx];
// 8 corners → world AABB.
var worldMin = vec3<f32>(1e30);
var worldMax = vec3<f32>(-1e30);
for (var c: u32 = 0u; c < 8u; c = c + 1u) {
let corner = vec3<f32>(
select(rec.rootAabbMin.x, rec.rootAabbMax.x, (c & 1u) != 0u),
select(rec.rootAabbMin.y, rec.rootAabbMax.y, (c & 2u) != 0u),
select(rec.rootAabbMin.z, rec.rootAabbMax.z, (c & 4u) != 0u),
);
let wc = vec3<f32>(
dot(inst.transformR0.xyz, corner) + inst.transformR0.w,
dot(inst.transformR1.xyz, corner) + inst.transformR1.w,
dot(inst.transformR2.xyz, corner) + inst.transformR2.w,
);
worldMin = min(worldMin, wc);
worldMax = max(worldMax, wc);
}
// Inverse 3x4 affine.
let inv = _invMat3(inst.transformR0.xyz, inst.transformR1.xyz, inst.transformR2.xyz);
let T = vec3<f32>(inst.transformR0.w, inst.transformR1.w, inst.transformR2.w);
let invT = vec3<f32>(-dot(inv[0], T), -dot(inv[1], T), -dot(inv[2], T));
var e: TLASEntry;
e.aabbMin = worldMin;
e.aabbMax = worldMax;
e.maskHGOffset = mask | (sbtOffset << 8u);
e.blasMeshIdx = meshIdx;
e.objectToWorldR0 = inst.transformR0;
e.objectToWorldR1 = inst.transformR1;
e.objectToWorldR2 = inst.transformR2;
e.worldToObjectR0 = vec4<f32>(inv[0], invT.x);
e.worldToObjectR1 = vec4<f32>(inv[1], invT.y);
e.worldToObjectR2 = vec4<f32>(inv[2], invT.z);
e.customIndex = custom;
e.instanceFlags = iflags;
outEntries[i] = e;
}
`;
// ── RT runtime state ──────────────────────────────────────────────────
// Mesh heaps grow geometrically; each Mesh::Build appends + records its
// offsets in meshRecordsCpu/Buffer. nextMeshHandle is what gets returned
// to C++ as RTInstance::accelerationStructureReference.
const RT_HEAP_INITIAL_BYTES = 64 * 1024;
function makeRtHeap() {
return {
gpu: device.createBuffer({
size: RT_HEAP_INITIAL_BYTES,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
}),
capacity: RT_HEAP_INITIAL_BYTES,
cursor: 0,
};
}
function rtHeapEnsure(h, neededBytes) {
if (h.cursor + neededBytes <= h.capacity) return;
let cap = h.capacity;
while (h.cursor + neededBytes > cap) cap *= 2;
const ng = device.createBuffer({
size: cap,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
});
const enc = device.createCommandEncoder();
if (h.cursor > 0) enc.copyBufferToBuffer(h.gpu, 0, ng, 0, h.cursor);
queue.submit([enc.finish()]);
h.gpu.destroy();
h.gpu = ng;
h.capacity = cap;
// Invalidate cached bind groups that referenced the heap.
rtState.bindGroupCache.clear();
}
const rtState = {
vertHeap: null, // f32 stream (3 floats per vertex)
indexHeap: null, // u32 stream
bvhHeap: null, // BVHNode stream (32 bytes per node)
primRemapHeap: null, // u32 stream
2026-05-19 00:27:09 +02:00
attribsHeap: null, // u32 stream (per-vertex attribute payload; example-defined stride)
2026-05-18 18:43:30 +02:00
meshRecordsBuffer: null, // GPUBuffer of MeshRecord[]
meshRecordsCapacity: 0,
nextMeshHandle: 1,
rtHeader: null, // uniform buffer for RTDispatchHeader (256 B aligned)
bindGroupCache: new Map(), // key → bind group
tlasBuildPipeline: null,
tlasBuildBgl: null,
// Latest TLAS buffer handle from wgpuBuildTLAS, used by rayQuery-capable
// compute shaders at dispatch time.
currentTlas: 0,
currentTlasInstanceCount: 0,
};
function rtInit() {
rtState.vertHeap = makeRtHeap();
rtState.indexHeap = makeRtHeap();
rtState.bvhHeap = makeRtHeap();
rtState.primRemapHeap = makeRtHeap();
2026-05-19 00:27:09 +02:00
rtState.attribsHeap = makeRtHeap();
2026-05-18 18:43:30 +02:00
rtState.meshRecordsCapacity = 16;
rtState.meshRecordsBuffer = device.createBuffer({
size: rtState.meshRecordsCapacity * 48,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
rtState.rtHeader = device.createBuffer({
size: 256,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
// TLAS-build compute pipeline. Only group(0) is used (3 SSBOs).
const mod = device.createShaderModule({ code: tlasBuildWgsl, label: "rt-tlas-build" });
const tlasBuildBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
]});
rtState.tlasBuildBgl = tlasBuildBgl;
rtState.tlasBuildPipeline = device.createComputePipeline({
layout: device.createPipelineLayout({ bindGroupLayouts: [tlasBuildBgl] }),
compute: { module: mod, entryPoint: "tlasBuildMain" },
});
}
function rtMeshRecordsEnsure(meshCount) {
if (meshCount <= rtState.meshRecordsCapacity) return;
let cap = rtState.meshRecordsCapacity;
while (cap < meshCount) cap *= 2;
const ng = device.createBuffer({
size: cap * 48,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
const enc = device.createCommandEncoder();
enc.copyBufferToBuffer(rtState.meshRecordsBuffer, 0, ng, 0,
rtState.meshRecordsCapacity * 48);
queue.submit([enc.finish()]);
rtState.meshRecordsBuffer.destroy();
rtState.meshRecordsBuffer = ng;
rtState.meshRecordsCapacity = cap;
rtState.bindGroupCache.clear();
}
env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
verticesPtr, vertexCount,
indicesPtr, indexCount,
bvhNodesPtr, bvhNodeCount,
2026-05-19 00:27:09 +02:00
primRemapPtr, primRemapCount,
attribsPtr, attribsByteCount) => {
2026-05-18 18:43:30 +02:00
if (!rtState.vertHeap) rtInit();
2026-05-19 00:27:09 +02:00
console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);
2026-05-18 18:43:30 +02:00
const vBytes = vertexCount * 12;
const iBytes = indexCount * 4;
const nBytes = bvhNodeCount * 32;
const rBytes = primRemapCount * 4;
2026-05-19 00:27:09 +02:00
// attribsByteCount must be a multiple of 4 (the heap is array<u32>).
// Round up the upload size; the in-MeshRecord offset is in u32 words.
const aBytes = (attribsByteCount + 3) & ~3;
2026-05-18 18:43:30 +02:00
rtHeapEnsure(rtState.vertHeap, vBytes);
rtHeapEnsure(rtState.indexHeap, iBytes);
rtHeapEnsure(rtState.bvhHeap, nBytes);
rtHeapEnsure(rtState.primRemapHeap, rBytes);
2026-05-19 00:27:09 +02:00
if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);
2026-05-18 18:43:30 +02:00
const vOff = rtState.vertHeap.cursor / 12; // in vec3 units
const iOff = rtState.indexHeap.cursor / 4; // in u32 units
const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units
const rOff = rtState.primRemapHeap.cursor / 4;
2026-05-19 00:27:09 +02:00
const aOff = rtState.attribsHeap.cursor / 4; // in u32 units
2026-05-18 18:43:30 +02:00
// queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
// already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
queue.writeBuffer(rtState.vertHeap.gpu, rtState.vertHeap.cursor,
memU8().buffer, verticesPtr, vBytes);
queue.writeBuffer(rtState.indexHeap.gpu, rtState.indexHeap.cursor,
memU8().buffer, indicesPtr, iBytes);
queue.writeBuffer(rtState.bvhHeap.gpu, rtState.bvhHeap.cursor,
memU8().buffer, bvhNodesPtr, nBytes);
queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
memU8().buffer, primRemapPtr, rBytes);
2026-05-19 00:27:09 +02:00
if (aBytes > 0) {
queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
memU8().buffer, attribsPtr, aBytes);
}
2026-05-18 18:43:30 +02:00
rtState.vertHeap.cursor += vBytes;
rtState.indexHeap.cursor += iBytes;
rtState.bvhHeap.cursor += nBytes;
rtState.primRemapHeap.cursor += rBytes;
2026-05-19 00:27:09 +02:00
rtState.attribsHeap.cursor += aBytes;
2026-05-18 18:43:30 +02:00
const handle = rtState.nextMeshHandle++;
rtMeshRecordsEnsure(handle + 1);
// Build the MeshRecord (48 bytes) and write it.
const rec = new ArrayBuffer(48);
const f32 = new Float32Array(rec);
const u32 = new Uint32Array(rec);
f32[0] = minX; f32[1] = minY; f32[2] = minZ;
u32[3] = vOff;
f32[4] = maxX; f32[5] = maxY; f32[6] = maxZ;
u32[7] = iOff;
u32[8] = nOff;
u32[9] = rOff;
u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
2026-05-19 00:27:09 +02:00
u32[11] = aOff;
2026-05-18 18:43:30 +02:00
queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
return handle;
};
env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
if (!rtState.tlasBuildPipeline) return;
const inst = buffers.get(instanceBufHandle);
const out = buffers.get(tlasOutBufHandle);
if (!inst || !out) {
console.error("[crafter-wgpu] wgpuBuildTLAS: unknown buffer handle");
return;
}
const bg = device.createBindGroup({
layout: rtState.tlasBuildBgl,
entries: [
{ binding: 0, resource: { buffer: inst } },
{ binding: 1, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 2, resource: { buffer: out } },
],
});
if (state.pass) {
// Mid-frame — close the user's compute pass, run our build pass
// on the same encoder, then reopen.
state.pass.end();
state.pass = null;
}
const enc = state.encoder || device.createCommandEncoder();
const ownEncoder = !state.encoder;
const pass = enc.beginComputePass({ label: "tlas-build" });
pass.setPipeline(rtState.tlasBuildPipeline);
pass.setBindGroup(0, bg);
const groups = Math.ceil(instanceCount / 64);
pass.dispatchWorkgroups(groups, 1, 1);
pass.end();
if (ownEncoder) {
queue.submit([enc.finish()]);
} else {
state.pass = state.encoder.beginComputePass();
}
// Publish so rayQuery-capable compute pipelines pick up the latest TLAS
// without each dispatch having to thread the handle explicitly.
rtState.currentTlas = tlasOutBufHandle;
rtState.currentTlasInstanceCount = instanceCount;
};
// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
// switches + raygen + @compute entry) with the library prelude/helpers.
2026-05-19 00:27:09 +02:00
// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }
2026-05-18 18:43:30 +02:00
2026-05-19 00:27:09 +02:00
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
2026-05-18 18:43:30 +02:00
if (!rtState.vertHeap) rtInit();
const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
// Insert helpers at the marker; prepend prelude.
const marker = "// @CRAFTER_RT_LIBRARY_HELPERS_HERE";
let beforeHelpers = userPart;
let afterHelpers = "";
const mi = userPart.indexOf(marker);
if (mi >= 0) {
beforeHelpers = userPart.substring(0, mi);
afterHelpers = userPart.substring(mi + marker.length);
}
const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
2026-05-19 00:27:09 +02:00
// Parse user bindings (same wire format as wgpuLoadCustomShader).
const userBindings = [];
if (bindingsCount > 0) {
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
for (let i = 0; i < bindingsCount; i++) {
const g = dv.getUint8(i*8 + 0);
if (g < 2) {
console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
return 0;
}
userBindings.push({
group: g,
binding: dv.getUint8(i*8 + 1),
kind: dv.getUint8(i*8 + 2),
pushOffset: dv.getUint32(i*8 + 4, true),
});
}
}
const byGroup = new Map();
for (const b of userBindings) {
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
byGroup.get(b.group).push(b);
}
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
2026-05-18 18:43:30 +02:00
let pipeline;
try {
const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
// RTDispatchHeader is 16 bytes; bind exactly that.
const headerBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
buffer: { type: "uniform", minBindingSize: 16 } },
]});
const dataBgl = device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
2026-05-19 00:27:09 +02:00
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
2026-05-18 18:43:30 +02:00
]});
2026-05-19 00:27:09 +02:00
// User binding-group layouts. WebGPU pipeline layouts need a
// contiguous array up to the highest group used, so pad any gaps
// with empty bgls (same rule as wgpuLoadCustomShader).
const userBgls = [];
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
for (let g = 2; g <= highest; g++) {
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
return e;
});
userBgls.push(device.createBindGroupLayout({ entries }));
} else {
userBgls.push(device.createBindGroupLayout({ entries: [] }));
}
}
2026-05-18 18:43:30 +02:00
pipeline = device.createComputePipeline({
2026-05-19 00:27:09 +02:00
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
2026-05-18 18:43:30 +02:00
compute: { module: mod, entryPoint: "main" },
});
const handle = newHandle();
2026-05-19 00:27:09 +02:00
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
2026-05-18 18:43:30 +02:00
return handle;
} catch (e) {
console.error("[crafter-wgpu] RT pipeline compile failed:", e);
console.error("[crafter-wgpu] WGSL was:\n", fullWgsl);
return 0;
}
};
env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
2026-05-19 00:27:09 +02:00
tlasBufHandle, instanceCount, gx, gy,
handlesPtr, handlesCount) => {
2026-05-18 18:43:30 +02:00
if (!state.pass) return;
const pipe = rtPipelines.get(pipelineHandle);
const tlas = buffers.get(tlasBufHandle);
if (!pipe || !tlas) {
console.error("[crafter-wgpu] wgpuDispatchRT: unknown pipeline or tlas");
return;
}
// Write RT header from push data (first 16 bytes). Surface dims + instance count + flags.
const hdr32 = new Uint32Array(4);
hdr32[0] = state.width;
hdr32[1] = state.height;
hdr32[2] = instanceCount;
hdr32[3] = 0;
queue.writeBuffer(rtState.rtHeader, 0, hdr32);
const headerBg = device.createBindGroup({
layout: pipe.headerBgl,
entries: [{ binding: 0, resource: { buffer: rtState.rtHeader, offset: 0, size: 16 } }],
});
const outView = state.outIsPing ? state.pingView : state.pongView;
const dataBg = device.createBindGroup({
layout: pipe.dataBgl,
entries: [
{ binding: 0, resource: { buffer: tlas } },
{ binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
{ binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
{ binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
2026-05-19 00:27:09 +02:00
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
2026-05-18 18:43:30 +02:00
],
});
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, headerBg);
state.pass.setBindGroup(1, dataBg);
2026-05-19 00:27:09 +02:00
// User bindings: walk byGroup in the same sorted order the C++ side
// packed handles[], picking up indices linearly.
if (handlesCount > 0) {
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
let handleIdx = 0;
let bglIdx = 0;
for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
if (pipe.byGroup.has(g)) {
const entries = pipe.byGroup.get(g).map(b => {
const h = handles[handleIdx++];
let resource;
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
else if (b.kind === 3) resource = textureViews.get(h);
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({
layout: pipe.userBgls[bglIdx],
entries,
});
state.pass.setBindGroup(g, bg);
}
bglIdx++;
}
}
2026-05-18 18:43:30 +02:00
state.pass.dispatchWorkgroups(gx, gy, 1);
state.outIsPing = !state.outIsPing;
};
2026-05-18 04:58:52 +02:00
console.log("[crafter-wgpu] init complete; env handlers wired");
} catch (e) {
// Capture any throw so the stub error messages name the real cause
// instead of "(no error captured)". Re-throw so runtime.js's catch
// also logs it.
initError = e instanceof Error ? e : new Error(String(e));
console.error("[crafter-wgpu] init failed:", initError);
throw initError;
}
})(); // end window.crafter_webbuild_env_ready