Crafter.Graphics/additional/dom-webgpu.js

/*
Crafter.Graphics WebGPU bridge — DOM mode UI compute pipeline.

Surface model (high-level, deliberately not 1:1 with WebGPU):
  - JS owns the GPUDevice/queue/compute pipelines/bind-group cache.
  - C++ owns slot allocation and per-frame logic; it calls into ~15 imports.
  - Standard UI shaders are embedded as WGSL strings at the bottom of this
    file and compiled once at startup.

Ping-pong output strategy (per Decision 2 in plan):
  - Two rgba8unorm storage textures sized to the canvas.
  - Each Dispatch reads `prev` (sampled), writes `out` (storage, write-only).
  - JS swaps the two between dispatches, so item-order overdraw works.
  - At frame end, the current `out` is blitted to the canvas via
    copyTextureToTexture (canvas configured as rgba8unorm to match).

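This file is loaded as <script type="module">. Initialisation runs inside an
async IIFE whose promise is published as window.crafter_webbuild_env_ready;
the runtime.js shim is expected to await that promise before calling
_start(), so by the time main() runs every import here is fully wired.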
*/

// ─── env stubs (assigned synchronously, BEFORE any async work) ────────────
// The wasm module's import-link step needs every declared wgpu* import to
// resolve to a Function. If init below throws, the stubs stay in place so
// the wasm still links — and the call site gets a clear error at runtime
// instead of "import object field X is not a Function" at link time.

// Create the shared wasm import object, and the function table it carries,
// only when an earlier script has not already provided them.
if (!window.crafter_webbuild_env) {
    window.crafter_webbuild_env = {};
}
if (!window.crafter_webbuild_env.table) {
    window.crafter_webbuild_env.table =
        new WebAssembly.Table({ initial: 4, element: "anyfunc" });
}

// Set when initialisation fails, so the placeholder imports can report why.
let initError = null;

/**
 * Build a placeholder wasm import that logs and throws when it is invoked.
 *
 * @param {string} name - Import name to mention in the error message.
 * @returns {Function} A function that always throws an Error.
 */
function stub(name) {
    return () => {
        const reason = initError?.message ?? "(no error captured)";
        const msg = `[crafter-wgpu] ${name}() called but WebGPU init failed: ${reason}`;
        console.error(msg);
        throw new Error(msg);
    };
}
{
    // Install a placeholder for every wgpu* import this file later replaces
    // with a real implementation, so the wasm module still links if
    // initialisation fails part-way through.
    const e = window.crafter_webbuild_env;
    for (const n of [
        "wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
        "wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
        "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
        "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
        "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
        // The custom-shader imports are installed further down in this file
        // as well; stub them too so a failed init cannot break linking for
        // modules that import them.
        "wgpuLoadCustomShader", "wgpuDispatchCustom",
    ]) {
        // Size queries are harmless to answer before init: report 0 rather
        // than throwing. Every other import gets a throwing stub.
        e[n] = n.endsWith("Width") || n.endsWith("Height") ? () => 0 : stub(n);
    }
}

// ─── canvas + device init (runs before _start) ───────────────────────────
// Wrapped in an async IIFE assigned to window.crafter_webbuild_env_ready so
// the runtime.js shim can `await` it explicitly before calling _start().
// Sibling <script type="module"> top-level awaits are NOT reliably
// serialized in Firefox (verified 2026-05), so we can't depend on this
// file's TLA to block runtime.js by itself.

window.crafter_webbuild_env_ready = (async () => {
try {

// Without navigator.gpu nothing below can work: replace the page with a
// notice, remember the reason for the stubbed imports, and abort init.
if (!navigator.gpu) {
    const notice = "<p style=\"font-family:sans-serif;padding:24px\">"
        + "WebGPU not available in this browser. Try Chrome 121+ / Firefox 141+ / Safari 26+.</p>";
    document.body.innerHTML = notice;
    initError = new Error("WebGPU unavailable");
    throw initError;
}

// Full-viewport canvas that receives the final blit each frame.
const canvas = Object.assign(document.createElement("canvas"), { id: "crafter-canvas" });
canvas.style.cssText = "position:fixed;inset:0;width:100vw;height:100vh;display:block;";
// Drop the default body margin so the fixed canvas covers the whole viewport.
document.body.style.margin = "0";
document.body.appendChild(canvas);

/**
 * Resize the canvas backing store to the current viewport size.
 *
 * @returns {{ w: number, h: number }} The pixel size the canvas now has.
 */
function syncCanvasSize() {
    // Match canvas pixel size to its CSS pixel size 1:1 so MouseEvent
    // clientX/clientY (CSS pixels) and the wasm-side window.width/.height
    // share the same coordinate space. (HiDPI sharpness is a v2 concern
    // — would need DPR on the GPU side AND a scaling step in the C++
    // Window/Event glue so layout/hit-testing/dispatch counts stay
    // consistent.)
    //
    // Clamp to 1 pixel: a collapsed viewport (hidden iframe, zero-sized
    // window) reports 0, and WebGPU rejects zero-sized textures, so the
    // ping-pong targets could not be created for that size.
    const w = Math.max(1, window.innerWidth);
    const h = Math.max(1, window.innerHeight);
    // Only assign on change: writing canvas.width/height resets the canvas.
    if (canvas.width !== w)  canvas.width  = w;
    if (canvas.height !== h) canvas.height = h;
    return { w, h };
}
syncCanvasSize();

// Acquire the adapter. A null result is treated as a fatal init error: it is
// recorded in initError (read by the stubbed imports), logged, and thrown.
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
    initError = new Error("navigator.gpu.requestAdapter() returned null (no compatible adapter)");
    console.error("[crafter-wgpu]", initError.message);
    throw initError;
}
// Device, its queue, and the canvas context used by every import below.
const device  = await adapter.requestDevice();
const queue   = device.queue;
const ctx     = canvas.getContext("webgpu");
const canvasFormat = "rgba8unorm"; // match storage textures, skip swizzle blit
// COPY_DST is needed because wgpuFrameEnd presents with copyTextureToTexture
// into the canvas texture rather than with a render pass.
ctx.configure({ device, format: canvasFormat, alphaMode: "opaque",
                usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_DST });

// Once the device is lost, flag it so the per-frame imports become no-ops.
device.lost.then(({ message }) => {
    console.error("[crafter-wgpu] device lost:", message);
    state.gpuLost = true;
});

// ─── handle tables ─────────────────────────────────────────────────────

// Handle tables: the wasm side refers to GPU objects by integer handle.
const buffers = new Map();        // handle → GPUBuffer
const textures = new Map();       // handle → GPUTexture
const textureViews = new Map();   // handle → GPUTextureView (same key as `textures`)
const samplers = new Map();       // handle → GPUSampler

// One counter serves every table; it starts at 1, so 0 is never issued.
let nextHandle = 1;

/**
 * Issue the next unused handle.
 * @returns {number} A handle that has not been returned before.
 */
function newHandle() {
    const issued = nextHandle;
    nextHandle += 1;
    return issued;
}

// ─── ping-pong storage textures ────────────────────────────────────────

// Mutable per-device / per-frame state shared by every import below. It is
// also exposed for debugging as window.crafter_wgpu_state. Several imports
// additionally attach ad-hoc debug counters to it (frameBeginCount,
// writeBufferCount, ...), which are not declared here.
const state = {
    // The two ping-pong storage textures and their default views.
    pingTex: null, pingView: null,
    pongTex: null, pongView: null,
    outIsPing: true,  // current "out" target
    // Size the ping-pong textures were last created at.
    width: 0, height: 0,
    // Command encoder and compute pass of the frame in flight (null outside a frame).
    encoder: null,
    pass: null,
    headerRing: null,        // GPUBuffer; uniform header writes ring through this
    headerRingSize: 0,
    headerRingOffset: 0,
    bindGroupCache: new Map(),  // key → GPUBindGroup
    // Set by the device.lost handler; the frame imports return early when true.
    gpuLost: false,
};

/**
 * (Re)allocate both ping-pong storage textures at the given size.
 *
 * Existing textures are destroyed first. The bind-group cache is cleared
 * because it is the only place bind groups for the old views are kept.
 *
 * @param {number} w - Texture width in pixels.
 * @param {number} h - Texture height in pixels.
 */
function recreatePingPong(w, h) {
    state.pingTex?.destroy();
    state.pongTex?.destroy();

    // COPY_DST lets the texture be cleared by a buffer copy; COPY_SRC lets
    // it be blitted to the canvas.
    const usage = GPUTextureUsage.STORAGE_BINDING
                | GPUTextureUsage.TEXTURE_BINDING
                | GPUTextureUsage.COPY_SRC
                | GPUTextureUsage.COPY_DST;
    const allocate = () => device.createTexture({ size: [w, h], format: "rgba8unorm", usage });

    state.pingTex = allocate();
    state.pongTex = allocate();
    state.pingView = state.pingTex.createView();
    state.pongView = state.pongTex.createView();

    state.width = w;
    state.height = h;
    state.outIsPing = true;
    state.bindGroupCache.clear();
}

/**
 * Bring the canvas and the ping-pong textures in line with the viewport.
 *
 * When the size differs from the one the textures were created at, the
 * textures are recreated and the wasm side is told about the new size.
 */
function ensureSized() {
    const { w, h } = syncCanvasSize();
    if (w === state.width && h === state.height) return;

    recreatePingPong(w, h);
    // Notify the wasm side that the surface size changed so it can
    // fire onResize through Window. The wasm export is added by
    // Crafter.Graphics-Window.cpp.
    //
    // `wasmExports` is only filled in lazily by getExports(); fall back to
    // the live instance so a resize that happens before the first memory
    // access is not silently dropped.
    const exportsNow = wasmExports ?? window.crafter_wasi?.instance?.exports;
    const onResize = exportsNow?.__crafterDom_resize;
    if (onResize) onResize(1, w, h);
}

// Header ring buffer: HEADER_RING_SLOTS slots of HEADER_ALIGN bytes each.
// writeHeader() uploads one 48-byte UIDispatchHeader per dispatch into the
// next slot and hands the slot offset to setBindGroup as the dynamic offset.
// The write offset is reset to 0 in wgpuFrameBegin; writeHeader() wraps to
// slot 0 if a single frame uses more than HEADER_RING_SLOTS slots.
// NOTE(review): 256 matches WebGPU's default minUniformBufferOffsetAlignment,
// presumably the reason for the slot size — confirm if limits are ever raised.
const HEADER_ALIGN = 256;
const HEADER_RING_SLOTS = 64;
state.headerRingSize = HEADER_ALIGN * HEADER_RING_SLOTS;
state.headerRing = device.createBuffer({
    size: state.headerRingSize,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});

// ─── pipelines ─────────────────────────────────────────────────────────

// WGSL prelude prepended to every standard UI shader: the UIDispatchHeader
// uniform (group 0), the ping-pong out/prev textures (group 1), and helpers
// for clip testing, "over" blending and a rounded-rectangle signed distance.
const wgslShared = String.raw`
struct UIDispatchHeader {
    outImage:      u32,
    itemBuffer:    u32,
    surfaceW:      u32,
    surfaceH:      u32,
    clipX:         f32,
    clipY:         f32,
    clipW:         f32,
    clipH:         f32,
    itemCount:     u32,
    frameIdx:      u32,
    flags:         u32,
    _pad:          u32,
};

@group(0) @binding(0) var<uniform> hdr : UIDispatchHeader;
@group(1) @binding(0) var outTex  : texture_storage_2d<rgba8unorm, write>;
@group(1) @binding(1) var prevTex : texture_2d<f32>;

fn uiResolvePixel(coord: vec2<u32>) -> bool {
    if (coord.x >= hdr.surfaceW || coord.y >= hdr.surfaceH) { return false; }
    let fx = f32(coord.x); let fy = f32(coord.y);
    if (fx < hdr.clipX || fy < hdr.clipY) { return false; }
    if (fx >= hdr.clipX + hdr.clipW) { return false; }
    if (fy >= hdr.clipY + hdr.clipH) { return false; }
    return true;
}

fn uiBlendOver(dst: vec4<f32>, src: vec4<f32>) -> vec4<f32> {
    let a = clamp(src.a, 0.0, 1.0);
    let rgb = mix(dst.rgb, src.rgb, vec3<f32>(a));
    let outA = a + dst.a * (1.0 - a);
    return vec4<f32>(rgb, outA);
}

fn uiSdRoundRect(p: vec2<f32>, halfSize: vec2<f32>, r4: vec4<f32>) -> f32 {
    // Pick radius for the quadrant p is in. r4 order: (TL, TR, BR, BL).
    // p is relative to the rect centre in pixel space, so p.y > 0 is the
    // bottom half and p.x > 0 is the right half.
    let bottom = select(r4.w, r4.z, p.x > 0.0);   // BL on the left, BR on the right
    let top    = select(r4.x, r4.y, p.x > 0.0);   // TL on the left, TR on the right
    let radius = select(top, bottom, p.y > 0.0);
    let q = abs(p) - halfSize + vec2<f32>(radius);
    return min(max(q.x, q.y), 0.0) + length(max(q, vec2<f32>(0.0))) - radius;
}
`;

// Rounded-rectangle shader. One invocation per pixel: it loads the pixel from
// prevTex, composites in item order every QuadItem whose rect contains the
// pixel centre, and stores the result to outTex. `rect` is (x, y, w, h),
// `corners` feeds uiSdRoundRect, `outline.x` is the outline width and
// `outline.yzw` its colour. Pixels rejected by uiResolvePixel are not written.
const wgslQuads = wgslShared + String.raw`
struct QuadItem {
    rect:    vec4<f32>,
    color:   vec4<f32>,
    corners: vec4<f32>,
    outline: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<QuadItem>;

@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    if (!uiResolvePixel(gid.xy)) { return; }
    let coord = vec2<i32>(i32(gid.x), i32(gid.y));
    var dst = textureLoad(prevTex, coord, 0);
    let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
    for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
        let it = items[i];
        let lo = it.rect.xy;
        let hi = it.rect.xy + it.rect.zw;
        if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
        let halfSize = it.rect.zw * 0.5;
        let p = sp - (it.rect.xy + halfSize);
        let d = uiSdRoundRect(p, halfSize, it.corners);
        let bodyA = clamp(0.5 - d, 0.0, 1.0);
        if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
        var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
        if (it.outline.x > 0.0) {
            let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
            let outlineA = clamp(0.5 - t, 0.0, 1.0);
            src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
                            max(src.a, outlineA));
        }
        if (src.a <= 0.0) { continue; }
        dst = uiBlendOver(dst, src);
    }
    textureStore(outTex, coord, dst);
}
`;

// Circle shader. Same per-pixel structure as the quad shader, but coverage
// comes from the distance to `centerRadius.xy` minus the radius
// `centerRadius.z`. `outline.x` is the outline width and `outline.yzw` its
// colour. There is no bounding-box early-out: every item is evaluated for
// every pixel that passes uiResolvePixel.
const wgslCircles = wgslShared + String.raw`
struct CircleItem {
    centerRadius: vec4<f32>,
    color:        vec4<f32>,
    outline:      vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<CircleItem>;

@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    if (!uiResolvePixel(gid.xy)) { return; }
    let coord = vec2<i32>(i32(gid.x), i32(gid.y));
    var dst = textureLoad(prevTex, coord, 0);
    let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
    for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
        let it = items[i];
        let center = it.centerRadius.xy;
        let radius = it.centerRadius.z;
        let d = length(sp - center) - radius;
        let bodyA = clamp(0.5 - d, 0.0, 1.0);
        if (bodyA <= 0.0 && it.outline.x <= 0.0) { continue; }
        var src = vec4<f32>(it.color.rgb, it.color.a * bodyA);
        if (it.outline.x > 0.0) {
            let t = abs(d + it.outline.x * 0.5) - it.outline.x * 0.5;
            let outlineA = clamp(0.5 - t, 0.0, 1.0);
            src = vec4<f32>(mix(src.rgb, it.outline.yzw, vec3<f32>(outlineA)),
                            max(src.a, outlineA));
        }
        if (src.a <= 0.0) { continue; }
        dst = uiBlendOver(dst, src);
    }
    textureStore(outTex, coord, dst);
}
`;

// Image shader. For each item whose rect contains the pixel centre, the
// pixel's position inside the rect is mapped linearly onto the UV range
// `uv` = (u0, v0, u1, v1), the group-3 texture is sampled at mip level 0, and
// the sample is multiplied by `tint` before blending. The `slots` field is
// declared in the item layout but not read by this shader.
const wgslImages = wgslShared + String.raw`
struct ImageItem {
    rect:  vec4<f32>,
    uv:    vec4<f32>,
    tint:  vec4<f32>,
    slots: vec4<u32>,
};
@group(2) @binding(0) var<storage, read> items : array<ImageItem>;
@group(3) @binding(0) var imgTex : texture_2d<f32>;
@group(3) @binding(1) var imgSampler : sampler;

@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    if (!uiResolvePixel(gid.xy)) { return; }
    let coord = vec2<i32>(i32(gid.x), i32(gid.y));
    var dst = textureLoad(prevTex, coord, 0);
    let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
    for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
        let it = items[i];
        let lo = it.rect.xy;
        let hi = it.rect.xy + it.rect.zw;
        if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
        let t = (sp - lo) / it.rect.zw;
        let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
        let sample = textureSampleLevel(imgTex, imgSampler, uv, 0.0);
        let src = sample * it.tint;
        if (src.a <= 0.0) { continue; }
        dst = uiBlendOver(dst, src);
    }
    textureStore(outTex, coord, dst);
}
`;

// Text shader. Each GlyphItem maps its rect onto a UV range of the group-3
// atlas texture. The red channel of the sample is treated as a signed
// distance value with the edge at 0.5, and alpha is a linear ramp around that
// edge (slope 8), multiplied into the item colour's alpha before blending.
const wgslText = wgslShared + String.raw`
struct GlyphItem {
    rect:  vec4<f32>,
    uv:    vec4<f32>,
    color: vec4<f32>,
};
@group(2) @binding(0) var<storage, read> items : array<GlyphItem>;
@group(3) @binding(0) var atlasTex : texture_2d<f32>;
@group(3) @binding(1) var atlasSampler : sampler;

@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    if (!uiResolvePixel(gid.xy)) { return; }
    let coord = vec2<i32>(i32(gid.x), i32(gid.y));
    var dst = textureLoad(prevTex, coord, 0);
    let sp = vec2<f32>(f32(gid.x) + 0.5, f32(gid.y) + 0.5);
    for (var i: u32 = 0u; i < hdr.itemCount; i = i + 1u) {
        let it = items[i];
        let lo = it.rect.xy;
        let hi = it.rect.xy + it.rect.zw;
        if (sp.x < lo.x || sp.y < lo.y || sp.x >= hi.x || sp.y >= hi.y) { continue; }
        let t = (sp - lo) / it.rect.zw;
        let uv = vec2<f32>(mix(it.uv.x, it.uv.z, t.x), mix(it.uv.y, it.uv.w, t.y));
        // stb_truetype SDF: pixel value ~128 is the edge. Treat alpha as
        // the smoothed step around that midpoint.
        let sdf = textureSampleLevel(atlasTex, atlasSampler, uv, 0.0).r;
        let alpha = clamp((sdf - 0.5) * 8.0 + 0.5, 0.0, 1.0);
        if (alpha <= 0.0) { continue; }
        let src = vec4<f32>(it.color.rgb, it.color.a * alpha);
        dst = uiBlendOver(dst, src);
    }
    textureStore(outTex, coord, dst);
}
`;

/**
 * Compile one standard UI compute pipeline with an explicit layout.
 *
 * Layout: group 0 = dynamic-offset uniform header, group 1 = write-only
 * storage output + sampled previous frame, group 2 = read-only item storage
 * buffer, and optionally group 3 = sampled texture + filtering sampler.
 *
 * @param {string} label - Label attached to the shader module.
 * @param {string} wgsl - Complete WGSL source with a `main` entry point.
 * @param {boolean} hasGroup3 - Whether the shader declares group 3.
 * @returns {{pipeline: GPUComputePipeline, bgl0: GPUBindGroupLayout,
 *            bgl1: GPUBindGroupLayout, bgl2: GPUBindGroupLayout,
 *            bgl3: (GPUBindGroupLayout|null)}}
 */
function makePipeline(label, wgsl, hasGroup3) {
    const stage = GPUShaderStage.COMPUTE;
    const sampledFloat2d = () => ({ sampleType: "float", viewDimension: "2d" });

    const shaderModule = device.createShaderModule({ label, code: wgsl });

    const bgl0 = device.createBindGroupLayout({
        entries: [{
            binding: 0,
            visibility: stage,
            buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 },
        }],
    });
    const bgl1 = device.createBindGroupLayout({
        entries: [
            {
                binding: 0,
                visibility: stage,
                storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" },
            },
            { binding: 1, visibility: stage, texture: sampledFloat2d() },
        ],
    });
    const bgl2 = device.createBindGroupLayout({
        entries: [{ binding: 0, visibility: stage, buffer: { type: "read-only-storage" } }],
    });
    const bgl3 = hasGroup3
        ? device.createBindGroupLayout({
            entries: [
                { binding: 0, visibility: stage, texture: sampledFloat2d() },
                { binding: 1, visibility: stage, sampler: { type: "filtering" } },
            ],
        })
        : null;

    const bindGroupLayouts = hasGroup3 ? [bgl0, bgl1, bgl2, bgl3] : [bgl0, bgl1, bgl2];
    const pipeline = device.createComputePipeline({
        layout: device.createPipelineLayout({ bindGroupLayouts }),
        compute: { module: shaderModule, entryPoint: "main" },
    });
    return { pipeline, bgl0, bgl1, bgl2, bgl3 };
}

// The four standard UI pipelines, compiled once during init. Only the image
// and text shaders declare group 3 (texture + sampler).
const pipeQuads   = makePipeline("ui-quads",   wgslQuads,   false);
const pipeCircles = makePipeline("ui-circles", wgslCircles, false);
const pipeImages  = makePipeline("ui-images",  wgslImages,  true);
const pipeText    = makePipeline("ui-text",    wgslText,    true);

/**
 * Build the group-0 bind group for one pipeline.
 *
 * It binds a 48-byte window at the start of the shared header ring; the
 * actual slot is selected per dispatch through the dynamic offset passed to
 * setBindGroup.
 *
 * @param {{bgl0: GPUBindGroupLayout}} pipe - Pipeline record from makePipeline.
 * @returns {GPUBindGroup}
 */
function makeHdrBG(pipe) {
    const headerWindow = { buffer: state.headerRing, offset: 0, size: 48 };
    return device.createBindGroup({
        layout: pipe.bgl0,
        entries: [{ binding: 0, resource: headerWindow }],
    });
}
// Group-0 (header) bind group for each standard pipeline. All four reference
// the same header ring buffer; each is built against its own pipeline's bgl0.
const hdrBG = {
    quads:   makeHdrBG(pipeQuads),
    circles: makeHdrBG(pipeCircles),
    images:  makeHdrBG(pipeImages),
    text:    makeHdrBG(pipeText),
};

/**
 * Return the group-1 bind group (out storage view + prev sampled view) for
 * the current ping-pong direction.
 *
 * The result is cached by direction and surface size. The cached group is
 * shared across every pipeline whose group-1 layout declares the same two
 * entries — per the contract, that is all standard and custom pipelines.
 *
 * @param {GPUBindGroupLayout} bgl1 - Layout used only when a new group must be built.
 * @returns {GPUBindGroup}
 */
function getGroup1BG(bgl1) {
    const direction = state.outIsPing ? 1 : 0;
    const cacheKey = `g1/${direction}/${state.width}x${state.height}`;
    const cached = state.bindGroupCache.get(cacheKey);
    if (cached) return cached;

    const [outView, prevView] = state.outIsPing
        ? [state.pingView, state.pongView]
        : [state.pongView, state.pingView];
    const created = device.createBindGroup({
        layout: bgl1,
        entries: [
            { binding: 0, resource: outView },
            { binding: 1, resource: prevView },
        ],
    });
    state.bindGroupCache.set(cacheKey, created);
    return created;
}

/**
 * Return the group-2 bind group exposing an items buffer to a standard
 * pipeline, building and caching it on first use.
 *
 * @param {object} pipe - One of the standard pipeline records.
 * @param {number} itemsHandle - Handle of the items storage buffer.
 * @returns {GPUBindGroup}
 * @throws {Error} If no buffer is registered under `itemsHandle`.
 */
function getGroup2BG(pipe, itemsHandle) {
    // One-letter tag per pipeline; anything that is not quads, circles or
    // images is treated as text.
    let tag = "t";
    if (pipe === pipeQuads) tag = "q";
    else if (pipe === pipeCircles) tag = "c";
    else if (pipe === pipeImages) tag = "i";

    const cacheKey = `items/${tag}/${itemsHandle}`;
    const cached = state.bindGroupCache.get(cacheKey);
    if (cached) return cached;

    const itemsBuffer = buffers.get(itemsHandle);
    if (!itemsBuffer) {
        throw new Error(`getGroup2BG: unknown items buffer ${itemsHandle}`);
    }
    const created = device.createBindGroup({
        layout: pipe.bgl2,
        entries: [{ binding: 0, resource: { buffer: itemsBuffer } }],
    });
    state.bindGroupCache.set(cacheKey, created);
    return created;
}

/**
 * Return the group-3 bind group (texture view + sampler) for an image or
 * text pipeline, building and caching it on first use.
 *
 * @param {object} pipe - Pipeline record whose bgl3 is used as the layout.
 * @param {number} texHandle - Handle of the texture whose view is bound.
 * @param {number} sampHandle - Handle of the sampler to bind.
 * @returns {GPUBindGroup}
 * @throws {Error} If either handle is not registered.
 */
function getGroup3BG(pipe, texHandle, sampHandle) {
    const pipeTag = pipe === pipeImages ? "i" : "x";
    const cacheKey = `t3/${texHandle}/${sampHandle}/${pipeTag}`;
    const cached = state.bindGroupCache.get(cacheKey);
    if (cached) return cached;

    const view = textureViews.get(texHandle);
    const sampler = samplers.get(sampHandle);
    if (!view || !sampler) {
        throw new Error(`getGroup3BG: unknown view ${texHandle} / sampler ${sampHandle}`);
    }
    const created = device.createBindGroup({
        layout: pipe.bgl3,
        entries: [
            { binding: 0, resource: view },
            { binding: 1, resource: sampler },
        ],
    });
    state.bindGroupCache.set(cacheKey, created);
    return created;
}

// ─── wasm import surface ───────────────────────────────────────────────

// Cached wasm exports object; filled in on the first successful getExports().
let wasmExports = null;

// Crafter.Build's runtime.js exposes the wasi instance on
// window.crafter_wasi after instantiation, so the exports are looked up
// lazily and every import-side function works regardless of call order.
// Only the exports object is cached, never a typed-array view: the wasm
// memory's backing ArrayBuffer is detached and replaced whenever the wasm
// grows its memory, so a cached view would point at freed memory.
/**
 * @returns {object} The wasm instance's exports.
 * @throws {Error} If the instance has not been published yet.
 */
function getExports() {
    if (!wasmExports) {
        const instance = window.crafter_wasi?.instance;
        if (!instance) {
            throw new Error("[crafter-wgpu] wasm exports not available yet (called too early)");
        }
        wasmExports = instance.exports;
    }
    return wasmExports;
}
// Fresh typed-array views over wasm memory. A new view is built on every
// call so it always wraps the memory's current backing buffer.
function memU8() {
    const { memory } = getExports();
    return new Uint8Array(memory.buffer);
}
function memF32() {
    const { memory } = getExports();
    return new Float32Array(memory.buffer);
}
function memU32() {
    const { memory } = getExports();
    return new Uint32Array(memory.buffer);
}

// Init has succeeded: from here on the stubs installed at the top of this
// file are overwritten with real implementations.
const env = window.crafter_webbuild_env;

// Current canvas backing-store size, in pixels.
env.wgpuGetCanvasWidth = () => {
    return canvas.width;
};
env.wgpuGetCanvasHeight = () => {
    return canvas.height;
};

/**
 * Create a storage buffer and register it under a new handle.
 *
 * @param {number} byteSize - Requested size in bytes.
 * @returns {number} Handle of the new buffer.
 */
env.wgpuCreateBuffer = (byteSize) => {
    const h = newHandle();
    // At least 16 bytes, rounded up to a multiple of 4. wgpuWriteBuffer pads
    // every upload to a 4-byte multiple, so an unrounded buffer (e.g. 18
    // bytes) would be smaller than the padded write (20 bytes) and the
    // upload would fail validation. Storage bindings also need a 4-byte
    // multiple size.
    const size = Math.ceil(Math.max(16, byteSize) / 4) * 4;
    const buf = device.createBuffer({
        size,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
    });
    buffers.set(h, buf);
    return h;
};
/**
 * Upload bytes from wasm memory to the start of a registered buffer.
 * Unknown handles are ignored.
 *
 * @param {number} handle - Destination buffer handle.
 * @param {number} srcPtr - Byte offset of the source data in wasm memory.
 * @param {number} byteSize - Number of bytes to upload.
 */
env.wgpuWriteBuffer = (handle, srcPtr, byteSize) => {
    // Debug bookkeeping surfaced through window.crafter_wgpu_debug().
    state.writeBufferCount = (state.writeBufferCount || 0) + 1;
    state.lastWriteHandle = handle;
    state.lastWriteSize = byteSize;

    const target = buffers.get(handle);
    if (target) {
        // writeBuffer requires a multiple of 4 bytes and an aligned offset,
        // so the size is rounded up to the next multiple of 4.
        const paddedSize = (byteSize + 3) & ~3;
        queue.writeBuffer(target, 0, memU8().buffer, srcPtr, paddedSize);
    }
};
/**
 * Destroy a registered buffer and drop the cached item bind groups that
 * were built for its handle.
 *
 * @param {number} handle - Buffer handle; unknown handles only trigger the cache sweep.
 */
env.wgpuDestroyBuffer = (handle) => {
    const doomed = buffers.get(handle);
    if (doomed) {
        doomed.destroy();
        buffers.delete(handle);
    }
    // Item bind groups are cached under "items/<pipeline tag>/<handle>".
    const suffix = "/" + handle;
    const staleKeys = [...state.bindGroupCache.keys()]
        .filter((key) => key.startsWith("items/") && key.endsWith(suffix));
    for (const key of staleKeys) {
        state.bindGroupCache.delete(key);
    }
};

/**
 * Create a single-channel (r8unorm) texture and register it, together with
 * its default view, under a new handle.
 *
 * @param {number} w - Width in texels.
 * @param {number} h - Height in texels.
 * @returns {number} Handle shared by the texture and its view.
 */
env.wgpuCreateAtlasTexture = (w, h) => {
    const handle = newHandle();
    const usage = GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST;
    const atlas = device.createTexture({ size: [w, h], format: "r8unorm", usage });
    textures.set(handle, atlas);
    textureViews.set(handle, atlas.createView());
    return handle;
};
/**
 * Upload a rectangular region of a one-byte-per-texel image in wasm memory
 * to the same position in a registered texture.
 *
 * @param {number} handle - Destination texture handle; unknown handles are ignored.
 * @param {number} srcPtr - Byte offset of the source image in wasm memory.
 * @param {number} srcW - Source image width (unused here).
 * @param {number} srcH - Source image height (unused here).
 * @param {number} srcBytesPerRow - Row stride of the source image in bytes.
 * @param {number} dstX - Left edge of the region, in both source and texture.
 * @param {number} dstY - Top edge of the region, in both source and texture.
 * @param {number} copyW - Region width in texels.
 * @param {number} copyH - Region height in texels.
 */
env.wgpuWriteAtlasRegion = (handle, srcPtr, srcW, srcH, srcBytesPerRow, dstX, dstY, copyW, copyH) => {
    const tex = textures.get(handle);
    if (!tex) return;
    if (copyW <= 0 || copyH <= 0) return;   // nothing to upload

    // queue.writeTexture, unlike copyBufferToTexture, puts no 256-byte
    // alignment requirement on bytesPerRow, so the region can be uploaded
    // straight out of wasm memory using the source stride — no repacking
    // into a staging buffer is needed.
    const firstByte = srcPtr + dstY * srcBytesPerRow + dstX;
    // The last row only needs copyW bytes, not a full stride.
    const endByte = firstByte + (copyH - 1) * srcBytesPerRow + copyW;
    queue.writeTexture(
        { texture: tex, origin: { x: dstX, y: dstY } },
        memU8().subarray(firstByte, endByte),
        { bytesPerRow: srcBytesPerRow, rowsPerImage: copyH },
        { width: copyW, height: copyH }
    );
};
/**
 * Destroy a registered texture, forget its view, and evict the cached
 * group-3 bind groups that reference it.
 *
 * @param {number} handle - Texture handle; unknown handles only trigger the cache sweep.
 */
env.wgpuDestroyTexture = (handle) => {
    const tex = textures.get(handle);
    if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
    // getGroup3BG caches under "t3/<texture>/<sampler>/<tag>". Handles are
    // never reissued, so without this sweep those entries would stay in the
    // cache forever, mirroring what wgpuDestroyBuffer does for "items/".
    const prefix = `t3/${handle}/`;
    for (const k of state.bindGroupCache.keys()) {
        if (k.startsWith(prefix)) {
            state.bindGroupCache.delete(k);
        }
    }
};

/**
 * Create a bilinear, clamp-to-edge sampler and register it.
 *
 * @returns {number} Handle of the new sampler.
 */
env.wgpuCreateLinearClampSampler = () => {
    const sampler = device.createSampler({
        magFilter: "linear",
        minFilter: "linear",
        addressModeU: "clamp-to-edge",
        addressModeV: "clamp-to-edge",
    });
    const handle = newHandle();
    samplers.set(handle, sampler);
    return handle;
};

// ─── per-frame ──────────────────────────────────────────────────────────

/**
 * Start a frame: resize if needed, open a command encoder, clear both
 * ping-pong textures, and begin the compute pass that all dispatches of the
 * frame are recorded into. Does nothing once the device has been lost.
 */
env.wgpuFrameBegin = () => {
    state.frameBeginCount = (state.frameBeginCount || 0) + 1;
    if (state.gpuLost) return;
    ensureSized();
    state.encoder = device.createCommandEncoder();
    state.outIsPing = true;  // reset so each frame starts on the same target
    state.headerRingOffset = 0;
    // DON'T clearBuffer the header ring here. queue.writeBuffer ops from
    // writeHeader() are enqueued BEFORE this command buffer's submit,
    // so an encoded clearBuffer would wipe them — the dispatches would
    // then read all-zero headers and uiResolvePixel would reject every
    // pixel (surfaceW=0).
    //
    // Clear BOTH textures. The first dispatch writes ping but READS pong as
    // `prev` (see getGroup1BG), so clearing only ping would leave the
    // previous frame's pixels as this frame's background. wgpuFrameEnd also
    // blits pong when no dispatch ran. Ping is still cleared because the
    // shaders skip pixels outside the clip rect, which would otherwise keep
    // stale content in the out texture.
    clearStorageTexture(state.encoder, state.pingTex, state.width, state.height);
    clearStorageTexture(state.encoder, state.pongTex, state.width, state.height);
    state.pass = state.encoder.beginComputePass();
};

// Reusable all-zero source buffer for texture clears, grown on demand.
let zeroBuffer = null;
let zeroBufferSize = 0;

/**
 * Record a copy of zeros over the top-left w × h region of a 4-byte-per-texel
 * texture.
 *
 * @param {GPUCommandEncoder} encoder - Encoder to record the copy into.
 * @param {GPUTexture} tex - Destination texture (needs COPY_DST usage).
 * @param {number} w - Region width in texels.
 * @param {number} h - Region height in texels.
 */
function clearStorageTexture(encoder, tex, w, h) {
    // Row pitch: 4 bytes per texel, rounded up to a multiple of 256.
    const bytesPerRow = (w * 4 + 255) & ~255;
    const requiredBytes = bytesPerRow * h;

    const tooSmall = !zeroBuffer || zeroBufferSize < requiredBytes;
    if (tooSmall) {
        if (zeroBuffer) zeroBuffer.destroy();
        zeroBuffer = device.createBuffer({
            size: requiredBytes,
            usage: GPUBufferUsage.COPY_SRC,
            mappedAtCreation: true,
        });
        const mapped = new Uint8Array(zeroBuffer.getMappedRange());
        mapped.fill(0);
        zeroBuffer.unmap();
        zeroBufferSize = requiredBytes;
    }

    const source = { buffer: zeroBuffer, bytesPerRow, rowsPerImage: h };
    const extent = { width: w, height: h, depthOrArrayLayers: 1 };
    encoder.copyBufferToTexture(source, { texture: tex }, extent);
}

/**
 * Finish the frame: end the compute pass, copy the most recently written
 * ping-pong texture to the canvas, and submit. Does nothing if the device is
 * lost or no frame is open.
 */
env.wgpuFrameEnd = () => {
    state.frameEndCount = (state.frameEndCount || 0) + 1;
    const { encoder } = state;
    if (state.gpuLost || !encoder) return;

    state.pass.end();
    state.pass = null;

    // Each dispatch flips state.outIsPing afterwards, so it now names the
    // NEXT write target; the latest content is in the other texture.
    const finalTex = state.outIsPing ? state.pongTex : state.pingTex;
    const extent = { width: state.width, height: state.height, depthOrArrayLayers: 1 };
    encoder.copyTextureToTexture(
        { texture: finalTex },
        { texture: ctx.getCurrentTexture() },
        extent
    );

    queue.submit([encoder.finish()]);
    state.encoder = null;
};

/**
 * Upload a 48-byte UIDispatchHeader from wasm memory into the next slot of
 * the header ring and advance the ring by one HEADER_ALIGN-sized slot.
 *
 * @param {number} headerPtr - Byte offset of the header in wasm memory.
 * @returns {number} Byte offset of the slot that was written; pass it to
 *   setBindGroup as the dynamic offset.
 */
function writeHeader(headerPtr) {
    if (state.headerRingOffset + HEADER_ALIGN > state.headerRingSize) {
        // More dispatches this frame than the ring has slots: wrap to slot
        // 0. All header writes happen before the frame's command buffer is
        // submitted, so wrapping overwrites a header that an earlier
        // dispatch of the same frame still refers to. Warn once.
        if (!state.headerRingOverrunWarned) {
            state.headerRingOverrunWarned = true;
            console.warn("[crafter-wgpu] header ring overrun: too many dispatches in one frame; earlier headers are being overwritten");
        }
        state.headerRingOffset = 0;
    }
    // Read the offset AFTER the wrap check so the returned dynamic offset is
    // always the slot actually written, never one past the end of the ring.
    const offset = state.headerRingOffset;
    queue.writeBuffer(state.headerRing, offset, memU8().buffer, headerPtr, 48);
    state.headerRingOffset = offset + HEADER_ALIGN;
    return offset;
}

/**
 * Record one standard UI dispatch into the open compute pass, then flip the
 * ping-pong direction. Does nothing when no pass is open.
 *
 * @param {object} pipe - Standard pipeline record from makePipeline.
 * @param {GPUBindGroup} hdrBindGroup - Group-0 bind group for that pipeline.
 * @param {number} headerPtr - Byte offset of the dispatch header in wasm memory.
 * @param {number} gx - Workgroup count in X.
 * @param {number} gy - Workgroup count in Y.
 * @param {number} itemsHandle - Handle of the items storage buffer.
 * @param {GPUBindGroup|null} group3 - Optional texture + sampler bind group.
 */
function dispatchStandard(pipe, hdrBindGroup, headerPtr, gx, gy, itemsHandle, group3) {
    const pass = state.pass;
    if (!pass) return;

    const dynamicOffset = writeHeader(headerPtr);
    pass.setPipeline(pipe.pipeline);
    pass.setBindGroup(0, hdrBindGroup, [dynamicOffset]);
    pass.setBindGroup(1, getGroup1BG(pipe.bgl1));
    pass.setBindGroup(2, getGroup2BG(pipe, itemsHandle));
    if (group3) {
        pass.setBindGroup(3, group3);
    }
    pass.dispatchWorkgroups(gx, gy, 1);

    // The texture just written becomes the next dispatch's `prev`.
    state.outIsPing = !state.outIsPing;
}

// Standard dispatch imports. Each one forwards to dispatchStandard with its
// own pipeline and header bind group. The image and text variants first
// resolve the group-3 (texture + sampler) bind group.
env.wgpuDispatchQuads = (itemsHandle, headerPtr, gx, gy) => {
    // Debug counter surfaced through window.crafter_wgpu_debug().
    state.dispatchQuadsCount = (state.dispatchQuadsCount || 0) + 1;
    dispatchStandard(pipeQuads, hdrBG.quads, headerPtr, gx, gy, itemsHandle, null);
};

env.wgpuDispatchCircles = (itemsHandle, headerPtr, gx, gy) => {
    dispatchStandard(pipeCircles, hdrBG.circles, headerPtr, gx, gy, itemsHandle, null);
};

env.wgpuDispatchImages = (itemsHandle, headerPtr, gx, gy, texHandle, sampHandle) => {
    const imageGroup = getGroup3BG(pipeImages, texHandle, sampHandle);
    dispatchStandard(pipeImages, hdrBG.images, headerPtr, gx, gy, itemsHandle, imageGroup);
};

env.wgpuDispatchText = (itemsHandle, headerPtr, gx, gy, atlasHandle, sampHandle) => {
    const atlasGroup = getGroup3BG(pipeText, atlasHandle, sampHandle);
    dispatchStandard(pipeText, hdrBG.text, headerPtr, gx, gy, itemsHandle, atlasGroup);
};

// ─── custom user-authored shaders ─────────────────────────────────────
//
// Bind-group contract (mirrors :WebGPUComputeShader.cppm):
//   group 0 binding 0 — uniform UIDispatchHeader (dynamic offset, 48b)
//   group 1 binding 0 — texture_storage_2d<rgba8unorm, write> out
//   group 1 binding 1 — texture_2d<f32>                     prev
//   group 2+          — user-declared (UICustomBinding entries)
//
// Each UICustomBinding entry on the wasm side is 8 bytes:
//   u8 group, u8 binding, u8 kind, u8 pad, u32 pushOffset
// kind: 0 = read-only-storage SSBO, 1 = sampled tex 2d, 2 = filtering sampler.

// handle → { pipeline, bgls, hdrBG, byGroup, sortedGroups, fillerGroups }
const customPipelines = new Map();

/**
 * Compile a user-authored compute shader and register it.
 *
 * @param {number} wgslPtr - Byte offset of the UTF-8 WGSL source in wasm memory.
 * @param {number} wgslLen - Length of the WGSL source in bytes.
 * @param {number} bindingsPtr - Byte offset of the UICustomBinding array
 *   (8 bytes each: u8 group, u8 binding, u8 kind, u8 pad, u32 pushOffset).
 * @param {number} bindingsCount - Number of UICustomBinding entries.
 * @returns {number} Pipeline handle, or 0 if the bindings are rejected or
 *   pipeline creation throws.
 */
env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
    const wgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
    const bindings = [];
    const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
    for (let i = 0; i < bindingsCount; i++) {
        bindings.push({
            // Declaration index: wgpuDispatchCustom uses it to pick the
            // matching entry of the handles array, whatever the group order.
            index: i,
            group: dv.getUint8(i*8 + 0),
            binding: dv.getUint8(i*8 + 1),
            kind: dv.getUint8(i*8 + 2),
            pushOffset: dv.getUint32(i*8 + 4, true),
        });
    }

    // Group bindings by @group(N) for layout creation, validating as we go.
    const byGroup = new Map();
    for (const b of bindings) {
        if (b.group < 2) {
            console.error(`[crafter-wgpu] custom shader: @group(${b.group}) reserved; use groups >= 2`);
            return 0;
        }
        if (b.group >= device.limits.maxBindGroups) {
            console.error(`[crafter-wgpu] custom shader: @group(${b.group}) exceeds device limit of ${device.limits.maxBindGroups} bind groups`);
            return 0;
        }
        if (b.kind > 2) {
            // Only kinds 0 (SSBO), 1 (sampled texture) and 2 (sampler) have a
            // layout entry; anything else would produce an invalid layout.
            console.error(`[crafter-wgpu] custom shader: unknown binding kind ${b.kind}`);
            return 0;
        }
        if (!byGroup.has(b.group)) byGroup.set(b.group, []);
        byGroup.get(b.group).push(b);
    }

    // Group 0 = header uniform, Group 1 = ping-pong out+prev — always injected.
    const bgls = [
        device.createBindGroupLayout({ entries: [
            { binding: 0, visibility: GPUShaderStage.COMPUTE,
              buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
        ]}),
        device.createBindGroupLayout({ entries: [
            { binding: 0, visibility: GPUShaderStage.COMPUTE,
              storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
            { binding: 1, visibility: GPUShaderStage.COMPUTE,
              texture: { sampleType: "float", viewDimension: "2d" } },
        ]}),
    ];
    // Sorted custom groups. Gaps are padded with empty layouts so the
    // pipeline layout has one entry per group index up to the highest used.
    const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
    const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
    // Empty bind groups for the padded gap indices; bound at dispatch so
    // every slot of the pipeline layout has a bind group set.
    // NOTE(review): newer implementations may not require binding empty
    // groups; setting them is harmless either way.
    const fillerGroups = new Map();
    for (let g = 2; g <= highest; g++) {
        const members = byGroup.get(g);
        if (members) {
            const entries = members.map(b => {
                const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
                if      (b.kind === 0) e.buffer = { type: "read-only-storage" };
                else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
                else                   e.sampler = { type: "filtering" };
                return e;
            });
            bgls.push(device.createBindGroupLayout({ entries }));
        } else {
            const emptyLayout = device.createBindGroupLayout({ entries: [] });
            bgls.push(emptyLayout);
            fillerGroups.set(g, device.createBindGroup({ layout: emptyLayout, entries: [] }));
        }
    }

    let pipeline;
    try {
        // NOTE(review): WGSL compile errors may be reported asynchronously
        // by the device rather than thrown here — confirm whether error
        // scopes are needed to surface them.
        const mod = device.createShaderModule({ code: wgsl });
        const layout = device.createPipelineLayout({ bindGroupLayouts: bgls });
        pipeline = device.createComputePipeline({ layout, compute: { module: mod, entryPoint: "main" } });
    } catch (e) {
        console.error("[crafter-wgpu] custom shader compile failed:", e);
        return 0;
    }

    const hdrBG = device.createBindGroup({
        layout: bgls[0],
        entries: [{ binding: 0, resource: { buffer: state.headerRing, offset: 0, size: 48 } }],
    });

    const handle = newHandle();
    customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups, fillerGroups });
    return handle;
};

/**
 * Record one dispatch of a custom pipeline into the open compute pass, then
 * flip the ping-pong direction. The dispatch is skipped, with an error
 * logged, if the pipeline or any bound resource is unknown.
 *
 * @param {number} pipelineHandle - Handle returned by wgpuLoadCustomShader.
 * @param {number} pushPtr - Byte offset of the push block in wasm memory; its
 *   first 48 bytes are the UIDispatchHeader.
 * @param {number} pushBytes - Size of the push block (unused here).
 * @param {number} handlesPtr - Byte offset of a u32 array of resource handles,
 *   one per declared binding, in declaration order.
 * @param {number} handlesCount - Number of entries in that array.
 * @param {number} gx - Workgroup count in X.
 * @param {number} gy - Workgroup count in Y.
 * @param {number} gz - Workgroup count in Z.
 */
env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handlesCount,
                          gx, gy, gz) => {
    state.dispatchCustomCount = (state.dispatchCustomCount || 0) + 1;
    if (!state.pass) return;
    const pipe = customPipelines.get(pipelineHandle);
    if (!pipe) {
        console.error("[crafter-wgpu] wgpuDispatchCustom: unknown pipeline", pipelineHandle);
        return;
    }

    // Resolve every resource BEFORE touching the pass or the header ring, so
    // a bad handle skips the dispatch cleanly instead of throwing half-way
    // through encoding.
    const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
    const userGroups = [];
    for (const g of pipe.sortedGroups) {
        const entries = [];
        for (const b of pipe.byGroup.get(g)) {
            // handles[] is in declaration order, which need not match the
            // sorted group order walked here, so index by declaration slot.
            if (b.index >= handlesCount) {
                console.error(`[crafter-wgpu] wgpuDispatchCustom: missing handle for binding #${b.index} (got ${handlesCount})`);
                return;
            }
            const h = handles[b.index];
            let resource;
            if (b.kind === 0) {
                const buf = buffers.get(h);
                resource = buf ? { buffer: buf } : undefined;
            } else if (b.kind === 1) {
                resource = textureViews.get(h);
            } else {
                resource = samplers.get(h);
            }
            if (!resource) {
                console.error(`[crafter-wgpu] wgpuDispatchCustom: unknown resource handle ${h} for @group(${g}) @binding(${b.binding})`);
                return;
            }
            entries.push({ binding: b.binding, resource });
        }
        userGroups.push([g, device.createBindGroup({ layout: pipe.bgls[g], entries })]);
    }

    // Write header (first 48 bytes of push).
    const off = writeHeader(pushPtr);

    state.pass.setPipeline(pipe.pipeline);
    state.pass.setBindGroup(0, pipe.hdrBG, [off]);
    state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1]));
    for (const [g, bg] of userGroups) {
        state.pass.setBindGroup(g, bg);
    }
    // Pipelines registered without filler groups simply have none to bind.
    for (const [g, bg] of pipe.fillerGroups ?? []) {
        state.pass.setBindGroup(g, bg);
    }

    state.pass.dispatchWorkgroups(gx, gy, gz);
    state.outIsPing = !state.outIsPing;
};

// Expose the live state object, the GPUDevice and the canvas context on
// `window` so they can be inspected from the browser console.
Object.assign(window, {
    crafter_wgpu_state: state,
    crafter_wgpu_device: device,
    crafter_wgpu_canvasCtx: ctx,
});
// Returns a plain-object snapshot of bridge state for console diagnostics:
// surface size, ping-pong/encoder/pass status, handle-table sizes, and the
// per-call counters maintained by the env handlers (0 until first bumped).
window.crafter_wgpu_debug = () => ({
    width: state.width, height: state.height,
    outIsPing: state.outIsPing,
    encoderActive: !!state.encoder,
    passActive: !!state.pass,
    bgCacheSize: state.bindGroupCache.size,
    bufferHandles: buffers.size,
    textureHandles: textures.size,
    samplerHandles: samplers.size,
    headerRingOffset: state.headerRingOffset,
    frameBeginCount: state.frameBeginCount || 0,
    frameEndCount: state.frameEndCount || 0,
    dispatchQuadsCount: state.dispatchQuadsCount || 0,
    // Maintained by wgpuDispatchCustom; reported alongside the other counters.
    dispatchCustomCount: state.dispatchCustomCount || 0,
    writeBufferCount: state.writeBufferCount || 0,
    lastWriteHandle: state.lastWriteHandle,
    lastWriteSize: state.lastWriteSize,
});

// Lists every handle currently registered in the buffer table.
window.crafter_wgpu_bufferKeys = () => Array.from(buffers.keys());

/**
 * Debug helper: reads back the leading bytes of a registered buffer (by
 * default the first 64, e.g. the first QuadItem) to verify the GPU sees
 * what the CPU wrote.
 *
 * The request is rounded up to a multiple of 4 bytes (required by
 * copyBufferToBuffer and by Float32Array) and clamped to the source
 * buffer's size, and the staging buffer is sized to match instead of a
 * fixed 256 bytes.
 *
 * @param {number} handle         Key into the buffer handle table.
 * @param {number} [byteSize=64]  Number of bytes to read from offset 0.
 * @returns {Promise<number[]|string>} The bytes viewed as f32 values, or a
 *     message string when no buffer is registered under `handle`.
 */
window.crafter_wgpu_readBuffer = async (handle, byteSize = 64) => {
    const buf = buffers.get(handle);
    if (!buf) return "no buffer for handle " + handle;

    const wanted = Math.ceil(byteSize / 4) * 4;
    const available = Math.floor(buf.size / 4) * 4;
    const size = Math.min(wanted, available);
    // Also catches NaN / negative requests.
    if (!(size > 0)) return [];

    const read = device.createBuffer({ size, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST });
    try {
        const enc = device.createCommandEncoder();
        enc.copyBufferToBuffer(buf, 0, read, 0, size);
        device.queue.submit([enc.finish()]);
        await read.mapAsync(GPUMapMode.READ);
        // Copy out to a plain array before unmapping detaches the range.
        const data = [...new Float32Array(read.getMappedRange())];
        read.unmap();
        return data;
    } finally {
        // Release the staging buffer even if the map fails.
        read.destroy();
    }
};

// Surface size getters (the wasm side may query these on Resize events).
// Each reports the tracked state dimension, falling back to the canvas's
// own dimension while the tracked value is still falsy.
{
    const surfaceDimension = (axis) => () => state[axis] || canvas[axis];
    env.wgpuSurfaceWidth = surfaceDimension("width");
    env.wgpuSurfaceHeight = surfaceDimension("height");
}

// One-shot init, called by Crafter::Device::Initialize on the wasm side.
// Syncs the canvas size and (re)allocates the ping-pong targets at that
// size, so Buffer/Texture creation before the first frame has a concrete
// size to work against.
env.wgpuInit = () => {
    const size = syncCanvasSize();
    recreatePingPong(size.w, size.h);
};

// Resize listener on the same `resize` event dom-env.js listens to.
// Sizing is deferred to the next frame begin, so the handler body is empty.
window.addEventListener("resize", () => {
    // Intentionally empty: ensureSized in wgpuFrameBegin does the work.
});

console.log("[crafter-wgpu] init complete; env handlers wired");
} catch (e) {
    // Capture any throw so the stub error messages name the real cause
    // instead of "(no error captured)". Re-throw so runtime.js's catch
    // also logs it.
    initError = e instanceof Error ? e : new Error(String(e));
    console.error("[crafter-wgpu] init failed:", initError);
    throw initError;
}
})(); // end window.crafter_webbuild_env_ready