From b5d0f52da0b9a4c170863f215d0289ac4c1602d0 Mon Sep 17 00:00:00 2001
From: Jorijn van der Graaf <jorijnvdgraaf@catcrafts.net>
Date: Tue, 19 May 2026 00:27:09 +0200
Subject: [PATCH] webgpu sponza

---
 additional/dom-webgpu.js                      | 218 ++++++++-
 examples/Sponza/README.md                     |  58 +++
 examples/Sponza/closesthit.glsl               |  23 +
 examples/Sponza/closesthit.wgsl               |  90 ++++
 examples/Sponza/main.cpp                      | 445 ++++++++++++++++++
 examples/Sponza/miss.glsl                     |  11 +
 examples/Sponza/miss.wgsl                     |  16 +
 examples/Sponza/project.cpp                   |  92 ++++
 examples/Sponza/raygen.glsl                   |  52 ++
 examples/Sponza/raygen.wgsl                   | 109 +++++
 .../Crafter.Graphics-Mesh-WebGPU.cpp          |  66 ++-
 .../Crafter.Graphics-PipelineRTWebGPU.cpp     |   9 +-
 ...Crafter.Graphics-DescriptorHeapWebGPU.cppm |  10 +
 interfaces/Crafter.Graphics-Image2D.cppm      | 166 +++++++
 interfaces/Crafter.Graphics-Mesh.cppm         |  10 +
 .../Crafter.Graphics-PipelineRTWebGPU.cppm    |  11 +-
 interfaces/Crafter.Graphics-RTPass.cppm       |  10 +-
 interfaces/Crafter.Graphics-WebGPU.cppm       |  45 +-
 .../Crafter.Graphics-WebGPUComputeShader.cppm |   7 +-
 interfaces/Crafter.Graphics.cppm              |   1 +
 project.cpp                                   |  35 +-
 21 files changed, 1426 insertions(+), 58 deletions(-)
 create mode 100644 examples/Sponza/README.md
 create mode 100644 examples/Sponza/closesthit.glsl
 create mode 100644 examples/Sponza/closesthit.wgsl
 create mode 100644 examples/Sponza/main.cpp
 create mode 100644 examples/Sponza/miss.glsl
 create mode 100644 examples/Sponza/miss.wgsl
 create mode 100644 examples/Sponza/project.cpp
 create mode 100644 examples/Sponza/raygen.glsl
 create mode 100644 examples/Sponza/raygen.wgsl
 create mode 100644 interfaces/Crafter.Graphics-Image2D.cppm
diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js
index 834f320..6e35c21 100644
--- a/additional/dom-webgpu.js
+++ b/additional/dom-webgpu.js
@@ -43,6 +43,8 @@ function stub(name) {
         "wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
         "wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
         "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
+        "wgpuCreateImage2D", "wgpuWriteImage2D",
+        "wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
         "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
         "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
         "wgpuLoadCustomShader", "wgpuDispatchCustom",
@@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => {
     if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
 };
 
+// Creates a w × h rgba8unorm texture (the backing store for Image2D<RGBA8>),
+// registers it and its default view under a fresh handle, and returns the handle.
+// Distinct from the atlas path (r8unorm, sub-region writes): whole-image, one-shot upload.
+env.wgpuCreateImage2D = (w, h) => {
+    const handle = newHandle();
+    const tex = device.createTexture({
+        size: [w, h],
+        format: "rgba8unorm",
+        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, // bound in shaders + written by wgpuWriteImage2D
+    });
+    textures.set(handle, tex);                  // same maps wgpuDestroyTexture cleans up
+    textureViews.set(handle, tex.createView()); // default view of the texture
+    return handle;
+};
+// Creates a 2D texture array: layerCount layers of identical (w × h) rgba8unorm,
+// used by Image2DArray<RGBA8> to back one material albedo per layer. Returns the new
+// handle; shaders sample with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
+env.wgpuCreateImage2DArray = (w, h, layerCount) => {
+    const handle = newHandle();
+    const tex = device.createTexture({
+        size: [w, h, layerCount], // third component is the number of array layers
+        dimension: "2d",
+        format: "rgba8unorm",
+        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, // bound in shaders + written by wgpuWriteImage2DLayer
+    });
+    textures.set(handle, tex);
+    textureViews.set(handle, tex.createView({
+        dimension: "2d-array",       // explicit, to match the "2d-array" layout entries used for binding kind 3
+        arrayLayerCount: layerCount, // the view spans every layer
+    }));
+    return handle;
+};
+env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => { // upload one w × h RGBA8 image into array layer `layer`
+    const tex = textures.get(handle);
+    if (!tex) return; // unknown handle: silently ignore the write
+    const srcBPR = w * 4; // tightly packed source row: 4 bytes per RGBA8 texel
+    const alignedBPR = (srcBPR + 255) & ~255; // row size rounded up to the next multiple of 256
+    if (alignedBPR === srcBPR) { // rows already 256-aligned: upload straight from wasm memory
+        queue.writeTexture(
+            { texture: tex, origin: [0, 0, layer] }, // z origin selects the destination layer
+            memU8().subarray(srcPtr, srcPtr + byteSize),
+            { bytesPerRow: srcBPR, rowsPerImage: h },
+            { width: w, height: h, depthOrArrayLayers: 1 }
+        );
+    } else { // otherwise copy row by row into a staging buffer with padded rows
+        const staging = new Uint8Array(alignedBPR * h);
+        const src = memU8();
+        for (let y = 0; y < h; y++) {
+            staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR),
+                        y * alignedBPR);
+        }
+        queue.writeTexture( // note: byteSize is not consulted on this path; w and h define the copy
+            { texture: tex, origin: [0, 0, layer] },
+            staging,
+            { bytesPerRow: alignedBPR, rowsPerImage: h },
+            { width: w, height: h, depthOrArrayLayers: 1 }
+        );
+    }
+};
+
+env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => { // upload a whole w × h RGBA8 image from wasm memory
+    const tex = textures.get(handle);
+    if (!tex) return; // unknown handle: silently ignore the write
+    // RGBA8 = 4 bytes per texel, so a tightly packed source row is w*4 bytes.
+    // NOTE(review): the 256-byte bytesPerRow rule looks like copyBufferToTexture's, not writeTexture's — confirm the repack is needed.
+    const srcBPR = w * 4;
+    const alignedBPR = (srcBPR + 255) & ~255;
+    if (alignedBPR === srcBPR) {
+        // Rows are already 256-aligned (w * 4 is a multiple of 256 → w is a multiple of 64).
+        queue.writeTexture(
+            { texture: tex },
+            memU8().subarray(srcPtr, srcPtr + byteSize),
+            { bytesPerRow: srcBPR, rowsPerImage: h },
+            { width: w, height: h }
+        );
+    } else {
+        // Repack row by row into a staging buffer whose rows are padded to 256 bytes.
+        // One allocation per call; byteSize is not consulted here (w and h define the copy).
+        const staging = new Uint8Array(alignedBPR * h);
+        const src = memU8();
+        for (let y = 0; y < h; y++) {
+            staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR),
+                        y * alignedBPR);
+        }
+        queue.writeTexture(
+            { texture: tex },
+            staging,
+            { bytesPerRow: alignedBPR, rowsPerImage: h },
+            { width: w, height: h }
+        );
+    }
+};
+
 env.wgpuCreateLinearClampSampler = () => {
     const handle = newHandle();
     samplers.set(handle, device.createSampler({
@@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
                 { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
                 { binding: 6, visibility: GPUShaderStage.COMPUTE,
                   storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+                { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
             ]})
             : device.createBindGroupLayout({ entries: [
                 { binding: 0, visibility: GPUShaderStage.COMPUTE,
@@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
         if (byGroup.has(g)) {
             const entries = byGroup.get(g).map(b => {
                 const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
-                if      (b.kind === 0) e.buffer = { type: "read-only-storage" };
+                if      (b.kind === 0) e.buffer  = { type: "read-only-storage" };
                 else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
                 else if (b.kind === 2) e.sampler = { type: "filtering" };
+                else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
                 return e;
             });
             bgls.push(device.createBindGroupLayout({ entries }));
@@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
                 { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
                 { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
                 { binding: 6, resource: outView },
+                { binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
             ],
         });
         state.pass.setBindGroup(1, rtBG);
@@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
             if      (b.kind === 0) resource = { buffer: buffers.get(h) };
             else if (b.kind === 1) resource = textureViews.get(h);
             else if (b.kind === 2) resource = samplers.get(h);
+            else if (b.kind === 3) resource = textureViews.get(h);
             return { binding: b.binding, resource };
         });
         const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
@@ -981,6 +1080,12 @@ struct BVHNode {
 };
 
 // Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
+// attribsOffset is the per-mesh base index (in u32 words) into the
+// vertexAttribs heap; a mesh registered without per-vertex attribs gets
+// the current heap cursor and owns zero words there, so shaders must not
+// read through it. The per-vertex stride lives in the user's WGSL — the
+// library doesn't store it because the layout is example-defined (Sponza
+// uses 12 u32 / vertex for VertexNormalTangentUVPacked).
 struct MeshRecord {
     rootAabbMin:     vec3<f32>,
     vertexOffset:    u32,
@@ -989,7 +1094,7 @@ struct MeshRecord {
     bvhOffset:       u32,
     primRemapOffset: u32,
     triangleCount:   u32,
-    _pad:            u32,
+    attribsOffset:   u32,
 };
 
 // Per-instance TLAS record built by the TLAS-build compute pass.
@@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw`
 @group(1) @binding(4) var<storage,read>  indices     : array<u32>;
 @group(1) @binding(5) var<storage,read>  primRemap   : array<u32>;
 @group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
+@group(1) @binding(7) var<storage,read>  vertexAttribs : array<u32>;
 `;
 
 const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
@@ -1565,6 +1671,7 @@ const rtState = {
     indexHeap: null,       // u32 stream
     bvhHeap: null,         // BVHNode stream (32 bytes per node)
     primRemapHeap: null,   // u32 stream
+    attribsHeap: null,     // u32 stream (per-vertex attribute payload; example-defined stride)
 
     meshRecordsBuffer: null,    // GPUBuffer of MeshRecord[]
     meshRecordsCapacity: 0,
@@ -1588,6 +1695,7 @@ function rtInit() {
     rtState.indexHeap     = makeRtHeap();
     rtState.bvhHeap       = makeRtHeap();
     rtState.primRemapHeap = makeRtHeap();
+    rtState.attribsHeap   = makeRtHeap();
     rtState.meshRecordsCapacity = 16;
     rtState.meshRecordsBuffer = device.createBuffer({
         size: rtState.meshRecordsCapacity * 48,
@@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
                             verticesPtr, vertexCount,
                             indicesPtr,  indexCount,
                             bvhNodesPtr, bvhNodeCount,
-                            primRemapPtr, primRemapCount) => {
+                            primRemapPtr, primRemapCount,
+                            attribsPtr,  attribsByteCount) => {
     if (!rtState.vertHeap) rtInit();
+    console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);
 
     const vBytes   = vertexCount  * 12;
     const iBytes   = indexCount   * 4;
     const nBytes   = bvhNodeCount * 32;
     const rBytes   = primRemapCount * 4;
+    // attribsByteCount must be a multiple of 4 (the heap is array<u32>).
+    // Round up the upload size; the in-MeshRecord offset is in u32 words.
+    const aBytes   = (attribsByteCount + 3) & ~3;
 
     rtHeapEnsure(rtState.vertHeap,      vBytes);
     rtHeapEnsure(rtState.indexHeap,     iBytes);
     rtHeapEnsure(rtState.bvhHeap,       nBytes);
     rtHeapEnsure(rtState.primRemapHeap, rBytes);
+    if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);
 
     const vOff = rtState.vertHeap.cursor      / 12;   // in vec3 units
     const iOff = rtState.indexHeap.cursor     / 4;    // in u32 units
     const nOff = rtState.bvhHeap.cursor       / 32;   // in BVHNode units
     const rOff = rtState.primRemapHeap.cursor / 4;
+    const aOff = rtState.attribsHeap.cursor   / 4;    // in u32 units
 
     // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
     // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
@@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
                       memU8().buffer, bvhNodesPtr, nBytes);
     queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
                       memU8().buffer, primRemapPtr, rBytes);
+    if (aBytes > 0) {
+        queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
+                          memU8().buffer, attribsPtr, aBytes);
+    }
 
     rtState.vertHeap.cursor      += vBytes;
     rtState.indexHeap.cursor     += iBytes;
     rtState.bvhHeap.cursor       += nBytes;
     rtState.primRemapHeap.cursor += rBytes;
+    rtState.attribsHeap.cursor   += aBytes;
 
     const handle = rtState.nextMeshHandle++;
     rtMeshRecordsEnsure(handle + 1);
@@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
     u32[8] = nOff;
     u32[9] = rOff;
     u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
-    u32[11] = 0;
+    u32[11] = aOff;
     queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
 
     return handle;
@@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
 
 // RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
 // switches + raygen + @compute entry) with the library prelude/helpers.
-const rtPipelines = new Map(); // handle → { pipeline, bgls }
+// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
+// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
+// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
+// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
+const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }
 
-env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
+env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
     if (!rtState.vertHeap) rtInit();
     const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
 
@@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
     }
     const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
 
+    // Parse user bindings (same wire format as wgpuLoadCustomShader).
+    const userBindings = [];
+    if (bindingsCount > 0) {
+        const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
+        for (let i = 0; i < bindingsCount; i++) {
+            const g = dv.getUint8(i*8 + 0);
+            if (g < 2) {
+                console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
+                return 0;
+            }
+            userBindings.push({
+                group:      g,
+                binding:    dv.getUint8(i*8 + 1),
+                kind:       dv.getUint8(i*8 + 2),
+                pushOffset: dv.getUint32(i*8 + 4, true),
+            });
+        }
+    }
+    const byGroup = new Map();
+    for (const b of userBindings) {
+        if (!byGroup.has(b.group)) byGroup.set(b.group, []);
+        byGroup.get(b.group).push(b);
+    }
+    const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
+
     let pipeline;
     try {
         const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
@@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
             { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
             { binding: 6, visibility: GPUShaderStage.COMPUTE,
               storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+            { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
         ]});
+        // User binding-group layouts. WebGPU pipeline layouts need a
+        // contiguous array up to the highest group used, so pad any gaps
+        // with empty bgls (same rule as wgpuLoadCustomShader).
+        const userBgls = [];
+        const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
+        for (let g = 2; g <= highest; g++) {
+            if (byGroup.has(g)) {
+                const entries = byGroup.get(g).map(b => {
+                    const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
+                    if      (b.kind === 0) e.buffer  = { type: "read-only-storage" };
+                    else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
+                    else if (b.kind === 2) e.sampler = { type: "filtering" };
+                    else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
+                    return e;
+                });
+                userBgls.push(device.createBindGroupLayout({ entries }));
+            } else {
+                userBgls.push(device.createBindGroupLayout({ entries: [] }));
+            }
+        }
         pipeline = device.createComputePipeline({
-            layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
+            layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
             compute: { module: mod, entryPoint: "main" },
         });
         const handle = newHandle();
-        rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
+        rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
         return handle;
     } catch (e) {
         console.error("[crafter-wgpu] RT pipeline compile failed:", e);
@@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
 };
 
 env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
-                      tlasBufHandle, instanceCount, gx, gy) => {
+                      tlasBufHandle, instanceCount, gx, gy,
+                      handlesPtr, handlesCount) => {
     if (!state.pass) return;
     const pipe = rtPipelines.get(pipelineHandle);
     const tlas = buffers.get(tlasBufHandle);
@@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
             { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
             { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
             { binding: 6, resource: outView },
+            { binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
         ],
     });
 
     state.pass.setPipeline(pipe.pipeline);
     state.pass.setBindGroup(0, headerBg);
     state.pass.setBindGroup(1, dataBg);
+
+    // User bindings: walk byGroup in the same sorted order the C++ side
+    // packed handles[], picking up indices linearly.
+    if (handlesCount > 0) {
+        const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
+        let handleIdx = 0;
+        let bglIdx = 0;
+        for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
+            if (pipe.byGroup.has(g)) {
+                const entries = pipe.byGroup.get(g).map(b => {
+                    const h = handles[handleIdx++];
+                    let resource;
+                    if      (b.kind === 0) resource = { buffer: buffers.get(h) };
+                    else if (b.kind === 1) resource = textureViews.get(h);
+                    else if (b.kind === 2) resource = samplers.get(h);
+                    else if (b.kind === 3) resource = textureViews.get(h);
+                    return { binding: b.binding, resource };
+                });
+                const bg = device.createBindGroup({
+                    layout: pipe.userBgls[bglIdx],
+                    entries,
+                });
+                state.pass.setBindGroup(g, bg);
+            }
+            bglIdx++;
+        }
+    }
+
     state.pass.dispatchWorkgroups(gx, gy, 1);
     state.outIsPing = !state.outIsPing;
 };
diff --git a/examples/Sponza/README.md b/examples/Sponza/README.md
new file mode 100644
index 0000000..5545764
--- /dev/null
+++ b/examples/Sponza/README.md
@@ -0,0 +1,58 @@
+# Sponza example
+
+Loads the Sponza atrium as a `.cmesh` + one albedo `.ctex` and renders
+it via ray tracing on both Vulkan (native) and WebGPU (wasm). Same
+`main.cpp`, `#ifdef CRAFTER_GRAPHICS_WINDOW_DOM` selects the backend.
+
+## What this example proves
+
+- `.cmesh` and `.ctex` decompression round-trip on both backends
+  (GPU via `VK_EXT_memory_decompression` on Vulkan, CPU via
+  `Compression::DecompressCPU` on WebGPU).
+- A single texture binding flowing from `Image2D<RGBA8>` through the
+  RT pipeline's closest-hit on both backends. The GLSL closest-hit samples
+  at the barycentric attribs as UVs — proof-of-binding, not visually
+  accurate; the WGSL closest-hit interpolates per-vertex UVs instead.
+
+## Asset fetch
+
+`project.cpp` calls `Crafter::GitFetch(...)` on
+[https://github.com/jimmiebergmann/Sponza](https://github.com/jimmiebergmann/Sponza)
+(pinned to commit `222338979d32f4f4818466291bdbc29f192b86ba`). The
+clone lands in the per-user crafter-build cache; first build pulls
+~280 MB once, subsequent builds reuse it.
+
+`cfg.assets` then picks two files out of that clone:
+
+| Source                                  | Compressed output       |
+|-----------------------------------------|-------------------------|
+| `sponza.obj`                            | `sponza.cmesh`          |
+| `textures/sponza_arch_diff.tga`         | `sponza_arch_diff.ctex` |
+
+Both land flat in the example's bin directory.
+
+## Building
+
+```
+crafter build                          # native Vulkan
+crafter build --target=wasm32-wasip1   # WebGPU / wasm
+```
+
+## License & attribution
+
+Sponza geometry, materials, and textures are licensed under
+[CC BY 3.0](https://creativecommons.org/licenses/by/3.0/).
+
+- **Original model:** Frank Meinl, Crytek (2010).
+- **OBJ packaging / cleanup:** Morgan McGuire, McGuire Computer
+  Graphics Archive — https://casual-effects.com/data.
+- **GitHub mirror used here:** Jimmie Bergmann's roof-material fixup —
+  https://github.com/jimmiebergmann/Sponza.
+
+When redistributing builds of this example that bundle the compressed
+Sponza outputs (`*.cmesh`, `*.ctex`), the CC BY 3.0 attribution
+requirement applies. Quoting the original credit somewhere visible to
+end users (about-screen, credits page, etc.) is enough.
+
+The Crafter.Graphics library code itself is LGPL-3.0; the two
+licenses are compatible for data + code distribution.
diff --git a/examples/Sponza/closesthit.glsl b/examples/Sponza/closesthit.glsl
new file mode 100644
index 0000000..2355d9a
--- /dev/null
+++ b/examples/Sponza/closesthit.glsl
@@ -0,0 +1,23 @@
+#version 460
+#extension GL_EXT_ray_tracing : enable
+#extension GL_EXT_shader_image_load_formatted : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
+#extension GL_EXT_descriptor_heap : enable
+#extension GL_EXT_nonuniform_qualifier : enable
+
+// Specialization constant: descriptor-heap slot of the albedo texture.
+// Set from descriptorHeap.bufferStartElement + the slot allocated for
+// the Image2D<RGBA8> on the host side. Sampling uses gl_HitAttributeEXT
+// barycentrics as UVs — proof-of-binding rather than UV-correct shading.
+// Per-vertex UV interpolation lands when Mesh on Vulkan exposes the
+// data-region buffer.
+layout(constant_id = 0) const uint16_t albedoSlot = 0us;
+layout(descriptor_heap) uniform sampler2D albedo[];
+
+hitAttributeEXT vec2 hitAttrs;
+layout(location = 0) rayPayloadInEXT vec3 hitValue;
+
+void main() { // closest-hit entry point: shade the hit with the bound albedo texture
+    vec2 bary = vec2(hitAttrs.x, hitAttrs.y);          // hit attributes reused directly as texture coordinates
+    hitValue = texture(albedo[albedoSlot], bary).rgb;  // sampled colour goes back through the ray payload
+}
diff --git a/examples/Sponza/closesthit.wgsl b/examples/Sponza/closesthit.wgsl
new file mode 100644
index 0000000..9fa15a8
--- /dev/null
+++ b/examples/Sponza/closesthit.wgsl
@@ -0,0 +1,90 @@
+// Payload declared here so the WGSL assembler sees it before raygen
+// (the assembler concatenates closesthit/anyhit/miss BEFORE raygen).
+//
+// WGSL forbids cycles in the function call graph, so closesthit_main
+// CAN'T call traceRay (that would create closesthit → traceRay →
+// runClosestHit → closesthit). The lighting + shadow trace therefore
+// happens in raygen; closesthit's job is just to gather surface data
+// into the payload.
+//
+//   shadowRay = 0 (primary): closesthit fills albedo/worldPos/normal/hit.
+//   shadowRay = 1 (shadow):  closesthit is skipped (RT_FLAG_SKIP_CLOSEST_HIT),
+//                            miss flips color to white = "lit".
+struct Payload { // ray payload shared by raygen, closesthit and miss
+    color:       vec3<f32>, // albedo written by closesthit_main on a primary hit
+    shadowRay:   u32,       // 0 = primary ray, 1 = shadow ray
+    worldPos:    vec3<f32>, // hit position: ray.origin + ray.direction * hit.t
+    hit:         u32,       // set to 1u by closesthit_main
+    worldNormal: vec3<f32>, // interpolated shading normal in world space
+    _pad:        f32,       // unused filler field
+};
+
+// User-bound resources at group(2). Matches the UICustomBinding span the
+// host hands to PipelineRTWebGPU::Init.
+//   binding 0 — albedo texture_2d_array, one layer per Sponza material
+//   binding 1 — sampler (linear clamp)
+//   binding 2 — camera storage buffer (read by raygen only)
+@group(2) @binding(0) var albedos : texture_2d_array<f32>;
+@group(2) @binding(1) var samp    : sampler;
+
+// VertexNormalTangentUVPacked is `packed` on the outer struct but each
+// inner `Vector<float, N, 4>` is SIMD-aligned to a 16-byte stride. So
+// each vertex is 12 u32 words: normal at 0..2, tangent at 4..6, uv at 8..9.
+const ATTRIB_STRIDE_U32:    u32 = 12u;
+const ATTRIB_NORMAL_OFFSET: u32 = 0u;
+const ATTRIB_UV_OFFSET:     u32 = 8u;
+
+fn fetchUV(meshRec: MeshRecord, vertexIdx: u32) -> vec2<f32> { // reads one vertex's UV from the vertexAttribs heap
+    let base = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32 + ATTRIB_UV_OFFSET; // u32 word index of uv.x
+    return vec2<f32>(
+        bitcast<f32>(vertexAttribs[base + 0u]), // heap is array<u32>; reinterpret the bits as f32
+        bitcast<f32>(vertexAttribs[base + 1u]),
+    );
+}
+
+fn fetchNormal(meshRec: MeshRecord, vertexIdx: u32) -> vec3<f32> { // reads one vertex's normal from the vertexAttribs heap
+    let base = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32 + ATTRIB_NORMAL_OFFSET; // u32 word index of normal.x
+    return vec3<f32>(
+        bitcast<f32>(vertexAttribs[base + 0u]), // heap is array<u32>; reinterpret the bits as f32
+        bitcast<f32>(vertexAttribs[base + 1u]),
+        bitcast<f32>(vertexAttribs[base + 2u]),
+    );
+}
+
+fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {
+    // Instance → mesh record → the hit triangle's three vertex indices.
+    let meshIdx = tlasEntries[hit.instanceId].blasMeshIdx;
+    let meshRec = meshRecords[meshIdx];
+    let baseIdx = meshRec.indexOffset + hit.primitiveId * 3u; // three indices per triangle
+    let i0 = indices[baseIdx + 0u];
+    let i1 = indices[baseIdx + 1u];
+    let i2 = indices[baseIdx + 2u];
+    let bary = vec3<f32>(1.0 - hit.attribs.x - hit.attribs.y, hit.attribs.x, hit.attribs.y); // weights for vertices 0, 1, 2
+
+    // Albedo: blend the three per-vertex UVs with the barycentric weights.
+    let uv0 = fetchUV(meshRec, i0);
+    let uv1 = fetchUV(meshRec, i1);
+    let uv2 = fetchUV(meshRec, i2);
+    let uv  = uv0 * bary.x + uv1 * bary.y + uv2 * bary.z;
+    // OBJ V runs bottom-up, texture rows top-down, hence 1 - v; fract() tiles the UVs by hand.
+    let uvTiled = vec2<f32>(fract(uv.x), fract(1.0 - uv.y));
+    let layer   = i32(hit.customIndex); // the instance's customIndex picks the albedo array layer
+    let albedo  = textureSampleLevel(albedos, samp, uvTiled, layer, 0.0).rgb; // explicit mip level 0
+
+    // Smooth shading normal in world space: blend the vertex normals, then
+    // apply the object-to-world rotation rows so the result stays correct if
+    // a future scene rotates instances (Sponza itself is all identities).
+    let n0 = fetchNormal(meshRec, i0);
+    let n1 = fetchNormal(meshRec, i1);
+    let n2 = fetchNormal(meshRec, i2);
+    let nObj = normalize(n0 * bary.x + n1 * bary.y + n2 * bary.z);
+    let nWorld = normalize(vec3<f32>(
+        dot(hit.objectToWorldR0.xyz, nObj),
+        dot(hit.objectToWorldR1.xyz, nObj),
+        dot(hit.objectToWorldR2.xyz, nObj)));
+
+    (*payload).color       = albedo;
+    (*payload).worldPos    = ray.origin + ray.direction * hit.t; // point along the ray at hit distance t
+    (*payload).worldNormal = nWorld;
+    (*payload).hit         = 1u; // marks the payload as a hit
+}
diff --git a/examples/Sponza/main.cpp b/examples/Sponza/main.cpp
new file mode 100644
index 0000000..73db99e
--- /dev/null
+++ b/examples/Sponza/main.cpp
@@ -0,0 +1,445 @@
+// Sponza on Vulkan + WebGPU. Same example source, two backends — picked
+// by CRAFTER_GRAPHICS_WINDOW_DOM. Both paths:
+//   1. Load Sponza .cmesh geometry (positions + indices, optional
+//      per-vertex data region) and albedo .ctex textures from disk —
+//      mesh_0/tex_0 only on Vulkan, every entry of scene.txt on WebGPU.
+//      The source assets are fetched once by project.cpp (GitFetch)
+//      from https://github.com/jimmiebergmann/Sponza and compressed
+//      into the bin dir at build time — they don't live in this repo.
+//   2. Build BLAS + TLAS via the existing Mesh / RenderingElement3D
+//      flow. The on-disk format is identical between backends; only
+//      the decompression path differs (VK_EXT_memory_decompression
+//      on Vulkan, CPU GDeflate on WebGPU).
+//   3. Upload the albedo (Image2D<RGBA8> on Vulkan, one Image2DArray
+//      layer per material on WebGPU), register it in the backend
+//      descriptor heap, and run the RT pipeline. closesthit.wgsl
+//      interpolates per-vertex UVs from the attribs heap; the Vulkan
+//      closest-hit still samples at the hit's barycentric coords
+//      (proof-of-binding) until Mesh exposes a sibling data buffer.
+//
+// Sponza model: CC BY 3.0 — Frank Meinl (Crytek), packaged by Jimmie
+// Bergmann and Morgan McGuire. https://casual-effects.com/data
+
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
+#include "vulkan/vulkan.h"
+#endif
+
+import Crafter.Graphics;
+import Crafter.Asset;
+import Crafter.Math;
+import Crafter.Event;
+import std;
+
+using namespace Crafter;
+namespace fs = std::filesystem;
+
+namespace {
+    struct RGBA8 { std::uint8_t r, g, b, a; };
+
+    // Abort with a per-file OK/MISSING report unless both assets exist.
+    void RequireAssets(const fs::path& mesh, const fs::path& tex) {
+        const bool meshFound = fs::exists(mesh);
+        const bool texFound  = fs::exists(tex);
+        if (meshFound && texFound) return;
+        const auto status = [](bool found) { return found ? "OK" : "MISSING"; };
+        std::println(std::cerr, "[Sponza] missing asset(s):\n"
+            "  mesh:    {} {}\n"
+            "  albedo:  {} {}\n"
+            "The build should have populated these via cfg.assets +\n"
+            "GitFetch (see examples/Sponza/project.cpp). If you ran\n"
+            "the binary from outside its bin dir, cd into the bin dir\n"
+            "first — asset paths are relative to cwd.",
+            mesh.string(), status(meshFound), tex.string(), status(texFound));
+        std::abort();
+    }
+}
+
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
+int main() {
+    // Native Vulkan entry point. Single-material for now (see file header):
+    // load just the first per-material output the build emits, build one
+    // BLAS/TLAS, bind the albedo and run the RT pass. WebGPU is the #else branch.
+    const fs::path meshPath = "mesh_0.cmesh";
+    const fs::path texPath  = "tex_0.ctex";
+    RequireAssets(meshPath, texPath);
+
+    CompressedMeshAsset    loadedMesh = LoadCompressedMesh(meshPath);
+    CompressedTextureAsset loadedTex  = LoadCompressedTexture(texPath);
+    std::println("[Sponza] loaded {} verts, {} idx, {}x{} albedo",
+                 loadedMesh.vertexCount, loadedMesh.indexCount,
+                 loadedTex.sizeX, loadedTex.sizeY);
+
+    Device::Initialize();
+    Window window(1280, 720, "Sponza");
+    VkCommandBuffer cmd = window.StartInit();
+
+    DescriptorHeapVulkan descriptorHeap;
+    descriptorHeap.Initialize(/*images*/ 2, /*buffers*/ 1, /*samplers*/ 0);
+
+    // Two specialization constants: the TLAS slot offset (shared with
+    // VulkanTriangle pattern) and the albedo slot index for closesthit.
+    VkSpecializationMapEntry raygenEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) };
+    VkSpecializationInfo raygenSpec = {
+        .mapEntryCount = 1, .pMapEntries = &raygenEntry,
+        .dataSize = sizeof(std::uint16_t), .pData = &descriptorHeap.bufferStartElement,
+    };
+
+    // Allocate the albedo slot first so its index is known when we
+    // compile closesthit.spv.
+    auto imgSlots    = descriptorHeap.AllocateImageSlots(2);
+    auto bufSlots    = descriptorHeap.AllocateBufferSlots(1);
+    std::uint16_t albedoHeapSlot = static_cast<std::uint16_t>(imgSlots.firstElement + 1);
+
+    VkSpecializationMapEntry hitEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) };
+    VkSpecializationInfo hitSpec = {
+        .mapEntryCount = 1, .pMapEntries = &hitEntry,
+        .dataSize = sizeof(std::uint16_t), .pData = &albedoHeapSlot,
+    };
+
+    std::array<VulkanShader, 3> shaders {{
+        { "raygen.spv",     "main", VK_SHADER_STAGE_RAYGEN_BIT_KHR,      &raygenSpec },
+        { "miss.spv",       "main", VK_SHADER_STAGE_MISS_BIT_KHR,        nullptr     },
+        { "closesthit.spv", "main", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, &hitSpec    },
+    }};
+    ShaderBindingTableVulkan shaderTable;
+    shaderTable.Init(shaders);
+
+    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> raygenGroups {{ {
+        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
+        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
+        .generalShader = 0, .closestHitShader = VK_SHADER_UNUSED_KHR,
+        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
+    } }};
+    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> missGroups {{ {
+        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
+        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
+        .generalShader = 1, .closestHitShader = VK_SHADER_UNUSED_KHR,
+        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
+    } }};
+    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> hitGroups {{ {
+        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
+        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR,
+        .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = 2,
+        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
+    } }};
+
+    PipelineRTVulkan pipeline;
+    pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, shaderTable);
+
+    Mesh sponzaMesh;
+    sponzaMesh.Build(loadedMesh, cmd);
+
+    Image2D<RGBA8> albedo;
+    albedo.Create(loadedTex.sizeX, loadedTex.sizeY, /*mipLevels*/ 1, cmd,
+                  VK_FORMAT_R8G8B8A8_UNORM,
+                  VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    albedo.Update(loadedTex, cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    SamplerVulkan<RGBA8> sampler;
+
+    static RenderingElement3D renderer;
+    renderer.instance = {
+        .transform                              = {},
+        .instanceCustomIndex                    = 0,
+        .mask                                   = 0xFF,
+        .instanceShaderBindingTableRecordOffset = 0,
+        .flags                                  = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR,
+        .accelerationStructureReference         = sponzaMesh.blasAddr,
+    };
+    MatrixRowMajor<float, 4, 3, 1>::Identity()
+        .Store(reinterpret_cast<float*>(renderer.instance.transform.matrix));
+    RenderingElement3D::elements.emplace_back(&renderer);
+    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
+        RenderingElement3D::BuildTLAS(cmd, f);  // one TLAS per frame; tlases[f] is read below
+    }
+
+    window.FinishInit();
+
+    // Write descriptors: TLAS at bufSlots[0], output image at imgSlots[0],
+    // albedo (combined image+sampler) at imgSlots[1]. Per-frame replicated.
+    VkDeviceAddressRangeKHR tlasRanges[Window::numFrames];
+    VkImageDescriptorInfoEXT outImgInfos[Window::numFrames];
+    VkDescriptorImageInfo albedoInfo {
+        .sampler = sampler.textureSampler,
+        .imageView = albedo.imageView,
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+    };
+    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
+        tlasRanges[f] = { .address = RenderingElement3D::tlases[f].address };
+        outImgInfos[f] = {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT,
+            .pView = &window.imageViews[f],
+            .layout = VK_IMAGE_LAYOUT_GENERAL,
+        };
+    }
+
+    std::vector<VkResourceDescriptorInfoEXT> resources;
+    std::vector<VkHostAddressRangeEXT>       destinations;
+    resources.reserve(Window::numFrames * 3);
+    destinations.reserve(Window::numFrames * 3);
+    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
+        resources.push_back({
+            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
+            .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
+            .data = { .pAddressRange = &tlasRanges[f] },
+        });
+        destinations.push_back({
+            .address = descriptorHeap.resourceHeap[f].value
+                     + descriptorHeap.BufferByteOffset(bufSlots.firstElement),
+            .size = Device::descriptorHeapProperties.bufferDescriptorSize,
+        });
+        resources.push_back({
+            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .data = { .pImage = &outImgInfos[f] },
+        });
+        destinations.push_back({
+            .address = descriptorHeap.resourceHeap[f].value
+                     + descriptorHeap.ImageByteOffset(imgSlots.firstElement),
+            .size = Device::descriptorHeapProperties.imageDescriptorSize,
+        });
+        resources.push_back({
+            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .data = { .pCombinedImageSampler = &albedoInfo },
+        });
+        destinations.push_back({
+            .address = descriptorHeap.resourceHeap[f].value
+                     + descriptorHeap.ImageByteOffset(albedoHeapSlot),
+            .size = Device::descriptorHeapProperties.imageDescriptorSize,
+        });
+    }
+    Device::vkWriteResourceDescriptorsEXT(Device::device,
+        static_cast<std::uint32_t>(resources.size()),
+        resources.data(), destinations.data());
+    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
+        descriptorHeap.resourceHeap[f].FlushDevice();
+    }
+
+    window.descriptorHeap = &descriptorHeap;
+    RTPass rtPass(&pipeline);
+    window.passes.push_back(&rtPass);
+
+    window.Render();
+    window.StartSync();
+    return 0;
+}
+#else
+int main() {
+    // WebGPU entry point. Read the scene manifest first (scene.txt, produced
+    // by BuildOBJBundle in examples/Sponza/project.cpp):
+    //   line 1: albedoCount
+    //   line 2: meshCount
+    //   line 3..: per-mesh albedoIdx (-1 means "no albedo")
+    const fs::path manifestPath = "scene.txt";
+    if (!fs::exists(manifestPath)) {
+        std::println(std::cerr,
+            "[Sponza] missing scene.txt — the build should have produced "
+            "it (see examples/Sponza/project.cpp). If you ran the binary "
+            "from outside its bin dir, cd in first.");
+        std::abort();
+    }
+    std::ifstream manifest(manifestPath);
+    std::uint32_t albedoCount = 0, meshCount = 0;
+    manifest >> albedoCount >> meshCount;
+    std::vector<std::int32_t> meshAlbedo(meshCount);
+    for (std::uint32_t i = 0; i < meshCount; ++i) manifest >> meshAlbedo[i];
+    if (!manifest || albedoCount == 0) {
+        std::println(std::cerr, "[Sponza] scene.txt is truncated or malformed — rebuild the example.");
+        std::abort();
+    }
+    std::println("[Sponza] scene: {} albedos, {} meshes", albedoCount, meshCount);
+
+    Device::Initialize();
+    static Window window(1280, 720, "Sponza");
+    auto cmd = window.StartInit();
+
+    DescriptorHeapWebGPU heap;
+    heap.Initialize(/*images*/ 2, /*buffers*/ 2, /*samplers*/ 2);
+
+    std::array<WebGPUShader, 3> shaders {{
+        WebGPUShader(fs::path("raygen.wgsl"),     "raygen_main",     WebGPURTStage::Raygen),
+        WebGPUShader(fs::path("miss.wgsl"),       "miss_main",       WebGPURTStage::Miss),
+        WebGPUShader(fs::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit),
+    }};
+    ShaderBindingTableWebGPU sbt;
+    sbt.Init(shaders);
+
+    std::array<RTShaderGroup, 1> raygenGroups {{
+        { .type = RTShaderGroupType::General,           .generalShader = 0 },
+    }};
+    std::array<RTShaderGroup, 1> missGroups {{
+        { .type = RTShaderGroupType::General,           .generalShader = 1 },
+    }};
+    std::array<RTShaderGroup, 1> hitGroups {{
+        { .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 },
+    }};
+
+    // Three user bindings at @group(2):
+    //   binding 0 — albedo texture_2d_array (one layer per material)
+    //   binding 1 — sampler (linear clamp)
+    //   binding 2 — Camera storage buffer (host-driven, updated per frame)
+    std::array<UICustomBinding, 3> bindings {{
+        { .group = 2, .binding = 0, .kind = UICustomBindingKind::SampledTextureArray, ._pad = 0, .pushOffset = 0 },
+        { .group = 2, .binding = 1, .kind = UICustomBindingKind::Sampler,             ._pad = 0, .pushOffset = 0 },
+        { .group = 2, .binding = 2, .kind = UICustomBindingKind::Buffer,              ._pad = 0, .pushOffset = 0 },
+    }};
+
+    PipelineRTWebGPU pipeline;
+    pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt, bindings);
+
+    // ── Albedo texture array — one rgba8unorm layer per material. ──────
+    // Probe layer 0 for the canonical layer dimensions; project.cpp
+    // already resized every albedo to the same square so any tex_N.ctex
+    // would do, layer 0 is just the first one we have.
+    Image2DArray<RGBA8> albedoArray;
+    {
+        CompressedTextureAsset probe = LoadCompressedTexture("tex_0.ctex");
+        albedoArray.Create(probe.sizeX, probe.sizeY, static_cast<std::uint16_t>(albedoCount));
+        albedoArray.UpdateLayer(0, probe);
+        for (std::uint32_t i = 1; i < albedoCount; ++i) {
+            CompressedTextureAsset tex = LoadCompressedTexture(std::format("tex_{}.ctex", i));
+            albedoArray.UpdateLayer(static_cast<std::uint16_t>(i), tex);
+        }
+    }
+    auto albedoArraySlot = albedoArray.AllocateSlot(heap);
+    SamplerSlot samplerSlot = AllocateLinearClampSampler(heap);
+
+    // Camera storage buffer — host writes (origin, right, up, forward,
+    // aspect, tanHalf) every frame from the input-driven free camera
+    // below. Layout matches the WGSL Camera struct in raygen.wgsl
+    // (vec3-aligned, std430). 64 bytes total.
+    struct CameraGPU {
+        float origin[3];   float pad0;
+        float right[3];    float tanHalf;
+        float up[3];       float aspect;
+        float forward[3];  float pad1;
+    };
+    static_assert(sizeof(CameraGPU) == 64);
+    WebGPUBuffer<CameraGPU, true> cameraBuf;
+    cameraBuf.Create(1);
+
+    // Handle array fed to RTPass — order matches the bindings declaration.
+    static std::array<std::uint32_t, 3> userHandles {
+        heap.imageTable  [albedoArraySlot.firstElement],
+        heap.samplerTable[samplerSlot.firstElement],
+        cameraBuf.handle,
+    };
+
+    // ── Meshes + scene instances ───────────────────────────────────────
+    // One Mesh + one RenderingElement3D per material group from
+    // scene.txt. Meshes whose albedoIdx is -1 (the .obj's `usemtl` named
+    // something without a map_Kd in .mtl) get dropped — they're rare in
+    // Sponza and we'd have nothing to sample for them anyway.
+    //
+    // Vector capacity is reserved up-front: RenderingElement3D::Add
+    // takes a pointer that's stored in the static elements[] vector, so
+    // any later vector reallocation would dangle those pointers.
+    static std::vector<Mesh> meshes;
+    static std::vector<RenderingElement3D> renderers;
+    meshes.reserve(meshCount);
+    renderers.reserve(meshCount);
+
+    for (std::uint32_t i = 0; i < meshCount; ++i) {
+        if (meshAlbedo[i] < 0) continue;
+        CompressedMeshAsset loaded = LoadCompressedMesh(std::format("mesh_{}.cmesh", i));
+        meshes.emplace_back();
+        meshes.back().Build(loaded, cmd);
+
+        renderers.emplace_back();
+        RenderingElement3D& r = renderers.back();
+        auto& tx = r.instance.transform.matrix;
+        tx[0][0] = 1; tx[0][1] = 0; tx[0][2] = 0; tx[0][3] = 0;
+        tx[1][0] = 0; tx[1][1] = 1; tx[1][2] = 0; tx[1][3] = 0;
+        tx[2][0] = 0; tx[2][1] = 0; tx[2][2] = 1; tx[2][3] = 0;
+        // 24-bit instanceCustomIndex carries the albedo array layer that
+        // closesthit.wgsl reads as `hit.customIndex`.
+        r.instance.instanceCustomIndex                    = static_cast<std::uint32_t>(meshAlbedo[i]);
+        r.instance.mask                                   = 0xFF;
+        r.instance.instanceShaderBindingTableRecordOffset = 0;
+        r.instance.flags                                  = kRTGeometryInstanceForceOpaque;
+        r.instance.accelerationStructureReference         = meshes.back().blasAddr;
+        RenderingElement3D::Add(&r);
+    }
+    RenderingElement3D::BuildTLAS(cmd, 0);
+
+    window.descriptorHeap = &heap;
+    window.FinishInit();
+
+    RTPass rtPass(&pipeline);
+    rtPass.handlesPtr   = userHandles.data();
+    rtPass.handlesCount = static_cast<std::uint32_t>(userHandles.size());
+    window.passes.push_back(&rtPass);
+
+    // ── Free camera: WASD + mouse-delta look ───────────────────────────
+    //
+    // Initial pose puts the camera near one end of the atrium at eye
+    // height, looking +X down the long axis (bbox: X[-1921..1800],
+    // Y[-126..1429], Z[-1183..1105]). The user can fine-tune from there.
+    struct CamState {
+        Vector<float, 3, 4> position{ -1500.0f, 200.0f, 0.0f };
+        float yaw   = 0.0f;   // radians, around world +Y
+        float pitch = 0.0f;   // radians, +pitch looks up
+    } cam;
+
+    Input::Map inputMap;
+    Input::Action& moveAct = inputMap.AddAction("Move", Input::ActionType::Vector2);
+    Input::Action& lookAct = inputMap.AddAction("Look", Input::ActionType::Vector2);
+    moveAct.bindings = {
+        Input::WASDBind{
+            Key(CrafterKeys::W), Key(CrafterKeys::S),
+            Key(CrafterKeys::A), Key(CrafterKeys::D),
+        },
+    };
+    lookAct.bindings = { Input::MouseDeltaBind{ 1.0f } };
+    inputMap.Attach(window);
+
+    constexpr float kMoveSpeed = 1200.0f;  // Sponza units / second (room is ~3700 wide)
+    constexpr float kLookSens  = 0.05f;   // radians per mouse pixel
+    constexpr float kDt        = 1.0f / 60.0f;
+
+    EventListener<void> camTick(&window.onBeforeUpdate, [&]() {
+        inputMap.Tick();
+
+        cam.yaw   += lookAct.vector2.x * kLookSens;
+        cam.pitch -= lookAct.vector2.y * kLookSens;
+        // Keep pitch just shy of straight up/down so the basis vectors
+        // don't collapse (cross(forward, world_up) would go zero).
+        cam.pitch = std::clamp(cam.pitch, -1.55f, 1.55f);
+
+        const float cp = std::cos(cam.pitch), sp = std::sin(cam.pitch);
+        const float cy = std::cos(cam.yaw),   sy = std::sin(cam.yaw);
+        Vector<float, 3, 4> forward { cp * cy, sp, cp * sy };
+        Vector<float, 3, 4> worldUp { 0.0f, 1.0f, 0.0f };
+        Vector<float, 3, 4> right { forward.y * worldUp.z - forward.z * worldUp.y,
+                                    forward.z * worldUp.x - forward.x * worldUp.z,
+                                    forward.x * worldUp.y - forward.y * worldUp.x };
+        const float rLen = std::sqrt(right.x*right.x + right.y*right.y + right.z*right.z);
+        right.x /= rLen; right.y /= rLen; right.z /= rLen;
+        Vector<float, 3, 4> up { right.y * forward.z - right.z * forward.y,
+                                 right.z * forward.x - right.x * forward.z,
+                                 right.x * forward.y - right.y * forward.x };
+
+        const float dx = moveAct.vector2.x * kMoveSpeed * kDt;
+        const float dy = moveAct.vector2.y * kMoveSpeed * kDt;
+        cam.position.x += right.x * dx + forward.x * dy;
+        cam.position.y += right.y * dx + forward.y * dy;
+        cam.position.z += right.z * dx + forward.z * dy;
+
+        CameraGPU& g  = cameraBuf.value[0];
+        g.origin[0]   = cam.position.x; g.origin[1]   = cam.position.y; g.origin[2]   = cam.position.z; g.pad0 = 0.0f;
+        g.right[0]    = right.x;        g.right[1]    = right.y;        g.right[2]    = right.z;
+        g.up[0]       = up.x;           g.up[1]       = up.y;           g.up[2]       = up.z;
+        g.forward[0]  = forward.x;      g.forward[1]  = forward.y;      g.forward[2]  = forward.z;
+        g.aspect      = float(window.width) / float(window.height);
+        g.tanHalf     = std::tan(70.0f * 3.14159265f / 360.0f);
+        g.pad1        = 0.0f;
+        cameraBuf.FlushDevice();
+    });
+
+    window.Render();
+    window.StartUpdate();
+    window.StartSync();
+    return 0;
+}
+#endif
diff --git a/examples/Sponza/miss.glsl b/examples/Sponza/miss.glsl
new file mode 100644
index 0000000..909d9ca
--- /dev/null
+++ b/examples/Sponza/miss.glsl
@@ -0,0 +1,11 @@
+#version 460
+#extension GL_EXT_ray_tracing : enable
+
+layout(location = 0) rayPayloadInEXT vec3 hitValue;
+
+void main() {
+    // Flat warm tone that matches Sponza's interior lighting. A sky
+    // gradient like miss.wgsl's could be derived from
+    // gl_WorldRayDirectionEXT.y; this path keeps the constant for now.
+    hitValue = vec3(0.10, 0.08, 0.06);
+}
diff --git a/examples/Sponza/miss.wgsl b/examples/Sponza/miss.wgsl
new file mode 100644
index 0000000..39ff71d
--- /dev/null
+++ b/examples/Sponza/miss.wgsl
@@ -0,0 +1,16 @@
+fn miss_main(ray: RayDesc, payload: ptr<function, Payload>) {
+    // Shadow rays that reach here escaped to infinity, i.e. nothing blocks
+    // the sun: report full visibility (raygen reads color.x as the
+    // visibility coefficient). Primary rays get a cheap two-stop gradient;
+    // (*payload).hit stays 0 so raygen skips lighting and uses the color as-is.
+    let isShadowRay = (*payload).shadowRay == 1u;
+    if (isShadowRay) {
+        (*payload).color = vec3<f32>(1.0);
+    } else {
+        let lowColor  = vec3<f32>(0.45, 0.65, 0.95);  // used at blend == 0
+        let highColor = vec3<f32>(0.95, 0.85, 0.65);  // used at blend == 1
+        // Remap direction.y into a [0, 1] blend factor.
+        let blend = clamp(ray.direction.y * 0.5 + 0.5, 0.0, 1.0);
+        (*payload).color = mix(lowColor, highColor, blend);
+    }
+}
diff --git a/examples/Sponza/project.cpp b/examples/Sponza/project.cpp
new file mode 100644
index 0000000..b850c90
--- /dev/null
+++ b/examples/Sponza/project.cpp
@@ -0,0 +1,92 @@
+import std;
+import Crafter.Build;
+namespace fs = std::filesystem;
+using namespace Crafter;
+
+// Sponza geometry + albedo: CC BY 3.0, Frank Meinl (Crytek), packaged by
+// Jimmie Bergmann (https://github.com/jimmiebergmann/Sponza) and Morgan
+// McGuire (https://casual-effects.com/data). The full asset bundle is
+// ~280 MB — too large to live in this repo. GitFetch lands it in the
+// per-user crafter-build cache on first build and reuses thereafter.
+constexpr std::string_view kSponzaGitUrl    = "https://github.com/jimmiebergmann/Sponza.git";
+constexpr std::string_view kSponzaCommitSHA = "222338979d32f4f4818466291bdbc29f192b86ba";
+// Every albedo is normalized to this size so they can live as layers of
+// one texture_2d_array on the GPU (WebGPU array textures require
+// identical layer dimensions). 1024 matches the majority of Sponza's
+// textures; the few outliers (256×1024 chain, 512² thorn, 2048² curtains)
+// get bilinear-resized via stb_image_resize2.
+constexpr std::uint16_t    kAlbedoSize      = 1024u;
+
+// Build-script entry point for the Sponza example. Configures the executable
+// (wasm/DOM when a --target= argument names wasm, native Vulkan otherwise),
+// fetches and bundles the Sponza assets, and registers the bundle plus the
+// backend's shaders with the returned Configuration.
+extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> args) {
+    const bool isWasm = std::ranges::any_of(args, [](std::string_view a) {
+        return a.starts_with("--target=") && a.find("wasm") != std::string_view::npos;
+    });
+
+    std::vector<std::string> graphicsArgs(args.begin(), args.end());
+    Configuration* graphics = LocalProject({
+        .projectFile = "../../project.cpp",
+        .args = graphicsArgs,
+    });
+
+    Configuration cfg;
+    cfg.path = "./";
+    cfg.name = "Sponza";
+    cfg.outputName = "Sponza";
+    cfg.type = ConfigurationType::Executable;
+    if (isWasm) {
+        cfg.target = "wasm32-wasip1";
+        cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""});
+        cfg.compileFlags.push_back("-msimd128");
+    }
+    ApplyStandardArgs(cfg, args);
+    cfg.dependencies = { graphics };
+
+    std::array<fs::path, 0> ifaces = {};
+    std::array<fs::path, 1> impls = { "main" };
+    cfg.GetInterfacesAndImplementations(ifaces, impls);
+
+    // Fetch Sponza once into the shared crafter-build cache, then process
+    // it into a per-material bundle under build/sponza-bundle-<sha>-<size>/.
+    // The name is keyed on (sha, albedoSize) so changing either invalidates
+    // the bundle without touching the rest of the example's build tree.
+    fs::path sponzaRoot = GitFetch({
+        .url    = std::string(kSponzaGitUrl),
+        .commit = std::string(kSponzaCommitSHA),
+    });
+    // Spell the key into the name instead of using std::hash: hash values are
+    // only guaranteed stable within one program run, not across builds.
+    fs::path bundleDir = fs::path("build") / std::format("sponza-bundle-{}-{}", kSponzaCommitSHA, kAlbedoSize);
+
+    if (auto err = BuildOBJBundle(
+            sponzaRoot / "sponza.obj",
+            sponzaRoot / "sponza.mtl",
+            bundleDir,
+            kAlbedoSize); !err.empty()) {
+        std::println(std::cerr, "Sponza bundle error: {}", err);
+        std::exit(1);
+    }
+
+    // Forward every produced file (.cmesh, .ctex, scene.txt) as a
+    // passthrough — they're already compressed by Crafter.Asset, no
+    // further compression needed. cfg.files copies them flat into
+    // the executable's bin dir.
+    for (const auto& entry : fs::directory_iterator(bundleDir)) {
+        if (entry.is_regular_file()) cfg.files.push_back(entry.path());
+    }
+
+    if (isWasm) {
+        cfg.files.emplace_back(fs::path("raygen.wgsl"));
+        cfg.files.emplace_back(fs::path("closesthit.wgsl"));
+        cfg.files.emplace_back(fs::path("miss.wgsl"));
+        EnableWasiBrowserRuntime(cfg);
+    } else {
+        cfg.shaders.emplace_back(fs::path("raygen.glsl"),     std::string("main"), ShaderType::RayGen);
+        cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit);
+        cfg.shaders.emplace_back(fs::path("miss.glsl"),       std::string("main"), ShaderType::Miss);
+    }
+    return cfg;
+}
diff --git a/examples/Sponza/raygen.glsl b/examples/Sponza/raygen.glsl
new file mode 100644
index 0000000..7eafa4b
--- /dev/null
+++ b/examples/Sponza/raygen.glsl
@@ -0,0 +1,52 @@
+#version 460
+#extension GL_EXT_ray_tracing : enable
+#extension GL_EXT_shader_image_load_formatted : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
+#extension GL_EXT_descriptor_heap : enable
+#extension GL_EXT_nonuniform_qualifier : enable
+
+// Specialization constant set from descriptorHeap.bufferStartElement —
+// shared with closesthit.glsl. The TLAS lives at descriptor_heap slot
+// `bufferStart` (it's an SSBO-typed entry), the per-frame output image
+// at heap slot 0.
+layout(constant_id = 0) const uint16_t bufferStart = 0us;
+layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[];
+layout(descriptor_heap) uniform writeonly image2D image[];
+
+layout(location = 0) rayPayloadEXT vec3 hitValue;
+
+void main() {
+    uvec2 pixel      = gl_LaunchIDEXT.xy;
+    uvec2 resolution = gl_LaunchSizeEXT.xy;
+    vec2  uv         = (vec2(pixel) + 0.5) / vec2(resolution);
+    vec2  ndc        = uv * 2.0 - 1.0;
+
+    // Camera positioned to look down the Sponza atrium axis, sitting
+    // inside and looking +X. NOTE(review): this origin assumes a model
+    // ~30 units wide; main.cpp's WebGPU path documents the bundled asset
+    // as X[-1921..1800], Y[-126..1429] — confirm the pose for that scale.
+    vec3  origin    = vec3(-10.0, 5.0, 0.0);
+    float aspect    = float(resolution.x) / float(resolution.y);
+    float fov       = radians(70.0);
+    float tanHalf   = tan(fov * 0.5);
+    vec3  direction = normalize(vec3(
+        ndc.x * aspect * tanHalf,
+        -ndc.y * tanHalf,
+        1.0));
+
+    // Rotate +Z forward → +X forward (90° about Y); screen-right maps to -Z.
+    direction = vec3(direction.z, direction.y, -direction.x);
+
+    traceRayEXT(
+        topLevelAS[bufferStart],
+        gl_RayFlagsNoneEXT,
+        0xff,
+        0, 0, 0,
+        origin,
+        0.001,
+        direction,
+        10000.0,
+        0);
+
+    imageStore(image[0], ivec2(pixel), vec4(hitValue, 1.0));
+}
diff --git a/examples/Sponza/raygen.wgsl b/examples/Sponza/raygen.wgsl
new file mode 100644
index 0000000..a90ca84
--- /dev/null
+++ b/examples/Sponza/raygen.wgsl
@@ -0,0 +1,109 @@
+// WebGPU raygen. Camera state comes from the host every frame via a
+// storage buffer bound at @group(2) @binding(2); main.cpp drives that
+// from WASD + mouse-delta through Crafter::Input.
+//
+// The shading + shadow trace all happens here because WGSL forbids
+// recursive function call graphs — closesthit_main can't call traceRay
+// (that would loop closesthit → traceRay → runClosestHit → closesthit).
+// Raygen is the entry point and not called by anyone, so it can call
+// traceRay twice (once primary, once shadow) without forming a cycle.
+
+struct Camera {
+    origin:  vec3<f32>,  // primary-ray origin
+    pad0:    f32,        // unused; host writes 0
+    right:   vec3<f32>,  // scaled by ndc.x * aspect * tanHalf in raygen_main
+    tanHalf: f32,        // tan(fov / 2); host uses a 70° fov
+    up:      vec3<f32>,  // scaled by -ndc.y * tanHalf in raygen_main
+    aspect:  f32,        // surface width / height
+    forward: vec3<f32>,  // view direction through the screen centre
+    pad1:    f32,        // unused; host writes 0
+};
+@group(2) @binding(2) var<storage, read> camera : Camera;
+
+// Sun coming through Sponza's open roof. Y is up; the constant is the surface-to-sun
+// direction, so light travels "down and slightly along +X/+Z" and grazes the colonnades on one side.
+const SUN_DIR_TO_LIGHT: vec3<f32> = vec3<f32>(-0.35,  1.00, -0.20);
+const SUN_COLOR:        vec3<f32> = vec3<f32>( 1.10,  1.00,  0.85);
+const AMBIENT_COLOR:    vec3<f32> = vec3<f32>( 0.18,  0.20,  0.28);
+
+fn raygen_main(gid: vec3<u32>) {
+    if (gid.x >= hdr.surfaceW || gid.y >= hdr.surfaceH) { return; }  // skip invocations outside the surface
+
+    let pixel      = vec2<f32>(f32(gid.x), f32(gid.y));
+    let resolution = vec2<f32>(f32(hdr.surfaceW), f32(hdr.surfaceH));
+    let uv         = (pixel + vec2<f32>(0.5)) / resolution;  // pixel centre in [0, 1]
+    let ndc        = uv * 2.0 - vec2<f32>(1.0);              // remapped to [-1, 1]
+
+    // Pinhole camera reconstructed from the host basis. ndc.x runs left-
+    // to-right across the screen → +right; ndc.y is top-down so we
+    // negate before applying +up.
+    let direction = normalize(
+        camera.right   * (ndc.x  * camera.aspect * camera.tanHalf) +
+        camera.up      * (-ndc.y * camera.tanHalf) +
+        camera.forward);
+
+    // ── Primary ray ────────────────────────────────────────────────────
+    var payload: Payload;
+    payload.color     = vec3<f32>(0.0);
+    payload.shadowRay = 0u;
+    payload.hit       = 0u;
+
+    traceRay(
+        0u, 0u, 0xFFu,
+        0u, 0u, 0u,
+        camera.origin, 0.001,    // origin, tMin — presumably; mirrors traceRayEXT in raygen.glsl
+        direction,     10000.0,  // direction, tMax — presumably
+        &payload);
+
+    var finalColor: vec3<f32>;
+    if (payload.hit == 1u) {
+        // Closesthit filled albedo/worldPos/worldNormal. Two-sided
+        // shading: flip the normal toward the camera if we hit the back
+        // face — Sponza's curtains in particular have inconsistent
+        // winding, and without this half the surface would go black.
+        let albedo = payload.color;
+        let nFacing = select(-payload.worldNormal,
+                              payload.worldNormal,
+                              dot(payload.worldNormal, direction) < 0.0);
+        let lightDir = normalize(SUN_DIR_TO_LIGHT);
+        let nDotL    = max(0.0, dot(nFacing, lightDir));
+
+        // ── Shadow ray ────────────────────────────────────────────────
+        // Only worth tracing if the surface faces the sun at all.
+        var visibility = 0.0;
+        if (nDotL > 0.0) {
+            // Normal-offset bias on Sponza's units (~3700 wide atrium)
+            // is hefty; 0.5 keeps the shadow ray clear of the originating
+            // triangle without producing visible "floating" shadows.
+            let shadowOrigin = payload.worldPos + nFacing * 0.5;
+
+            var shadowPayload: Payload;
+            shadowPayload.color     = vec3<f32>(0.0);  // default: blocked
+            shadowPayload.shadowRay = 1u;
+            shadowPayload.hit       = 0u;
+            traceRay(
+                0u,
+                RT_FLAG_SKIP_CLOSEST_HIT | RT_FLAG_TERMINATE_ON_FIRST_HIT,
+                0xFFu,
+                0u, 0u, 0u,
+                shadowOrigin, 0.001,
+                lightDir,     10000.0,
+                &shadowPayload);
+            visibility = shadowPayload.color.x;  // miss_main writes 1.0; otherwise the 0.0 default remains
+        }
+
+        let lit = AMBIENT_COLOR + SUN_COLOR * (nDotL * visibility);
+        finalColor = albedo * lit;
+    } else {
+        // Sky color was filled by miss_main.
+        finalColor = payload.color;
+    }
+
+    // Reinhard tonemap + gamma 2.2 so sun-lit albedos don't clip and
+    // shadow detail stays readable.
+    let mapped = finalColor / (finalColor + vec3<f32>(1.0));
+    let gamma  = pow(mapped, vec3<f32>(1.0 / 2.2));
+    textureStore(outImage,
+                 vec2<i32>(i32(gid.x), i32(gid.y)),
+                 vec4<f32>(gamma, 1.0));
+}
diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
index 4c86ad7..5a61871 100644
--- a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
+++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
@@ -19,6 +19,7 @@ module Crafter.Graphics:Mesh_implWebGPU;
 
 import :Mesh;
 import :WebGPU;
+import Crafter.Asset;
 import Crafter.Math;
 import std;
 
@@ -215,26 +216,59 @@ namespace {
     };
 }
 
+namespace {
+    // Shared between the positions-only and the compressed-asset Build paths.
+    // attribsBytes is empty for positions-only meshes; the JS bridge skips
+    // the attribs-heap append in that case.
+    void BuildBVHAndRegister(Mesh& mesh,
+                             std::span<const Vector<float, 3, 3>> vertices,
+                             std::span<const std::uint32_t>       indices,
+                             std::span<const std::byte>           attribsBytes) {
+        mesh.triangleCount = static_cast<std::uint32_t>(indices.size()) / 3;   // three indices per triangle
+
+        Builder builder;
+        builder.Build(vertices, indices);
+
+        std::vector<std::uint32_t> primRemap(mesh.triangleCount);   // builder.prims order -> triIndex
+        for (std::uint32_t i = 0; i < mesh.triangleCount; ++i) {
+            primRemap[i] = builder.prims[i].triIndex;
+        }
+
+        const BVHNode& root = builder.nodes[0];   // NOTE(review): assumes Build() emits >= 1 node even for empty input — confirm
+        mesh.blasAddr = WebGPU::wgpuRegisterMeshBLAS(   // returned value is kept as the mesh's blasAddr
+            root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
+            root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
+            vertices.data(),       static_cast<std::int32_t>(vertices.size()),
+            indices.data(),        static_cast<std::int32_t>(indices.size()),
+            builder.nodes.data(),  static_cast<std::int32_t>(builder.nodes.size()),
+            primRemap.data(),      static_cast<std::int32_t>(primRemap.size()),
+            attribsBytes.data(),   static_cast<std::int32_t>(attribsBytes.size()));
+    }
+}
+
 void Mesh::Build(std::span<Vector<float, 3, 3>> vertices,
                  std::span<std::uint32_t>       indices,
                  WebGPUCommandEncoderRef        /*cmd*/) {
-    triangleCount = static_cast<std::uint32_t>(indices.size()) / 3;
+    BuildBVHAndRegister(*this, vertices, indices, {});
+}
 
-    Builder builder;
-    builder.Build(vertices, indices);
+void Mesh::Build(const CompressedMeshAsset& asset,
+                 WebGPUCommandEncoderRef    /*cmd*/) {
+    std::vector<Vector<float, 3, 3>> vertices(asset.vertexCount);   // scratch buffers sized from the asset's counts
+    std::vector<std::uint32_t>       indices(asset.indexCount);
+    std::vector<std::byte>           dataBytes(
+        static_cast<std::size_t>(asset.dataCount) * asset.dataStride);
 
-    std::vector<std::uint32_t> primRemap(triangleCount);
-    for (std::uint32_t i = 0; i < triangleCount; ++i) {
-        primRemap[i] = builder.prims[i].triIndex;
-    }
+    // CompressedBlob always carries 3 regions for MeshAsset (data may have
+    // decompressedSize=0); DecompressCPU validates output sizes against region
+    // sizes, hence the empty span. The clamp keeps span::first() in bounds.
+    std::array<std::span<std::byte>, 3> outputs = {
+        std::as_writable_bytes(std::span(vertices)),
+        std::as_writable_bytes(std::span(indices)),
+        std::span<std::byte>(dataBytes),
+    };
+    Compression::DecompressCPU(asset.blob,
+        std::span(outputs).first(std::min<std::size_t>(outputs.size(), asset.blob.regions.size())));
 
-    const BVHNode& root = builder.nodes[0];
-    std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS(
-        root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
-        root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
-        vertices.data(),       static_cast<std::int32_t>(vertices.size()),
-        indices.data(),        static_cast<std::int32_t>(indices.size()),
-        builder.nodes.data(),  static_cast<std::int32_t>(builder.nodes.size()),
-        primRemap.data(),      static_cast<std::int32_t>(primRemap.size()));
-    blasAddr = h;
+    BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes));   // dataBytes feeds the per-vertex attribs path
 }
diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
index bf9e115..c4265da 100644
--- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
+++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
@@ -22,6 +22,7 @@ module Crafter.Graphics:PipelineRTWebGPU_impl;
 
 import :PipelineRTWebGPU;
 import :ShaderBindingTableWebGPU;
+import :WebGPUComputeShader;
 import :RT;
 import :WebGPU;
 import std;
@@ -65,7 +66,9 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
                             std::span<const RTShaderGroup>          raygenGroups,
                             std::span<const RTShaderGroup>          missGroups,
                             std::span<const RTShaderGroup>          hitGroups,
-                            const ShaderBindingTableWebGPU&         sbt) {
+                            const ShaderBindingTableWebGPU&         sbt,
+                            std::span<const UICustomBinding>        bindings) {
+    userBindings.assign(bindings.begin(), bindings.end());
     std::string wgsl;
     wgsl.reserve(8 * 1024);
 
@@ -183,5 +186,7 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
 
     pipelineHandle = WebGPU::wgpuLoadRTPipeline(
         wgsl.data(),
-        static_cast<std::int32_t>(wgsl.size()));
+        static_cast<std::int32_t>(wgsl.size()),
+        userBindings.empty() ? nullptr : userBindings.data(),
+        static_cast<std::int32_t>(userBindings.size()));
 }
diff --git a/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm b/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm
index e36003c..9e50d45 100644
--- a/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm
+++ b/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm
@@ -181,5 +181,15 @@ export namespace Crafter {
         }
         return *this;
     }
+
+    // Helper that keeps the wgpu* bridge call inside the library so example
+    // code never has to reach into Crafter::WebGPU: reserves one sampler slot
+    // in `heap`, stores the "standard" linear-filter clamp-to-edge sampler
+    // from wgpuCreateLinearClampSampler() in it, and returns that slot.
+    inline SamplerSlot AllocateLinearClampSampler(DescriptorHeapWebGPU& heap) {
+        const auto slot = heap.AllocateSamplerSlots(1).firstElement;
+        heap.samplerTable[slot] = WebGPU::wgpuCreateLinearClampSampler();
+        return SamplerSlot(&heap, slot);
+    }
 }
 #endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-Image2D.cppm b/interfaces/Crafter.Graphics-Image2D.cppm
new file mode 100644
index 0000000..01643e2
--- /dev/null
+++ b/interfaces/Crafter.Graphics-Image2D.cppm
@@ -0,0 +1,166 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License version 3.0 as published by the Free Software Foundation;
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+*/
+
+// Image2D<T> — portable 2D image type whose API surface is intentionally
+// backend-specific via #ifdef. On Vulkan it aliases the existing
+// ImageVulkan<T> (full VkFormat / usage / layout control). On WebGPU it's
+// a thin handle around an rgba8unorm GPUTexture; sizes are u16 and the
+// only update path is from a CompressedTextureAsset.
+//
+// The "no shared no-op signatures" principle is deliberate: callers do
+// the same #ifdef the library does, and write the backend-specific
+// invocation. The unified type name Image2D<T> is the only thing
+// portable between the two — that's the whole point.
+
+export module Crafter.Graphics:Image2D;
+
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
+import :ImageVulkan;
+
+export namespace Crafter {
+    // Vulkan target: Image2D is just the existing ImageVulkan. New name,
+    // same shape — keeps existing ImageVulkan callers (e.g. examples/
+    // Decompression) working without a churn-rename.
+    template <typename PixelType>
+    using Image2D = ImageVulkan<PixelType>;
+}
+#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
+
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+import Crafter.Asset;
+import :DescriptorHeapWebGPU;
+import :WebGPU;
+
+export namespace Crafter {
+    template <typename PixelType>
+    class Image2D {
+    public:
+        WebGPUTextureRef handle = 0;   // 0 = no texture held (see Destroy)
+        std::uint16_t width  = 0;      // dims recorded by Create()
+        std::uint16_t height = 0;
+
+        void Create(std::uint16_t w, std::uint16_t h) {   // make the w × h texture
+            width  = w;
+            height = h;
+            handle = WebGPU::wgpuCreateImage2D(w, h);
+        }
+
+        // CPU-decompress the .ctex blob (no GPU decompression on WebGPU) and
+        // upload via wgpuWriteImage2D; the underlying queue.writeTexture in JS
+        // makes its own copy, so `pixels` only lives for this call. Aborts on a
+        // pixel-stride mismatch or when the asset exceeds the Create() dims.
+        void Update(const CompressedTextureAsset& asset) {
+            if (asset.pixelStride != sizeof(PixelType)) {
+                std::println(std::cerr,
+                    "Image2D::Update: pixel stride mismatch (got {}, expected {})",
+                    asset.pixelStride, sizeof(PixelType));
+                std::abort();
+            }
+            if (asset.sizeX > width || asset.sizeY > height) {
+                std::println(std::cerr,
+                    "Image2D::Update: asset dims {}x{} exceed image dims {}x{}",
+                    asset.sizeX, asset.sizeY, width, height);
+                std::abort();
+            }
+            std::vector<PixelType> pixels(static_cast<std::size_t>(asset.sizeX) * asset.sizeY);
+            std::array<std::span<std::byte>, 1> outputs = { std::as_writable_bytes(std::span(pixels)) };
+            Compression::DecompressCPU(asset.blob, outputs);
+            WebGPU::wgpuWriteImage2D(handle, pixels.data(),
+                static_cast<std::int32_t>(pixels.size() * sizeof(PixelType)),
+                asset.sizeX, asset.sizeY);
+        }
+
+        // Register the texture in a descriptor heap slot so a custom RT
+        // pipeline can bind it via UICustomBinding::SampledTexture.
+        ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) {
+            DescriptorRange r = heap.AllocateImageSlots(1);
+            heap.imageTable[r.firstElement] = handle;
+            return ImageSlot(&heap, r.firstElement);
+        }
+
+        void Destroy() {   // release the texture if one is held; idempotent
+            if (handle == 0) return;
+            WebGPU::wgpuDestroyTexture(handle);
+            handle = 0;
+        }
+    };
+
+    // 2D texture array — `layers` × (w × h) rgba8unorm. Each layer is
+    // populated independently from a CompressedTextureAsset whose dims
+    // must match the array's (w × h). Layer 0 is sampled at array
+    // index 0 in WGSL; bind through UICustomBindingKind::SampledTextureArray.
+    template <typename PixelType>
+    class Image2DArray {
+    public:
+        WebGPUTextureRef handle = 0;   // 0 = no texture held (see Destroy)
+        std::uint16_t width  = 0;      // per-layer dims recorded by Create()
+        std::uint16_t height = 0;
+        std::uint16_t layers = 0;      // layer count recorded by Create()
+
+        void Create(std::uint16_t w, std::uint16_t h, std::uint16_t layerCount) {
+            width  = w;
+            height = h;
+            layers = layerCount;
+            handle = WebGPU::wgpuCreateImage2DArray(w, h, layerCount);
+        }
+
+        // Decompress `tex` and upload it to `layer` (must be < layers). The
+        // asset's dims must match the array's (w × h) — resize beforehand on
+        // the host with TextureAsset<RGBA8>::Resize() if they don't.
+        void UpdateLayer(std::uint16_t layer, const CompressedTextureAsset& tex) {
+            if (layer >= layers) {
+                std::println(std::cerr,
+                    "Image2DArray::UpdateLayer: layer {} out of range ({} layers)", layer, layers);
+                std::abort();
+            }
+            if (tex.pixelStride != sizeof(PixelType)) {
+                std::println(std::cerr,
+                    "Image2DArray::UpdateLayer: pixel stride mismatch (got {}, expected {})",
+                    tex.pixelStride, sizeof(PixelType));
+                std::abort();
+            }
+            if (tex.sizeX != width || tex.sizeY != height) {
+                std::println(std::cerr,
+                    "Image2DArray::UpdateLayer: layer {} dims {}x{} don't match array dims {}x{}",
+                    layer, tex.sizeX, tex.sizeY, width, height);
+                std::abort();
+            }
+            std::vector<PixelType> pixels(static_cast<std::size_t>(width) * height);
+            std::array<std::span<std::byte>, 1> outputs = { std::as_writable_bytes(std::span(pixels)) };
+            Compression::DecompressCPU(tex.blob, outputs);
+            WebGPU::wgpuWriteImage2DLayer(handle, layer, pixels.data(),
+                static_cast<std::int32_t>(pixels.size() * sizeof(PixelType)),
+                width, height);
+        }
+
+        ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) {   // publish handle in one heap image slot
+            DescriptorRange r = heap.AllocateImageSlots(1);
+            heap.imageTable[r.firstElement] = handle;
+            return ImageSlot(&heap, r.firstElement);
+        }
+
+        void Destroy() {   // release the texture if one is held; idempotent
+            if (handle == 0) return;
+            WebGPU::wgpuDestroyTexture(handle);
+            handle = 0;
+        }
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-Mesh.cppm b/interfaces/Crafter.Graphics-Mesh.cppm
index d8385ad..087d94f 100644
--- a/interfaces/Crafter.Graphics-Mesh.cppm
+++ b/interfaces/Crafter.Graphics-Mesh.cppm
@@ -64,6 +64,7 @@ export namespace Crafter {
 #ifdef CRAFTER_GRAPHICS_WINDOW_DOM
 import std;
 import Crafter.Math;
+import Crafter.Asset;
 import :WebGPU;
 
 export namespace Crafter {
@@ -108,6 +109,15 @@ export namespace Crafter {
         void Build(std::span<Crafter::Vector<float, 3, 3>> vertices,
                    std::span<std::uint32_t>                indices,
                    WebGPUCommandEncoderRef                  cmd = 0);
+
+        // CPU-decompress the .cmesh blob (no VK_EXT_memory_decompression
+        // equivalent in WebGPU) and forward to the positions+indices path,
+        // plus push the optional `data` region into the per-vertex attribs
+        // heap so closest-hit shaders can sample UVs / normals / tangents.
+        // The data layout is example-defined — the heap is exposed in WGSL
+        // as `vertexAttribs : array<u32>` with a per-mesh u32-word offset.
+        void Build(const ::Crafter::CompressedMeshAsset& asset,
+                   WebGPUCommandEncoderRef                cmd = 0);
     };
 }
 #endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
index b3df012..6825fbd 100644
--- a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
+++ b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
@@ -26,22 +26,31 @@ import std;
 import :RT;
 import :WebGPU;
 import :ShaderBindingTableWebGPU;
+import :WebGPUComputeShader;
 
 export namespace Crafter {
     class PipelineRTWebGPU {
     public:
         std::uint32_t pipelineHandle = 0;
+        // Mirror of the bindings handed to Init. Kept for the example /
+        // RTPass to consult when packing the handles[] array at dispatch
+        // time (one resolved u32 handle per binding, in declaration order).
+        std::vector<UICustomBinding> userBindings;
 
         // Build the megakernel pipeline. Groups carry indices into
         // `sbt.shaders`. The library generates one `case` per registered
         // group: closest-hit groups dispatch to their closestHitShader's
         // entryFn, miss groups to their generalShader's entryFn, etc.
         // The `cmd` parameter is unused on WebGPU; kept for API symmetry.
+        // `bindings` declares extra @group(2)+ resources the user's
+        // closest-hit / miss / raygen WGSL touches (material SSBOs,
+        // albedo textures, samplers).
         void Init(WebGPUCommandEncoderRef                 cmd,
                   std::span<const RTShaderGroup>          raygenGroups,
                   std::span<const RTShaderGroup>          missGroups,
                   std::span<const RTShaderGroup>          hitGroups,
-                  const ShaderBindingTableWebGPU&         sbt);
+                  const ShaderBindingTableWebGPU&         sbt,
+                  std::span<const UICustomBinding>        bindings = {});
 
         PipelineRTWebGPU() = default;
         PipelineRTWebGPU(const PipelineRTWebGPU&) = delete;
diff --git a/interfaces/Crafter.Graphics-RTPass.cppm b/interfaces/Crafter.Graphics-RTPass.cppm
index 78064c0..1397a68 100644
--- a/interfaces/Crafter.Graphics-RTPass.cppm
+++ b/interfaces/Crafter.Graphics-RTPass.cppm
@@ -66,6 +66,12 @@ export namespace Crafter {
         // RTDispatchHeader. Null means "no extra data".
         const void*       pushPtr   = nullptr;
         std::uint32_t     pushBytes = 0;
+        // Resolved WebGPU resource handles for each user binding the
+        // pipeline was loaded with, in declaration order. The example
+        // owns the storage (typically a small std::array of u32). Null /
+        // 0 means "no user bindings".
+        const void*       handlesPtr   = nullptr;
+        std::uint32_t     handlesCount = 0;
 
         RTPass(PipelineRTWebGPU* p) : pipeline(p) {}
 
@@ -80,7 +86,9 @@ export namespace Crafter {
                 tlas.buffer.handle,
                 static_cast<std::int32_t>(tlas.builtInstanceCount),
                 static_cast<std::int32_t>(gx),
-                static_cast<std::int32_t>(gy));
+                static_cast<std::int32_t>(gy),
+                handlesPtr,
+                static_cast<std::int32_t>(handlesCount));
         }
     };
 }
diff --git a/interfaces/Crafter.Graphics-WebGPU.cppm b/interfaces/Crafter.Graphics-WebGPU.cppm
index 616c8fe..799c8c0 100644
--- a/interfaces/Crafter.Graphics-WebGPU.cppm
+++ b/interfaces/Crafter.Graphics-WebGPU.cppm
@@ -49,6 +49,27 @@ namespace Crafter::WebGPU {
     __attribute__((import_module("env"), import_name("wgpuDestroyTexture")))
     extern "C" void wgpuDestroyTexture(std::uint32_t handle);
 
+    // General-purpose rgba8unorm 2D texture for material albedo etc.
+    // Separate from the atlas path because atlas uses r8unorm + sub-region
+    // writes; this one takes the whole image in one shot.
+    __attribute__((import_module("env"), import_name("wgpuCreateImage2D")))
+    extern "C" std::uint32_t wgpuCreateImage2D(std::int32_t w, std::int32_t h);
+    __attribute__((import_module("env"), import_name("wgpuWriteImage2D")))
+    extern "C" void wgpuWriteImage2D(std::uint32_t handle, const void* srcPtr,
+                                     std::int32_t byteSize,
+                                     std::int32_t w, std::int32_t h);
+
+    // 2D texture array — `layerCount` rgba8unorm layers of identical (w × h).
+    // Sampled via `texture_2d_array<f32>` in WGSL (UICustomBindingKind 3).
+    // Used by Image2DArray<RGBA8> to stack per-material albedos for one
+    // multi-material scene.
+    __attribute__((import_module("env"), import_name("wgpuCreateImage2DArray")))
+    extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h, std::int32_t layerCount);
+    __attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer")))
+    extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer,
+                                          const void* srcPtr, std::int32_t byteSize,
+                                          std::int32_t w, std::int32_t h);
+
     __attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler")))
     extern "C" std::uint32_t wgpuCreateLinearClampSampler();
 
@@ -96,6 +117,11 @@ namespace Crafter::WebGPU {
     // stores in RTInstance::accelerationStructureReference; the WebGPU
     // TLAS-build compute shader resolves it back to root AABB + heap
     // offsets at dispatch time. Returns 0 on failure.
+    // The optional `attribsPtr` / `attribsByteCount` carry per-vertex
+    // attribute payload (normals, UVs, etc. — layout is example-defined)
+    // that gets appended to a global attribs heap and exposed to RT
+    // closest-hit shaders as `vertexAttribs : array<u32>` at
+    // @group(1) @binding(7). Pass (nullptr, 0) for positions-only meshes.
     __attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS")))
     extern "C" std::uint32_t wgpuRegisterMeshBLAS(
         float minX, float minY, float minZ,
@@ -103,25 +129,34 @@ namespace Crafter::WebGPU {
         const void* verticesPtr, std::int32_t vertexCount,
         const void* indicesPtr,  std::int32_t indexCount,
         const void* bvhNodesPtr, std::int32_t bvhNodeCount,
-        const void* primRemapPtr, std::int32_t primRemapCount);
+        const void* primRemapPtr, std::int32_t primRemapCount,
+        const void* attribsPtr,  std::int32_t attribsByteCount);
 
     // RT pipeline build. The library composes WGSL by concatenating the
     // traversal library, generated hit-group switches, and the user-
-    // supplied raygen / miss / closesthit / anyhit bodies. Returns an
-    // opaque pipeline handle.
+    // supplied raygen / miss / closesthit / anyhit bodies. `bindings` is
+    // UICustomBinding-shaped (8 bytes each) declaring extra @group(2)+
+    // resources the user's closest-hit / miss / raygen WGSL references.
+    // Pass (nullptr, 0) for a pipeline with no user-declared bindings.
+    // Returns an opaque pipeline handle.
     __attribute__((import_module("env"), import_name("wgpuLoadRTPipeline")))
-    extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen);
+    extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen,
+                                                const void* bindingsPtr, std::int32_t bindingsCount);
 
     // Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched
     // over a (gx, gy) tile grid; the library writes the push data (camera,
     // payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS
     // + global mesh heap, and runs one workgroup per 8x8 screen tile.
+    // `handles[]` carries resolved WebGPU resource handles for every user
+    // binding declared at pipeline-load time, in the same order. Pass
+    // (nullptr, 0) for a pipeline with no user bindings.
     __attribute__((import_module("env"), import_name("wgpuDispatchRT")))
     extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle,
                                    const void* pushPtr, std::int32_t pushBytes,
                                    std::uint32_t tlasBufHandle,
                                    std::int32_t  instanceCount,
-                                   std::int32_t  gx, std::int32_t gy);
+                                   std::int32_t  gx, std::int32_t gy,
+                                   const void* handlesPtr, std::int32_t handlesCount);
 
     // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or
     // GPU-written), produces per-instance world-space AABBs + per-instance
diff --git a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
index 867bb26..9726d76 100644
--- a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
+++ b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
@@ -32,9 +32,10 @@ import :WebGPU;
 
 export namespace Crafter {
     enum class UICustomBindingKind : std::uint8_t {
-        Buffer         = 0,   // read-only-storage SSBO, handle is a slot into heap.bufferTable
-        SampledTexture = 1,   // sampled texture_2d<f32>, handle is a slot into heap.imageTable
-        Sampler        = 2,   // filtering sampler, handle is a slot into heap.samplerTable
+        Buffer              = 0,   // read-only-storage SSBO, handle is a slot into heap.bufferTable
+        SampledTexture      = 1,   // sampled texture_2d<f32>, handle is a slot into heap.imageTable
+        Sampler             = 2,   // filtering sampler, handle is a slot into heap.samplerTable
+        SampledTextureArray = 3,   // sampled texture_2d_array<f32>, handle is a slot into heap.imageTable
     };
 
     struct UICustomBinding {
diff --git a/interfaces/Crafter.Graphics.cppm b/interfaces/Crafter.Graphics.cppm
index b48f029..5c94db9 100644
--- a/interfaces/Crafter.Graphics.cppm
+++ b/interfaces/Crafter.Graphics.cppm
@@ -47,6 +47,7 @@ export import :ShaderBindingTableVulkan;
 export import :PipelineRTVulkan;
 export import :RenderingElement3D;
 export import :ImageVulkan;
+export import :Image2D;
 export import :SamplerVulkan;
 export import :DescriptorHeapVulkan;
 export import :RenderPass;
diff --git a/project.cpp b/project.cpp
index c1a162c..e1fd83e 100644
--- a/project.cpp
+++ b/project.cpp
@@ -31,23 +31,9 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
         });
     };
 
-    // Sniff the requested target from args before any deps resolve — the
-    // Crafter.Asset dependency is heavy and not wasm-ready (uses `throw`
-    // under -fno-exceptions, references `_Float16`). The DOM build stubs
-    // the renderer entirely so the dep doesn't apply anyway.
-    bool isWasm = false;
-    for (std::string_view a : args) {
-        if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) {
-            isWasm = true;
-            break;
-        }
-    }
-
     Configuration* event = resolveDep("Crafter.Event", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Event.git");
     Configuration* math  = resolveDep("Crafter.Math",  "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git");
-    Configuration* asset = isWasm
-        ? nullptr
-        : resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git");
+    Configuration* asset = resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git");
 
     Configuration cfg;
     cfg.path = "./";
@@ -55,11 +41,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
     cfg.outputName = "Crafter.Graphics";
     cfg.type = ConfigurationType::LibraryStatic;
     auto opts = ApplyStandardArgs(cfg, args);
-    if (asset) {
-        cfg.dependencies = { event, math, asset };
-    } else {
-        cfg.dependencies = { event, math };
-    }
+    cfg.dependencies = { event, math, asset };
 
     // Window backend follows the target triple. V1 had separate lib-wayland /
     // lib-win32 configurations; V2 picks the right one automatically based on
@@ -78,6 +60,16 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
         // strips -march/-mtune from the clang command line for any wasm32-*
         // triple, so cfg.march/mtune can stay at their defaults — keeping them
         // matches the VariantId of dependency PCMs.
+        //
+        // WasmAlloc / WasmFree live in Crafter.Graphics-Dom.cpp and back
+        // dom-env.js's __writeUtf8 path (every keyboard / text-input event
+        // routes through them). The TU defines no symbols main.cpp would
+        // reference, so wasm-ld dead-strips it from libCrafter.Graphics.a
+        // for examples that don't touch the `Dom::HtmlElement*` API (like
+        // Sponza). `--export=` both forces the export AND pulls the
+        // defining .o in — solving both halves of the dead-strip problem.
+        cfg.linkFlags.push_back("-Wl,--export=WasmAlloc");
+        cfg.linkFlags.push_back("-Wl,--export=WasmFree");
     } else if (windows) {
         cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_WIN32", ""});
         cfg.linkFlags.push_back("-lkernel32");
@@ -131,7 +123,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
     // when its body is gated out. Vulkan-typed partitions stub to empty
     // modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router
     // partitions stub to empty modules in the opposite direction.
-    std::array<fs::path, 40> ifaces = {
+    std::array<fs::path, 41> ifaces = {
         "interfaces/Crafter.Graphics",
         "interfaces/Crafter.Graphics-Animation",
         "interfaces/Crafter.Graphics-Clipboard",
@@ -147,6 +139,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
         "interfaces/Crafter.Graphics-ForwardDeclarations",
         "interfaces/Crafter.Graphics-Gamepad",
         "interfaces/Crafter.Graphics-GraphicsTypes",
+        "interfaces/Crafter.Graphics-Image2D",
         "interfaces/Crafter.Graphics-ImageVulkan",
         "interfaces/Crafter.Graphics-Input",
         "interfaces/Crafter.Graphics-InputField",