webgpu sponza

2026-05-19 00:27:09 +02:00 · 2026-05-19 00:27:09 +02:00 · b5d0f52da0
commit b5d0f52da0
parent 5553ded476
21 changed files with 1426 additions and 58 deletions
--- a/additional/dom-webgpu.js
+++ b/additional/dom-webgpu.js
@ -43,6 +43,8 @@ function stub(name) {
        "wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
        "wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
        "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
+        "wgpuCreateImage2D", "wgpuWriteImage2D",
+        "wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
        "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
        "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
        "wgpuLoadCustomShader", "wgpuDispatchCustom",
@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => {
    if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
 };

+// Backing store for Image2D<RGBA8>: a plain w × h rgba8unorm texture that
+// can be sampled (TEXTURE_BINDING) and uploaded into (COPY_DST). Unlike the
+// atlas path (r8unorm, sub-region writes) the whole image is uploaded in
+// one go. Registers the texture and its default view under a fresh handle
+// and returns that handle.
+env.wgpuCreateImage2D = (w, h) => {
+    const handle = newHandle();
+    const descriptor = {
+        format: "rgba8unorm",
+        size: [w, h],
+        usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const texture = device.createTexture(descriptor);
+    textures.set(handle, texture);
+    textureViews.set(handle, texture.createView());
+    return handle;
+};
+// Backing store for Image2DArray<RGBA8>: layerCount layers, each an
+// identical w × h rgba8unorm image (one material albedo per layer).
+// The registered view is explicitly "2d-array" so shaders can sample it
+// with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
+// Returns the handle under which the texture and its view are stored.
+env.wgpuCreateImage2DArray = (w, h, layerCount) => {
+    const handle = newHandle();
+    const descriptor = {
+        dimension: "2d",
+        format: "rgba8unorm",
+        size: [w, h, layerCount],
+        usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const texture = device.createTexture(descriptor);
+    textures.set(handle, texture);
+    const arrayView = texture.createView({
+        arrayLayerCount: layerCount,
+        dimension: "2d-array",
+    });
+    textureViews.set(handle, arrayView);
+    return handle;
+};
+// Uploads one tightly packed w × h RGBA8 image from wasm memory (starting
+// at srcPtr) into array layer `layer` of the texture registered under
+// `handle`. Unknown handles are silently ignored.
+//
+// The rows go straight from wasm memory to queue.writeTexture. The
+// 256-byte bytesPerRow alignment rule applies to buffer<->texture copies
+// (copyBufferToTexture), not to queue.writeTexture, so no padded staging
+// copy is needed for any width.
+//
+// byteSize stays in the signature for the caller's ABI; the upload always
+// covers exactly w * 4 * h bytes, independent of the width's alignment.
+env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => {
+    const tex = textures.get(handle);
+    if (!tex) return;
+    const bytesPerRow = w * 4;   // RGBA8 = 4 bytes per texel
+    queue.writeTexture(
+        { texture: tex, origin: [0, 0, layer] },
+        memU8().subarray(srcPtr, srcPtr + bytesPerRow * h),
+        { bytesPerRow, rowsPerImage: h },
+        { width: w, height: h, depthOrArrayLayers: 1 }
+    );
+};
+
+// Uploads a whole tightly packed w × h RGBA8 image from wasm memory
+// (starting at srcPtr) into the 2D texture registered under `handle`.
+// Unknown handles are silently ignored.
+//
+// queue.writeTexture has no alignment requirement on bytesPerRow (the
+// multiple-of-256 rule only applies to buffer<->texture copies such as
+// copyBufferToTexture), so the source rows are passed through directly
+// with bytesPerRow = w * 4 and no repacking, whatever the width.
+//
+// byteSize stays in the signature for the caller's ABI; the upload always
+// covers exactly w * 4 * h bytes, independent of the width's alignment.
+env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => {
+    const tex = textures.get(handle);
+    if (!tex) return;
+    const bytesPerRow = w * 4;   // RGBA8 = 4 bytes per texel
+    queue.writeTexture(
+        { texture: tex },
+        memU8().subarray(srcPtr, srcPtr + bytesPerRow * h),
+        { bytesPerRow, rowsPerImage: h },
+        { width: w, height: h }
+    );
+};
+
 env.wgpuCreateLinearClampSampler = () => {
    const handle = newHandle();
    samplers.set(handle, device.createSampler({
@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
                { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
                { binding: 6, visibility: GPUShaderStage.COMPUTE,
                  storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+                { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
            ]})
            : device.createBindGroupLayout({ entries: [
                { binding: 0, visibility: GPUShaderStage.COMPUTE,
@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
        if (byGroup.has(g)) {
            const entries = byGroup.get(g).map(b => {
                const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
-                if      (b.kind === 0) e.buffer = { type: "read-only-storage" };
+                if      (b.kind === 0) e.buffer  = { type: "read-only-storage" };
                else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
                else if (b.kind === 2) e.sampler = { type: "filtering" };
+                else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
                return e;
            });
            bgls.push(device.createBindGroupLayout({ entries }));
@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
                { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
                { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
                { binding: 6, resource: outView },
+                { binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
            ],
        });
        state.pass.setBindGroup(1, rtBG);
@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
            if      (b.kind === 0) resource = { buffer: buffers.get(h) };
            else if (b.kind === 1) resource = textureViews.get(h);
            else if (b.kind === 2) resource = samplers.get(h);
+            else if (b.kind === 3) resource = textureViews.get(h);
            return { binding: b.binding, resource };
        });
        const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
@ -981,6 +1080,12 @@ struct BVHNode {
 };

 // Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
+// attribsOffset is the per-mesh base index (in u32 words) into the
+// vertexAttribs heap; meshes registered without per-vertex attribs leave
+// it 0 (the heap entries at that range are also 0 / never touched). The
+// per-vertex stride lives in the user's WGSL — the library doesn't store
+// it because the layout is example-defined (Sponza uses 8 u32 / vertex
+// for VertexNormalTangentUVPacked).
 struct MeshRecord {
    rootAabbMin:     vec3<f32>,
    vertexOffset:    u32,
@ -989,7 +1094,7 @@ struct MeshRecord {
    bvhOffset:       u32,
    primRemapOffset: u32,
    triangleCount:   u32,
-    _pad:            u32,
+    attribsOffset:   u32,
 };

 // Per-instance TLAS record built by the TLAS-build compute pass.
@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw`
@group(1) @binding(4) var<storage,read>  indices     : array<u32>;
@group(1) @binding(5) var<storage,read>  primRemap   : array<u32>;
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
+@group(1) @binding(7) var<storage,read>  vertexAttribs : array<u32>;
 `;

 const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
@ -1565,6 +1671,7 @@ const rtState = {
    indexHeap: null,       // u32 stream
    bvhHeap: null,         // BVHNode stream (32 bytes per node)
    primRemapHeap: null,   // u32 stream
+    attribsHeap: null,     // u32 stream (per-vertex attribute payload; example-defined stride)

    meshRecordsBuffer: null,    // GPUBuffer of MeshRecord[]
    meshRecordsCapacity: 0,
@ -1588,6 +1695,7 @@ function rtInit() {
    rtState.indexHeap     = makeRtHeap();
    rtState.bvhHeap       = makeRtHeap();
    rtState.primRemapHeap = makeRtHeap();
+    rtState.attribsHeap   = makeRtHeap();
    rtState.meshRecordsCapacity = 16;
    rtState.meshRecordsBuffer = device.createBuffer({
        size: rtState.meshRecordsCapacity * 48,
@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
                            verticesPtr, vertexCount,
                            indicesPtr,  indexCount,
                            bvhNodesPtr, bvhNodeCount,
-                            primRemapPtr, primRemapCount) => {
+                            primRemapPtr, primRemapCount,
+                            attribsPtr,  attribsByteCount) => {
    if (!rtState.vertHeap) rtInit();
+    console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);

    const vBytes   = vertexCount  * 12;
    const iBytes   = indexCount   * 4;
    const nBytes   = bvhNodeCount * 32;
    const rBytes   = primRemapCount * 4;
+    // attribsByteCount must be a multiple of 4 (the heap is array<u32>).
+    // Round up the upload size; the in-MeshRecord offset is in u32 words.
+    const aBytes   = (attribsByteCount + 3) & ~3;

    rtHeapEnsure(rtState.vertHeap,      vBytes);
    rtHeapEnsure(rtState.indexHeap,     iBytes);
    rtHeapEnsure(rtState.bvhHeap,       nBytes);
    rtHeapEnsure(rtState.primRemapHeap, rBytes);
+    if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);

    const vOff = rtState.vertHeap.cursor      / 12;   // in vec3 units
    const iOff = rtState.indexHeap.cursor     / 4;    // in u32 units
    const nOff = rtState.bvhHeap.cursor       / 32;   // in BVHNode units
    const rOff = rtState.primRemapHeap.cursor / 4;
+    const aOff = rtState.attribsHeap.cursor   / 4;    // in u32 units

    // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
    // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
                      memU8().buffer, bvhNodesPtr, nBytes);
    queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
                      memU8().buffer, primRemapPtr, rBytes);
+    if (aBytes > 0) {
+        queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
+                          memU8().buffer, attribsPtr, aBytes);
+    }

    rtState.vertHeap.cursor      += vBytes;
    rtState.indexHeap.cursor     += iBytes;
    rtState.bvhHeap.cursor       += nBytes;
    rtState.primRemapHeap.cursor += rBytes;
+    rtState.attribsHeap.cursor   += aBytes;

    const handle = rtState.nextMeshHandle++;
    rtMeshRecordsEnsure(handle + 1);
@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
    u32[8] = nOff;
    u32[9] = rOff;
    u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
-    u32[11] = 0;
+    u32[11] = aOff;
    queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);

    return handle;
@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {

 // RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
 // switches + raygen + @compute entry) with the library prelude/helpers.
-const rtPipelines = new Map(); // handle → { pipeline, bgls }
+// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
+// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
+// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
+// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
+const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }

-env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
+env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
    if (!rtState.vertHeap) rtInit();
    const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));

@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
    }
    const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;

+    // Parse user bindings (same wire format as wgpuLoadCustomShader).
+    const userBindings = [];
+    if (bindingsCount > 0) {
+        const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
+        for (let i = 0; i < bindingsCount; i++) {
+            const g = dv.getUint8(i*8 + 0);
+            if (g < 2) {
+                console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
+                return 0;
+            }
+            userBindings.push({
+                group:      g,
+                binding:    dv.getUint8(i*8 + 1),
+                kind:       dv.getUint8(i*8 + 2),
+                pushOffset: dv.getUint32(i*8 + 4, true),
+            });
+        }
+    }
+    const byGroup = new Map();
+    for (const b of userBindings) {
+        if (!byGroup.has(b.group)) byGroup.set(b.group, []);
+        byGroup.get(b.group).push(b);
+    }
+    const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
+
    let pipeline;
    try {
        const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
            { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
            { binding: 6, visibility: GPUShaderStage.COMPUTE,
              storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+            { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
        ]});
+        // User binding-group layouts. WebGPU pipeline layouts need a
+        // contiguous array up to the highest group used, so pad any gaps
+        // with empty bgls (same rule as wgpuLoadCustomShader).
+        const userBgls = [];
+        const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
+        for (let g = 2; g <= highest; g++) {
+            if (byGroup.has(g)) {
+                const entries = byGroup.get(g).map(b => {
+                    const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
+                    if      (b.kind === 0) e.buffer  = { type: "read-only-storage" };
+                    else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
+                    else if (b.kind === 2) e.sampler = { type: "filtering" };
+                    else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
+                    return e;
+                });
+                userBgls.push(device.createBindGroupLayout({ entries }));
+            } else {
+                userBgls.push(device.createBindGroupLayout({ entries: [] }));
+            }
+        }
        pipeline = device.createComputePipeline({
-            layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
+            layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
            compute: { module: mod, entryPoint: "main" },
        });
        const handle = newHandle();
-        rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
+        rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
        return handle;
    } catch (e) {
        console.error("[crafter-wgpu] RT pipeline compile failed:", e);
@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
 };

 env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
-                      tlasBufHandle, instanceCount, gx, gy) => {
+                      tlasBufHandle, instanceCount, gx, gy,
+                      handlesPtr, handlesCount) => {
    if (!state.pass) return;
    const pipe = rtPipelines.get(pipelineHandle);
    const tlas = buffers.get(tlasBufHandle);
@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
            { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
            { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
            { binding: 6, resource: outView },
+            { binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
        ],
    });

    state.pass.setPipeline(pipe.pipeline);
    state.pass.setBindGroup(0, headerBg);
    state.pass.setBindGroup(1, dataBg);
+
+    // User bindings: walk byGroup in the same sorted order the C++ side
+    // packed handles[], picking up indices linearly.
+    if (handlesCount > 0) {
+        const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
+        let handleIdx = 0;
+        let bglIdx = 0;
+        for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
+            if (pipe.byGroup.has(g)) {
+                const entries = pipe.byGroup.get(g).map(b => {
+                    const h = handles[handleIdx++];
+                    let resource;
+                    if      (b.kind === 0) resource = { buffer: buffers.get(h) };
+                    else if (b.kind === 1) resource = textureViews.get(h);
+                    else if (b.kind === 2) resource = samplers.get(h);
+                    else if (b.kind === 3) resource = textureViews.get(h);
+                    return { binding: b.binding, resource };
+                });
+                const bg = device.createBindGroup({
+                    layout: pipe.userBgls[bglIdx],
+                    entries,
+                });
+                state.pass.setBindGroup(g, bg);
+            }
+            bglIdx++;
+        }
+    }
+
    state.pass.dispatchWorkgroups(gx, gy, 1);
    state.outIsPing = !state.outIsPing;
 };