webgpu sponza

This commit is contained in:
Jorijn van der Graaf 2026-05-19 00:27:09 +02:00
commit b5d0f52da0
21 changed files with 1426 additions and 58 deletions

View file

@ -43,6 +43,8 @@ function stub(name) {
"wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
"wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
"wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
"wgpuCreateImage2D", "wgpuWriteImage2D",
"wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
"wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
"wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
"wgpuLoadCustomShader", "wgpuDispatchCustom",
@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => {
if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
};
// Whole-image 2D rgba8unorm texture backing Image2D<RGBA8>. Unlike the
// atlas path (r8unorm, written region by region), the caller uploads the
// full image in one go, so the texture is sized exactly to (w × h).
//   w, h — texture extent in texels.
// Returns the new handle; the texture and its default view are registered
// under that handle in `textures` and `textureViews`.
env.wgpuCreateImage2D = (w, h) => {
    const id = newHandle();
    const descriptor = {
        size: [w, h],
        format: "rgba8unorm",
        // Sampled in shaders and filled through queue uploads.
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    };
    const texture = device.createTexture(descriptor);
    const defaultView = texture.createView();
    textures.set(id, texture);
    textureViews.set(id, defaultView);
    return id;
};
// Layered 2D texture: `layerCount` slices, each (w × h) rgba8unorm. Backs
// Image2DArray<RGBA8>, one material albedo per layer; shaders read it via
// `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
//   w, h       — extent of every layer in texels.
//   layerCount — number of array layers.
// Returns the new handle; the texture and a "2d-array" view spanning all
// layers are registered under it in `textures` and `textureViews`.
env.wgpuCreateImage2DArray = (w, h, layerCount) => {
    const id = newHandle();
    const descriptor = {
        size: [w, h, layerCount],
        dimension: "2d",
        format: "rgba8unorm",
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    };
    const texture = device.createTexture(descriptor);
    // The view dimension is requested explicitly as "2d-array" rather than
    // left to the default.
    const arrayView = texture.createView({
        dimension: "2d-array",
        arrayLayerCount: layerCount,
    });
    textures.set(id, texture);
    textureViews.set(id, arrayView);
    return id;
};
// Uploads one tightly packed (w × h) RGBA8 image into a single array layer
// of the texture registered under `handle`.
//   handle   — key into `textures`; an unknown handle is a silent no-op.
//   layer    — destination array layer (z component of the copy origin).
//   srcPtr   — start offset of the pixels inside the view returned by memU8().
//   byteSize — number of source bytes the caller provides; must be at
//              least w * h * 4.
//   w, h     — image extent in texels.
// GPUQueue.writeTexture places no 256-byte alignment requirement on
// bytesPerRow (that rule only applies to buffer→texture copies), so the
// rows are uploaded directly with bytesPerRow = w * 4 and no staging copy.
env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;
    const bytesPerRow = w * 4; // RGBA8: 4 bytes per texel
    const requiredBytes = bytesPerRow * h;
    // Refuse short sources instead of reading past the range the caller declared.
    if (byteSize < requiredBytes) {
        console.error(`[crafter-wgpu] wgpuWriteImage2DLayer: got ${byteSize} bytes, need ${requiredBytes} for ${w}x${h} RGBA8`);
        return;
    }
    queue.writeTexture(
        { texture: tex, origin: [0, 0, layer] },
        memU8().subarray(srcPtr, srcPtr + requiredBytes),
        { bytesPerRow, rowsPerImage: h },
        { width: w, height: h, depthOrArrayLayers: 1 }
    );
};
// Uploads a tightly packed (w × h) RGBA8 image into the texture registered
// under `handle`, starting at the texture origin.
//   handle   — key into `textures`; an unknown handle is a silent no-op.
//   srcPtr   — start offset of the pixels inside the view returned by memU8().
//   byteSize — number of source bytes the caller provides; must be at
//              least w * h * 4.
//   w, h     — image extent in texels.
// GPUQueue.writeTexture places no 256-byte alignment requirement on
// bytesPerRow (that rule only applies to buffer→texture copies), so the
// rows are uploaded directly with bytesPerRow = w * 4 and no staging copy.
env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;
    const bytesPerRow = w * 4; // RGBA8: 4 bytes per texel
    const requiredBytes = bytesPerRow * h;
    // Refuse short sources instead of reading past the range the caller declared.
    if (byteSize < requiredBytes) {
        console.error(`[crafter-wgpu] wgpuWriteImage2D: got ${byteSize} bytes, need ${requiredBytes} for ${w}x${h} RGBA8`);
        return;
    }
    queue.writeTexture(
        { texture: tex },
        memU8().subarray(srcPtr, srcPtr + requiredBytes),
        { bytesPerRow, rowsPerImage: h },
        { width: w, height: h }
    );
};
env.wgpuCreateLinearClampSampler = () => {
const handle = newHandle();
samplers.set(handle, device.createSampler({
@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]})
: device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
return e;
});
bgls.push(device.createBindGroupLayout({ entries }));
@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
],
});
state.pass.setBindGroup(1, rtBG);
@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
else if (b.kind === 3) resource = textureViews.get(h);
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
@ -981,6 +1080,12 @@ struct BVHNode {
};
// Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
// attribsOffset is the per-mesh base index (in u32 words) into the
// vertexAttribs heap; meshes registered without per-vertex attribs leave
// it 0 (the heap entries at that range are also 0 / never touched). The
// per-vertex stride lives in the user's WGSL — the library doesn't store
// it because the layout is example-defined (Sponza uses 8 u32 / vertex
// for VertexNormalTangentUVPacked).
struct MeshRecord {
rootAabbMin: vec3<f32>,
vertexOffset: u32,
@ -989,7 +1094,7 @@ struct MeshRecord {
bvhOffset: u32,
primRemapOffset: u32,
triangleCount: u32,
_pad: u32,
attribsOffset: u32,
};
// Per-instance TLAS record built by the TLAS-build compute pass.
@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw`
@group(1) @binding(4) var<storage,read> indices : array<u32>;
@group(1) @binding(5) var<storage,read> primRemap : array<u32>;
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
@group(1) @binding(7) var<storage,read> vertexAttribs : array<u32>;
`;
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
@ -1565,6 +1671,7 @@ const rtState = {
indexHeap: null, // u32 stream
bvhHeap: null, // BVHNode stream (32 bytes per node)
primRemapHeap: null, // u32 stream
attribsHeap: null, // u32 stream (per-vertex attribute payload; example-defined stride)
meshRecordsBuffer: null, // GPUBuffer of MeshRecord[]
meshRecordsCapacity: 0,
@ -1588,6 +1695,7 @@ function rtInit() {
rtState.indexHeap = makeRtHeap();
rtState.bvhHeap = makeRtHeap();
rtState.primRemapHeap = makeRtHeap();
rtState.attribsHeap = makeRtHeap();
rtState.meshRecordsCapacity = 16;
rtState.meshRecordsBuffer = device.createBuffer({
size: rtState.meshRecordsCapacity * 48,
@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
verticesPtr, vertexCount,
indicesPtr, indexCount,
bvhNodesPtr, bvhNodeCount,
primRemapPtr, primRemapCount) => {
primRemapPtr, primRemapCount,
attribsPtr, attribsByteCount) => {
if (!rtState.vertHeap) rtInit();
console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);
const vBytes = vertexCount * 12;
const iBytes = indexCount * 4;
const nBytes = bvhNodeCount * 32;
const rBytes = primRemapCount * 4;
// attribsByteCount must be a multiple of 4 (the heap is array<u32>).
// Round up the upload size; the in-MeshRecord offset is in u32 words.
const aBytes = (attribsByteCount + 3) & ~3;
rtHeapEnsure(rtState.vertHeap, vBytes);
rtHeapEnsure(rtState.indexHeap, iBytes);
rtHeapEnsure(rtState.bvhHeap, nBytes);
rtHeapEnsure(rtState.primRemapHeap, rBytes);
if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);
const vOff = rtState.vertHeap.cursor / 12; // in vec3 units
const iOff = rtState.indexHeap.cursor / 4; // in u32 units
const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units
const rOff = rtState.primRemapHeap.cursor / 4;
const aOff = rtState.attribsHeap.cursor / 4; // in u32 units
// queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
// already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
memU8().buffer, bvhNodesPtr, nBytes);
queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
memU8().buffer, primRemapPtr, rBytes);
if (aBytes > 0) {
queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
memU8().buffer, attribsPtr, aBytes);
}
rtState.vertHeap.cursor += vBytes;
rtState.indexHeap.cursor += iBytes;
rtState.bvhHeap.cursor += nBytes;
rtState.primRemapHeap.cursor += rBytes;
rtState.attribsHeap.cursor += aBytes;
const handle = rtState.nextMeshHandle++;
rtMeshRecordsEnsure(handle + 1);
@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
u32[8] = nOff;
u32[9] = rOff;
u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
u32[11] = 0;
u32[11] = aOff;
queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
return handle;
@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
// switches + raygen + @compute entry) with the library prelude/helpers.
const rtPipelines = new Map(); // handle → { pipeline, bgls }
// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
if (!rtState.vertHeap) rtInit();
const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
}
const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
// Parse user bindings (same wire format as wgpuLoadCustomShader).
const userBindings = [];
if (bindingsCount > 0) {
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
for (let i = 0; i < bindingsCount; i++) {
const g = dv.getUint8(i*8 + 0);
if (g < 2) {
console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
return 0;
}
userBindings.push({
group: g,
binding: dv.getUint8(i*8 + 1),
kind: dv.getUint8(i*8 + 2),
pushOffset: dv.getUint32(i*8 + 4, true),
});
}
}
const byGroup = new Map();
for (const b of userBindings) {
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
byGroup.get(b.group).push(b);
}
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
let pipeline;
try {
const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]});
// User binding-group layouts. WebGPU pipeline layouts need a
// contiguous array up to the highest group used, so pad any gaps
// with empty bgls (same rule as wgpuLoadCustomShader).
const userBgls = [];
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
for (let g = 2; g <= highest; g++) {
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
return e;
});
userBgls.push(device.createBindGroupLayout({ entries }));
} else {
userBgls.push(device.createBindGroupLayout({ entries: [] }));
}
}
pipeline = device.createComputePipeline({
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
compute: { module: mod, entryPoint: "main" },
});
const handle = newHandle();
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
return handle;
} catch (e) {
console.error("[crafter-wgpu] RT pipeline compile failed:", e);
@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
};
env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
tlasBufHandle, instanceCount, gx, gy) => {
tlasBufHandle, instanceCount, gx, gy,
handlesPtr, handlesCount) => {
if (!state.pass) return;
const pipe = rtPipelines.get(pipelineHandle);
const tlas = buffers.get(tlasBufHandle);
@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView },
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
],
});
state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, headerBg);
state.pass.setBindGroup(1, dataBg);
// User bindings: walk byGroup in the same sorted order the C++ side
// packed handles[], picking up indices linearly.
if (handlesCount > 0) {
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
let handleIdx = 0;
let bglIdx = 0;
for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
if (pipe.byGroup.has(g)) {
const entries = pipe.byGroup.get(g).map(b => {
const h = handles[handleIdx++];
let resource;
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
else if (b.kind === 3) resource = textureViews.get(h);
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({
layout: pipe.userBgls[bglIdx],
entries,
});
state.pass.setBindGroup(g, bg);
}
bglIdx++;
}
}
state.pass.dispatchWorkgroups(gx, gy, 1);
state.outIsPing = !state.outIsPing;
};