webgpu sponza
This commit is contained in:
parent
5553ded476
commit
b5d0f52da0
21 changed files with 1426 additions and 58 deletions
|
|
@ -43,6 +43,8 @@ function stub(name) {
|
|||
"wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
|
||||
"wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
|
||||
"wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
|
||||
"wgpuCreateImage2D", "wgpuWriteImage2D",
|
||||
"wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
|
||||
"wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
|
||||
"wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
|
||||
"wgpuLoadCustomShader", "wgpuDispatchCustom",
|
||||
|
|
@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => {
|
|||
if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
|
||||
};
|
||||
|
||||
// Allocates a plain 2D rgba8unorm texture, the backing store for
// Image2D<RGBA8>. This is separate from the atlas path (r8unorm, written one
// sub-region at a time): an image texture is sized to the caller's pixel
// data and is expected to be filled by a single whole-image upload.
//
//   w, h - texture size in pixels.
//
// Returns the new handle under which both the texture and its default view
// are registered.
env.wgpuCreateImage2D = (w, h) => {
    const id = newHandle();
    const descriptor = {
        size: [w, h],
        format: "rgba8unorm",
        // TEXTURE_BINDING so shaders can sample it, COPY_DST so
        // queue.writeTexture can fill it.
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    };
    const texture = device.createTexture(descriptor);
    textures.set(id, texture);
    textureViews.set(id, texture.createView());
    return id;
};
|
||||
// Allocates a 2D texture array: `layerCount` layers, each (w x h) rgba8unorm.
// Backs Image2DArray<RGBA8>, which stores one material albedo per layer;
// shaders read it with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
//
//   w, h       - size of every layer in pixels.
//   layerCount - number of array layers.
//
// Returns the new handle under which the texture and its 2d-array view are
// registered.
env.wgpuCreateImage2DArray = (w, h, layerCount) => {
    const id = newHandle();
    const texture = device.createTexture({
        size: [w, h, layerCount],
        dimension: "2d",
        format: "rgba8unorm",
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    });
    textures.set(id, texture);

    // The view dimension is spelled out rather than left to the default so
    // the view is always a 2d-array covering all `layerCount` layers.
    const viewDescriptor = {
        dimension: "2d-array",
        arrayLayerCount: layerCount,
    };
    textureViews.set(id, texture.createView(viewDescriptor));
    return id;
};
|
||||
// Uploads one layer of RGBA8 pixels into a layered texture (the counterpart
// of wgpuCreateImage2DArray).
//
//   handle   - texture handle; an unknown handle makes this a no-op.
//   layer    - destination array layer (z component of the copy origin).
//   srcPtr   - byte offset of the tightly packed pixel rows inside memU8().
//   byteSize - number of source bytes the caller provides at srcPtr.
//   w, h     - size of the upload in pixels.
//
// GPUQueue.writeTexture places no 256-byte alignment requirement on
// bytesPerRow (that rule only applies to buffer<->texture copies), so the
// packed rows are passed through as-is with bytesPerRow = w * 4. No staging
// buffer or per-row repacking is needed.
env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;

    const bytesPerRow = w * 4; // RGBA8: 4 bytes per pixel
    const requiredBytes = bytesPerRow * h;
    // Refuse short sources up front instead of reading heap bytes that lie
    // past what the caller handed over.
    if (byteSize < requiredBytes) {
        console.error(`[crafter-wgpu] wgpuWriteImage2DLayer: ${byteSize} source bytes supplied, ${requiredBytes} needed for ${w}x${h} RGBA8`);
        return;
    }

    queue.writeTexture(
        { texture: tex, origin: [0, 0, layer] },
        memU8().subarray(srcPtr, srcPtr + requiredBytes),
        { bytesPerRow, rowsPerImage: h },
        { width: w, height: h, depthOrArrayLayers: 1 }
    );
};
|
||||
|
||||
// Uploads a whole RGBA8 image into a texture (the counterpart of
// wgpuCreateImage2D).
//
//   handle   - texture handle; an unknown handle makes this a no-op.
//   srcPtr   - byte offset of the tightly packed pixel rows inside memU8().
//   byteSize - number of source bytes the caller provides at srcPtr.
//   w, h     - size of the upload in pixels.
//
// GPUQueue.writeTexture places no 256-byte alignment requirement on
// bytesPerRow (that rule only applies to buffer<->texture copies), so the
// packed rows are passed through as-is with bytesPerRow = w * 4. No staging
// buffer or per-row repacking is needed.
env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;

    const bytesPerRow = w * 4; // RGBA8: 4 bytes per pixel
    const requiredBytes = bytesPerRow * h;
    // Refuse short sources up front instead of reading heap bytes that lie
    // past what the caller handed over.
    if (byteSize < requiredBytes) {
        console.error(`[crafter-wgpu] wgpuWriteImage2D: ${byteSize} source bytes supplied, ${requiredBytes} needed for ${w}x${h} RGBA8`);
        return;
    }

    queue.writeTexture(
        { texture: tex },
        memU8().subarray(srcPtr, srcPtr + requiredBytes),
        { bytesPerRow, rowsPerImage: h },
        { width: w, height: h }
    );
};
|
||||
|
||||
env.wgpuCreateLinearClampSampler = () => {
|
||||
const handle = newHandle();
|
||||
samplers.set(handle, device.createSampler({
|
||||
|
|
@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
|
|||
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
|
||||
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
|
||||
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
|
||||
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
|
||||
]})
|
||||
: device.createBindGroupLayout({ entries: [
|
||||
{ binding: 0, visibility: GPUShaderStage.COMPUTE,
|
||||
|
|
@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
|
|||
if (byGroup.has(g)) {
|
||||
const entries = byGroup.get(g).map(b => {
|
||||
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
|
||||
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
|
||||
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
|
||||
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
|
||||
else if (b.kind === 2) e.sampler = { type: "filtering" };
|
||||
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
|
||||
return e;
|
||||
});
|
||||
bgls.push(device.createBindGroupLayout({ entries }));
|
||||
|
|
@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
|
|||
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
|
||||
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
|
||||
{ binding: 6, resource: outView },
|
||||
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
|
||||
],
|
||||
});
|
||||
state.pass.setBindGroup(1, rtBG);
|
||||
|
|
@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
|
|||
if (b.kind === 0) resource = { buffer: buffers.get(h) };
|
||||
else if (b.kind === 1) resource = textureViews.get(h);
|
||||
else if (b.kind === 2) resource = samplers.get(h);
|
||||
else if (b.kind === 3) resource = textureViews.get(h);
|
||||
return { binding: b.binding, resource };
|
||||
});
|
||||
const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
|
||||
|
|
@ -981,6 +1080,12 @@ struct BVHNode {
|
|||
};
|
||||
|
||||
// Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
|
||||
// attribsOffset is the per-mesh base index (in u32 words) into the
|
||||
// vertexAttribs heap; meshes registered without per-vertex attribs leave
|
||||
// it 0 (the heap entries at that range are also 0 / never touched). The
|
||||
// per-vertex stride lives in the user's WGSL — the library doesn't store
|
||||
// it because the layout is example-defined (Sponza uses 8 u32 / vertex
|
||||
// for VertexNormalTangentUVPacked).
|
||||
struct MeshRecord {
|
||||
rootAabbMin: vec3<f32>,
|
||||
vertexOffset: u32,
|
||||
|
|
@ -989,7 +1094,7 @@ struct MeshRecord {
|
|||
bvhOffset: u32,
|
||||
primRemapOffset: u32,
|
||||
triangleCount: u32,
|
||||
_pad: u32,
|
||||
attribsOffset: u32,
|
||||
};
|
||||
|
||||
// Per-instance TLAS record built by the TLAS-build compute pass.
|
||||
|
|
@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw`
|
|||
@group(1) @binding(4) var<storage,read> indices : array<u32>;
|
||||
@group(1) @binding(5) var<storage,read> primRemap : array<u32>;
|
||||
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
|
||||
@group(1) @binding(7) var<storage,read> vertexAttribs : array<u32>;
|
||||
`;
|
||||
|
||||
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
|
||||
|
|
@ -1565,6 +1671,7 @@ const rtState = {
|
|||
indexHeap: null, // u32 stream
|
||||
bvhHeap: null, // BVHNode stream (32 bytes per node)
|
||||
primRemapHeap: null, // u32 stream
|
||||
attribsHeap: null, // u32 stream (per-vertex attribute payload; example-defined stride)
|
||||
|
||||
meshRecordsBuffer: null, // GPUBuffer of MeshRecord[]
|
||||
meshRecordsCapacity: 0,
|
||||
|
|
@ -1588,6 +1695,7 @@ function rtInit() {
|
|||
rtState.indexHeap = makeRtHeap();
|
||||
rtState.bvhHeap = makeRtHeap();
|
||||
rtState.primRemapHeap = makeRtHeap();
|
||||
rtState.attribsHeap = makeRtHeap();
|
||||
rtState.meshRecordsCapacity = 16;
|
||||
rtState.meshRecordsBuffer = device.createBuffer({
|
||||
size: rtState.meshRecordsCapacity * 48,
|
||||
|
|
@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
|
|||
verticesPtr, vertexCount,
|
||||
indicesPtr, indexCount,
|
||||
bvhNodesPtr, bvhNodeCount,
|
||||
primRemapPtr, primRemapCount) => {
|
||||
primRemapPtr, primRemapCount,
|
||||
attribsPtr, attribsByteCount) => {
|
||||
if (!rtState.vertHeap) rtInit();
|
||||
console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);
|
||||
|
||||
const vBytes = vertexCount * 12;
|
||||
const iBytes = indexCount * 4;
|
||||
const nBytes = bvhNodeCount * 32;
|
||||
const rBytes = primRemapCount * 4;
|
||||
// attribsByteCount must be a multiple of 4 (the heap is array<u32>).
|
||||
// Round up the upload size; the in-MeshRecord offset is in u32 words.
|
||||
const aBytes = (attribsByteCount + 3) & ~3;
|
||||
|
||||
rtHeapEnsure(rtState.vertHeap, vBytes);
|
||||
rtHeapEnsure(rtState.indexHeap, iBytes);
|
||||
rtHeapEnsure(rtState.bvhHeap, nBytes);
|
||||
rtHeapEnsure(rtState.primRemapHeap, rBytes);
|
||||
if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);
|
||||
|
||||
const vOff = rtState.vertHeap.cursor / 12; // in vec3 units
|
||||
const iOff = rtState.indexHeap.cursor / 4; // in u32 units
|
||||
const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units
|
||||
const rOff = rtState.primRemapHeap.cursor / 4;
|
||||
const aOff = rtState.attribsHeap.cursor / 4; // in u32 units
|
||||
|
||||
// queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
|
||||
// already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
|
||||
|
|
@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
|
|||
memU8().buffer, bvhNodesPtr, nBytes);
|
||||
queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
|
||||
memU8().buffer, primRemapPtr, rBytes);
|
||||
if (aBytes > 0) {
|
||||
queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
|
||||
memU8().buffer, attribsPtr, aBytes);
|
||||
}
|
||||
|
||||
rtState.vertHeap.cursor += vBytes;
|
||||
rtState.indexHeap.cursor += iBytes;
|
||||
rtState.bvhHeap.cursor += nBytes;
|
||||
rtState.primRemapHeap.cursor += rBytes;
|
||||
rtState.attribsHeap.cursor += aBytes;
|
||||
|
||||
const handle = rtState.nextMeshHandle++;
|
||||
rtMeshRecordsEnsure(handle + 1);
|
||||
|
|
@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
|
|||
u32[8] = nOff;
|
||||
u32[9] = rOff;
|
||||
u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
|
||||
u32[11] = 0;
|
||||
u32[11] = aOff;
|
||||
queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
|
||||
|
||||
return handle;
|
||||
|
|
@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
|
|||
|
||||
// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
|
||||
// switches + raygen + @compute entry) with the library prelude/helpers.
|
||||
const rtPipelines = new Map(); // handle → { pipeline, bgls }
|
||||
// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
|
||||
// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
|
||||
// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
|
||||
// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
|
||||
const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }
|
||||
|
||||
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
|
||||
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
|
||||
if (!rtState.vertHeap) rtInit();
|
||||
const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
|
||||
|
||||
|
|
@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
|
|||
}
|
||||
const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
|
||||
|
||||
// Parse user bindings (same wire format as wgpuLoadCustomShader).
|
||||
const userBindings = [];
|
||||
if (bindingsCount > 0) {
|
||||
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
|
||||
for (let i = 0; i < bindingsCount; i++) {
|
||||
const g = dv.getUint8(i*8 + 0);
|
||||
if (g < 2) {
|
||||
console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
|
||||
return 0;
|
||||
}
|
||||
userBindings.push({
|
||||
group: g,
|
||||
binding: dv.getUint8(i*8 + 1),
|
||||
kind: dv.getUint8(i*8 + 2),
|
||||
pushOffset: dv.getUint32(i*8 + 4, true),
|
||||
});
|
||||
}
|
||||
}
|
||||
const byGroup = new Map();
|
||||
for (const b of userBindings) {
|
||||
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
|
||||
byGroup.get(b.group).push(b);
|
||||
}
|
||||
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
|
||||
|
||||
let pipeline;
|
||||
try {
|
||||
const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
|
||||
|
|
@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
|
|||
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
|
||||
{ binding: 6, visibility: GPUShaderStage.COMPUTE,
|
||||
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
|
||||
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
|
||||
]});
|
||||
// User binding-group layouts. WebGPU pipeline layouts need a
|
||||
// contiguous array up to the highest group used, so pad any gaps
|
||||
// with empty bgls (same rule as wgpuLoadCustomShader).
|
||||
const userBgls = [];
|
||||
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
|
||||
for (let g = 2; g <= highest; g++) {
|
||||
if (byGroup.has(g)) {
|
||||
const entries = byGroup.get(g).map(b => {
|
||||
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
|
||||
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
|
||||
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
|
||||
else if (b.kind === 2) e.sampler = { type: "filtering" };
|
||||
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
|
||||
return e;
|
||||
});
|
||||
userBgls.push(device.createBindGroupLayout({ entries }));
|
||||
} else {
|
||||
userBgls.push(device.createBindGroupLayout({ entries: [] }));
|
||||
}
|
||||
}
|
||||
pipeline = device.createComputePipeline({
|
||||
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
|
||||
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
|
||||
compute: { module: mod, entryPoint: "main" },
|
||||
});
|
||||
const handle = newHandle();
|
||||
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
|
||||
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
|
||||
return handle;
|
||||
} catch (e) {
|
||||
console.error("[crafter-wgpu] RT pipeline compile failed:", e);
|
||||
|
|
@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
|
|||
};
|
||||
|
||||
env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
|
||||
tlasBufHandle, instanceCount, gx, gy) => {
|
||||
tlasBufHandle, instanceCount, gx, gy,
|
||||
handlesPtr, handlesCount) => {
|
||||
if (!state.pass) return;
|
||||
const pipe = rtPipelines.get(pipelineHandle);
|
||||
const tlas = buffers.get(tlasBufHandle);
|
||||
|
|
@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
|
|||
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
|
||||
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
|
||||
{ binding: 6, resource: outView },
|
||||
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
|
||||
],
|
||||
});
|
||||
|
||||
state.pass.setPipeline(pipe.pipeline);
|
||||
state.pass.setBindGroup(0, headerBg);
|
||||
state.pass.setBindGroup(1, dataBg);
|
||||
|
||||
// User bindings: walk byGroup in the same sorted order the C++ side
|
||||
// packed handles[], picking up indices linearly.
|
||||
if (handlesCount > 0) {
|
||||
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
|
||||
let handleIdx = 0;
|
||||
let bglIdx = 0;
|
||||
for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
|
||||
if (pipe.byGroup.has(g)) {
|
||||
const entries = pipe.byGroup.get(g).map(b => {
|
||||
const h = handles[handleIdx++];
|
||||
let resource;
|
||||
if (b.kind === 0) resource = { buffer: buffers.get(h) };
|
||||
else if (b.kind === 1) resource = textureViews.get(h);
|
||||
else if (b.kind === 2) resource = samplers.get(h);
|
||||
else if (b.kind === 3) resource = textureViews.get(h);
|
||||
return { binding: b.binding, resource };
|
||||
});
|
||||
const bg = device.createBindGroup({
|
||||
layout: pipe.userBgls[bglIdx],
|
||||
entries,
|
||||
});
|
||||
state.pass.setBindGroup(g, bg);
|
||||
}
|
||||
bglIdx++;
|
||||
}
|
||||
}
|
||||
|
||||
state.pass.dispatchWorkgroups(gx, gy, 1);
|
||||
state.outIsPing = !state.outIsPing;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue