webgpu sponza

This commit is contained in:
Jorijn van der Graaf 2026-05-19 00:27:09 +02:00
commit b5d0f52da0
21 changed files with 1426 additions and 58 deletions

View file

@ -43,6 +43,8 @@ function stub(name) {
"wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight", "wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight",
"wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer", "wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer",
"wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture", "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
"wgpuCreateImage2D", "wgpuWriteImage2D",
"wgpuCreateImage2DArray", "wgpuWriteImage2DLayer",
"wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd", "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
"wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText", "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
"wgpuLoadCustomShader", "wgpuDispatchCustom", "wgpuLoadCustomShader", "wgpuDispatchCustom",
@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => {
if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); } if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); }
}; };
// Allocates a standalone 2D rgba8unorm texture — the backing store for
// Image2D<RGBA8>. Unlike the atlas path (r8unorm, sub-region writes), the
// image is meant to be filled in one whole-image upload.
//   w, h — texture extent in texels.
// Returns the handle under which both the texture and its default view
// are registered.
env.wgpuCreateImage2D = (w, h) => {
    const id = newHandle();
    const descriptor = {
        size: [w, h],
        format: "rgba8unorm",
        // TEXTURE_BINDING so shaders can bind it, COPY_DST so it can be
        // the destination of an upload.
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    };
    const texture = device.createTexture(descriptor);
    textures.set(id, texture);
    textureViews.set(id, texture.createView());
    return id;
};
// Allocates a 2D texture array: `layerCount` layers, each w × h rgba8unorm.
// Backs Image2DArray<RGBA8> (one material albedo per layer); shaders sample
// it with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`.
//   w, h       — per-layer extent in texels.
//   layerCount — number of array layers.
// Returns the handle under which the texture and its array view are
// registered.
env.wgpuCreateImage2DArray = (w, h, layerCount) => {
    const id = newHandle();
    const texture = device.createTexture({
        dimension: "2d",
        size: [w, h, layerCount],
        format: "rgba8unorm",
        usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    });
    textures.set(id, texture);
    // The view dimension is spelled out as "2d-array" rather than left to
    // the default, so the registered view always spans `layerCount` layers
    // as an array view.
    const arrayView = texture.createView({
        dimension: "2d-array",
        arrayLayerCount: layerCount,
    });
    textureViews.set(id, arrayView);
    return id;
};
// Shared upload path for wgpuWriteImage2D / wgpuWriteImage2DLayer: copies
// one tightly packed w × h RGBA8 image out of wasm memory into array layer
// `layer` of `tex`.
//
// queue.writeTexture has no 256-byte bytesPerRow alignment rule (that
// constraint belongs to copyBufferToTexture / copyTextureToBuffer);
// bytesPerRow only has to cover one row of texels. The source rows are
// therefore handed over as-is, with no repack into a padded staging buffer.
//
//   tex      — destination GPUTexture.
//   layer    — destination array layer (0 for a plain 2D texture).
//   srcPtr   — byte offset of the pixel data in wasm memory.
//   byteSize — number of bytes the caller provides at srcPtr.
//   w, h     — image extent in texels.
function writeRgba8Image(tex, layer, srcPtr, byteSize, w, h) {
    const bytesPerRow = w * 4;              // RGBA8 = 4 bytes per texel
    const requiredBytes = bytesPerRow * h;
    // Refuse short uploads instead of reading past the caller's buffer.
    if (byteSize < requiredBytes) {
        console.error(`[crafter-wgpu] image upload: got ${byteSize} bytes, ${w}x${h} rgba8 needs ${requiredBytes}`);
        return;
    }
    queue.writeTexture(
        { texture: tex, origin: [0, 0, layer] },
        memU8().subarray(srcPtr, srcPtr + requiredBytes),
        { bytesPerRow, rowsPerImage: h },
        { width: w, height: h, depthOrArrayLayers: 1 }
    );
}

// Uploads one whole layer of a texture created by wgpuCreateImage2DArray.
// Silently ignores unknown handles.
env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;
    writeRgba8Image(tex, layer, srcPtr, byteSize, w, h);
};

// Uploads the whole image of a texture created by wgpuCreateImage2D
// (i.e. layer 0). Silently ignores unknown handles.
env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => {
    const tex = textures.get(handle);
    if (!tex) return;
    writeRgba8Image(tex, 0, srcPtr, byteSize, w, h);
};
env.wgpuCreateLinearClampSampler = () => { env.wgpuCreateLinearClampSampler = () => {
const handle = newHandle(); const handle = newHandle();
samplers.set(handle, device.createSampler({ samplers.set(handle, device.createSampler({
@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE, { binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]}) ]})
: device.createBindGroupLayout({ entries: [ : device.createBindGroupLayout({ entries: [
{ binding: 0, visibility: GPUShaderStage.COMPUTE, { binding: 0, visibility: GPUShaderStage.COMPUTE,
@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue
if (byGroup.has(g)) { if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => { const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE }; const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" }; if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" }; else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" }; else if (b.kind === 2) e.sampler = { type: "filtering" };
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
return e; return e;
}); });
bgls.push(device.createBindGroupLayout({ entries })); bgls.push(device.createBindGroupLayout({ entries }));
@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView }, { binding: 6, resource: outView },
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
], ],
}); });
state.pass.setBindGroup(1, rtBG); state.pass.setBindGroup(1, rtBG);
@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
if (b.kind === 0) resource = { buffer: buffers.get(h) }; if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h); else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h); else if (b.kind === 2) resource = samplers.get(h);
else if (b.kind === 3) resource = textureViews.get(h);
return { binding: b.binding, resource }; return { binding: b.binding, resource };
}); });
const bg = device.createBindGroup({ layout: pipe.bgls[g], entries }); const bg = device.createBindGroup({ layout: pipe.bgls[g], entries });
@ -981,6 +1080,12 @@ struct BVHNode {
}; };
// Per-mesh record. Indexed by RTInstance::accelerationStructureReference. // Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
// attribsOffset is the per-mesh base index (in u32 words) into the
// vertexAttribs heap; meshes registered without per-vertex attribs leave
// it 0 (the heap entries at that range are also 0 / never touched). The
// per-vertex stride lives in the user's WGSL — the library doesn't store
// it because the layout is example-defined (Sponza uses 8 u32 / vertex
// for VertexNormalTangentUVPacked).
struct MeshRecord { struct MeshRecord {
rootAabbMin: vec3<f32>, rootAabbMin: vec3<f32>,
vertexOffset: u32, vertexOffset: u32,
@ -989,7 +1094,7 @@ struct MeshRecord {
bvhOffset: u32, bvhOffset: u32,
primRemapOffset: u32, primRemapOffset: u32,
triangleCount: u32, triangleCount: u32,
_pad: u32, attribsOffset: u32,
}; };
// Per-instance TLAS record built by the TLAS-build compute pass. // Per-instance TLAS record built by the TLAS-build compute pass.
@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw`
@group(1) @binding(4) var<storage,read> indices : array<u32>; @group(1) @binding(4) var<storage,read> indices : array<u32>;
@group(1) @binding(5) var<storage,read> primRemap : array<u32>; @group(1) @binding(5) var<storage,read> primRemap : array<u32>;
@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>; @group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
@group(1) @binding(7) var<storage,read> vertexAttribs : array<u32>;
`; `;
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings; const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
@ -1565,6 +1671,7 @@ const rtState = {
indexHeap: null, // u32 stream indexHeap: null, // u32 stream
bvhHeap: null, // BVHNode stream (32 bytes per node) bvhHeap: null, // BVHNode stream (32 bytes per node)
primRemapHeap: null, // u32 stream primRemapHeap: null, // u32 stream
attribsHeap: null, // u32 stream (per-vertex attribute payload; example-defined stride)
meshRecordsBuffer: null, // GPUBuffer of MeshRecord[] meshRecordsBuffer: null, // GPUBuffer of MeshRecord[]
meshRecordsCapacity: 0, meshRecordsCapacity: 0,
@ -1588,6 +1695,7 @@ function rtInit() {
rtState.indexHeap = makeRtHeap(); rtState.indexHeap = makeRtHeap();
rtState.bvhHeap = makeRtHeap(); rtState.bvhHeap = makeRtHeap();
rtState.primRemapHeap = makeRtHeap(); rtState.primRemapHeap = makeRtHeap();
rtState.attribsHeap = makeRtHeap();
rtState.meshRecordsCapacity = 16; rtState.meshRecordsCapacity = 16;
rtState.meshRecordsBuffer = device.createBuffer({ rtState.meshRecordsBuffer = device.createBuffer({
size: rtState.meshRecordsCapacity * 48, size: rtState.meshRecordsCapacity * 48,
@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
verticesPtr, vertexCount, verticesPtr, vertexCount,
indicesPtr, indexCount, indicesPtr, indexCount,
bvhNodesPtr, bvhNodeCount, bvhNodesPtr, bvhNodeCount,
primRemapPtr, primRemapCount) => { primRemapPtr, primRemapCount,
attribsPtr, attribsByteCount) => {
if (!rtState.vertHeap) rtInit(); if (!rtState.vertHeap) rtInit();
console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`);
const vBytes = vertexCount * 12; const vBytes = vertexCount * 12;
const iBytes = indexCount * 4; const iBytes = indexCount * 4;
const nBytes = bvhNodeCount * 32; const nBytes = bvhNodeCount * 32;
const rBytes = primRemapCount * 4; const rBytes = primRemapCount * 4;
// attribsByteCount must be a multiple of 4 (the heap is array<u32>).
// Round up the upload size; the in-MeshRecord offset is in u32 words.
const aBytes = (attribsByteCount + 3) & ~3;
rtHeapEnsure(rtState.vertHeap, vBytes); rtHeapEnsure(rtState.vertHeap, vBytes);
rtHeapEnsure(rtState.indexHeap, iBytes); rtHeapEnsure(rtState.indexHeap, iBytes);
rtHeapEnsure(rtState.bvhHeap, nBytes); rtHeapEnsure(rtState.bvhHeap, nBytes);
rtHeapEnsure(rtState.primRemapHeap, rBytes); rtHeapEnsure(rtState.primRemapHeap, rBytes);
if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes);
const vOff = rtState.vertHeap.cursor / 12; // in vec3 units const vOff = rtState.vertHeap.cursor / 12; // in vec3 units
const iOff = rtState.indexHeap.cursor / 4; // in u32 units const iOff = rtState.indexHeap.cursor / 4; // in u32 units
const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units
const rOff = rtState.primRemapHeap.cursor / 4; const rOff = rtState.primRemapHeap.cursor / 4;
const aOff = rtState.attribsHeap.cursor / 4; // in u32 units
// queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
// already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4. // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
memU8().buffer, bvhNodesPtr, nBytes); memU8().buffer, bvhNodesPtr, nBytes);
queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor, queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
memU8().buffer, primRemapPtr, rBytes); memU8().buffer, primRemapPtr, rBytes);
if (aBytes > 0) {
queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor,
memU8().buffer, attribsPtr, aBytes);
}
rtState.vertHeap.cursor += vBytes; rtState.vertHeap.cursor += vBytes;
rtState.indexHeap.cursor += iBytes; rtState.indexHeap.cursor += iBytes;
rtState.bvhHeap.cursor += nBytes; rtState.bvhHeap.cursor += nBytes;
rtState.primRemapHeap.cursor += rBytes; rtState.primRemapHeap.cursor += rBytes;
rtState.attribsHeap.cursor += aBytes;
const handle = rtState.nextMeshHandle++; const handle = rtState.nextMeshHandle++;
rtMeshRecordsEnsure(handle + 1); rtMeshRecordsEnsure(handle + 1);
@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
u32[8] = nOff; u32[8] = nOff;
u32[9] = rOff; u32[9] = rOff;
u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0; u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
u32[11] = 0; u32[11] = aOff;
queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec); queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
return handle; return handle;
@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega // RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
// switches + raygen + @compute entry) with the library prelude/helpers. // switches + raygen + @compute entry) with the library prelude/helpers.
const rtPipelines = new Map(); // handle → { pipeline, bgls } // `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte
// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the
// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo
// textures, samplers). Pass (0, 0) for a pipeline with no user bindings.
const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups }
env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
if (!rtState.vertHeap) rtInit(); if (!rtState.vertHeap) rtInit();
const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen)); const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
} }
const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers; const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
// Parse user bindings (same wire format as wgpuLoadCustomShader).
const userBindings = [];
if (bindingsCount > 0) {
const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
for (let i = 0; i < bindingsCount; i++) {
const g = dv.getUint8(i*8 + 0);
if (g < 2) {
console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`);
return 0;
}
userBindings.push({
group: g,
binding: dv.getUint8(i*8 + 1),
kind: dv.getUint8(i*8 + 2),
pushOffset: dv.getUint32(i*8 + 4, true),
});
}
}
const byGroup = new Map();
for (const b of userBindings) {
if (!byGroup.has(b.group)) byGroup.set(b.group, []);
byGroup.get(b.group).push(b);
}
const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b);
let pipeline; let pipeline;
try { try {
const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" }); const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
{ binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
{ binding: 6, visibility: GPUShaderStage.COMPUTE, { binding: 6, visibility: GPUShaderStage.COMPUTE,
storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
{ binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
]}); ]});
// User binding-group layouts. WebGPU pipeline layouts need a
// contiguous array up to the highest group used, so pad any gaps
// with empty bgls (same rule as wgpuLoadCustomShader).
const userBgls = [];
const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1;
for (let g = 2; g <= highest; g++) {
if (byGroup.has(g)) {
const entries = byGroup.get(g).map(b => {
const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE };
if (b.kind === 0) e.buffer = { type: "read-only-storage" };
else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" };
else if (b.kind === 2) e.sampler = { type: "filtering" };
else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" };
return e;
});
userBgls.push(device.createBindGroupLayout({ entries }));
} else {
userBgls.push(device.createBindGroupLayout({ entries: [] }));
}
}
pipeline = device.createComputePipeline({ pipeline = device.createComputePipeline({
layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }), layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }),
compute: { module: mod, entryPoint: "main" }, compute: { module: mod, entryPoint: "main" },
}); });
const handle = newHandle(); const handle = newHandle();
rtPipelines.set(handle, { pipeline, headerBgl, dataBgl }); rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups });
return handle; return handle;
} catch (e) { } catch (e) {
console.error("[crafter-wgpu] RT pipeline compile failed:", e); console.error("[crafter-wgpu] RT pipeline compile failed:", e);
@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
}; };
env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes, env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
tlasBufHandle, instanceCount, gx, gy) => { tlasBufHandle, instanceCount, gx, gy,
handlesPtr, handlesCount) => {
if (!state.pass) return; if (!state.pass) return;
const pipe = rtPipelines.get(pipelineHandle); const pipe = rtPipelines.get(pipelineHandle);
const tlas = buffers.get(tlasBufHandle); const tlas = buffers.get(tlasBufHandle);
@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
{ binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
{ binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
{ binding: 6, resource: outView }, { binding: 6, resource: outView },
{ binding: 7, resource: { buffer: rtState.attribsHeap.gpu } },
], ],
}); });
state.pass.setPipeline(pipe.pipeline); state.pass.setPipeline(pipe.pipeline);
state.pass.setBindGroup(0, headerBg); state.pass.setBindGroup(0, headerBg);
state.pass.setBindGroup(1, dataBg); state.pass.setBindGroup(1, dataBg);
// User bindings: walk byGroup in the same sorted order the C++ side
// packed handles[], picking up indices linearly.
if (handlesCount > 0) {
const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount);
let handleIdx = 0;
let bglIdx = 0;
for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) {
if (pipe.byGroup.has(g)) {
const entries = pipe.byGroup.get(g).map(b => {
const h = handles[handleIdx++];
let resource;
if (b.kind === 0) resource = { buffer: buffers.get(h) };
else if (b.kind === 1) resource = textureViews.get(h);
else if (b.kind === 2) resource = samplers.get(h);
else if (b.kind === 3) resource = textureViews.get(h);
return { binding: b.binding, resource };
});
const bg = device.createBindGroup({
layout: pipe.userBgls[bglIdx],
entries,
});
state.pass.setBindGroup(g, bg);
}
bglIdx++;
}
}
state.pass.dispatchWorkgroups(gx, gy, 1); state.pass.dispatchWorkgroups(gx, gy, 1);
state.outIsPing = !state.outIsPing; state.outIsPing = !state.outIsPing;
}; };

58
examples/Sponza/README.md Normal file
View file

@ -0,0 +1,58 @@
# Sponza example
Loads the Sponza atrium as a `.cmesh` + one albedo `.ctex` and renders
it via ray tracing on both Vulkan (native) and WebGPU (wasm). Same
`main.cpp`, `#ifdef CRAFTER_GRAPHICS_WINDOW_DOM` selects the backend.
## What this example proves
- `.cmesh` and `.ctex` decompression round-trip on both backends
(GPU via `VK_EXT_memory_decompression` on Vulkan, CPU via
`Compression::DecompressCPU` on WebGPU).
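- A single texture binding flowing from `Image2D<RGBA8>` through the
RT pipeline's closest-hit on both backends. The Vulkan closest-hit
samples at the barycentric attribs as UVs — proof-of-binding, not
visually accurate; per-vertex UV interpolation is the next step
there. The WebGPU closest-hit already interpolates per-vertex UVs
from the attribs heap and samples a per-material texture array.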
## Asset fetch
`project.cpp` calls `Crafter::GitFetch(...)` on
[https://github.com/jimmiebergmann/Sponza](https://github.com/jimmiebergmann/Sponza)
(pinned to commit `222338979d32f4f4818466291bdbc29f192b86ba`). The
clone lands in the per-user crafter-build cache; first build pulls
~280 MB once, subsequent builds reuse it.
`cfg.assets` then picks two files out of that clone:
| Source | Compressed output |
|-----------------------------------------|-------------------------|
| `sponza.obj` | `sponza.cmesh` |
| `textures/sponza_arch_diff.tga` | `sponza_arch_diff.ctex` |
Both land flat in the example's bin directory.
## Building
```
crafter build # native Vulkan
crafter build --target=wasm32-wasip1 # WebGPU / wasm
```
## License & attribution
Sponza geometry, materials, and textures are licensed under
[CC BY 3.0](https://creativecommons.org/licenses/by/3.0/).
- **Original model:** Frank Meinl, Crytek (2010).
- **OBJ packaging / cleanup:** Morgan McGuire, McGuire Computer
Graphics Archive — https://casual-effects.com/data.
- **GitHub mirror used here:** Jimmie Bergmann's roof-material fixup —
https://github.com/jimmiebergmann/Sponza.
When redistributing builds of this example that bundle the compressed
Sponza outputs (`*.cmesh`, `*.ctex`), the CC BY 3.0 attribution
requirement applies. Quoting the original credit somewhere visible to
end users (about-screen, credits page, etc.) is enough.
The Crafter.Graphics library code itself is LGPL-3.0; the two
licenses are compatible for data + code distribution.

View file

@ -0,0 +1,23 @@
#version 460
#extension GL_EXT_ray_tracing : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_descriptor_heap : enable
#extension GL_EXT_nonuniform_qualifier : enable
// Specialization constant: descriptor-heap slot of the albedo texture.
// Set from descriptorHeap.bufferStartElement + the slot allocated for
// the Image2D<RGBA8> on the host side. Sampling uses gl_HitAttributeEXT
// barycentrics as UVs — proof-of-binding rather than UV-correct shading.
// Per-vertex UV interpolation lands when Mesh on Vulkan exposes the
// data-region buffer.
layout(constant_id = 0) const uint16_t albedoSlot = 0us;
layout(descriptor_heap) uniform sampler2D albedo[];
hitAttributeEXT vec2 hitAttrs;
layout(location = 0) rayPayloadInEXT vec3 hitValue;
// Closest-hit entry point: writes the sampled albedo colour into the ray
// payload.
void main() {
    // hitAttrs holds the hit's barycentric coordinates; they are used
    // directly as the texture coordinate (proof-of-binding, not real UVs).
    hitValue = texture(albedo[albedoSlot], hitAttrs).rgb;
}

View file

@ -0,0 +1,90 @@
// Payload declared here so the WGSL assembler sees it before raygen
// (the assembler concatenates closesthit/anyhit/miss BEFORE raygen).
//
// WGSL forbids cycles in the function call graph, so closesthit_main
// CAN'T call traceRay (that would create the cycle closesthit →
// traceRay → runClosestHit → closesthit). The lighting + shadow trace therefore
// happens in raygen; closesthit's job is just to gather surface data
// into the payload.
//
// shadowRay = 0 (primary): closesthit fills albedo/worldPos/normal/hit.
// shadowRay = 1 (shadow): closesthit is skipped (RT_FLAG_SKIP_CLOSEST_HIT),
// miss flips color to white = "lit".
// Ray payload handed to closesthit_main through a function-space pointer.
struct Payload {
// Primary ray: surface albedo written by closesthit_main.
color: vec3<f32>,
// Ray type selector: 0 = primary ray, 1 = shadow ray.
shadowRay: u32,
// World-space hit position (ray.origin + ray.direction * hit.t).
worldPos: vec3<f32>,
// Set to 1 by closesthit_main when a surface was hit.
hit: u32,
// World-space interpolated shading normal, normalized.
worldNormal: vec3<f32>,
// NOTE(review): presumably explicit tail padding to keep the struct a
// multiple of 16 bytes — confirm against whatever mirrors this layout.
_pad: f32,
};
// User-bound resources at group(2). Matches the UICustomBinding span the
// host hands to PipelineRTWebGPU::Init.
// binding 0 albedo texture_2d_array, one layer per Sponza material
// binding 1 sampler (linear clamp)
// binding 2 camera storage buffer (read by raygen only)
@group(2) @binding(0) var albedos : texture_2d_array<f32>;
@group(2) @binding(1) var samp : sampler;
// VertexNormalTangentUVPacked is `packed` on the outer struct but each
// inner `Vector<float, N, 4>` is SIMD-aligned to a 16-byte stride. So
// each vertex is 12 u32 words: normal at 0..2, tangent at 4..6, uv at 8..9.
const ATTRIB_STRIDE_U32: u32 = 12u;
const ATTRIB_NORMAL_OFFSET: u32 = 0u;
const ATTRIB_UV_OFFSET: u32 = 8u;
// Reads the UV pair of vertex `vertexIdx` from the vertexAttribs heap.
// The two floats are stored as raw u32 words at ATTRIB_UV_OFFSET within
// the vertex's ATTRIB_STRIDE_U32-word record, starting at the mesh's
// attribsOffset.
fn fetchUV(meshRec: MeshRecord, vertexIdx: u32) -> vec2<f32> {
    let vertexBase = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32;
    let u = bitcast<f32>(vertexAttribs[vertexBase + ATTRIB_UV_OFFSET]);
    let v = bitcast<f32>(vertexAttribs[vertexBase + ATTRIB_UV_OFFSET + 1u]);
    return vec2<f32>(u, v);
}
// Reads the object-space normal of vertex `vertexIdx` from the
// vertexAttribs heap. The three floats are stored as raw u32 words at
// ATTRIB_NORMAL_OFFSET within the vertex's ATTRIB_STRIDE_U32-word record.
// The value is returned as stored (no normalization here).
fn fetchNormal(meshRec: MeshRecord, vertexIdx: u32) -> vec3<f32> {
    let vertexBase = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32;
    let nx = bitcast<f32>(vertexAttribs[vertexBase + ATTRIB_NORMAL_OFFSET]);
    let ny = bitcast<f32>(vertexAttribs[vertexBase + ATTRIB_NORMAL_OFFSET + 1u]);
    let nz = bitcast<f32>(vertexAttribs[vertexBase + ATTRIB_NORMAL_OFFSET + 2u]);
    return vec3<f32>(nx, ny, nz);
}
// Closest-hit stage: gathers surface data for the hit triangle (albedo,
// world position, world normal) into the payload and marks it as hit.
// It does no lighting and traces no rays itself.
fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {
    // Hit instance → mesh record → the triangle's three vertex indices.
    let rec = meshRecords[tlasEntries[hit.instanceId].blasMeshIdx];
    let firstIndex = rec.indexOffset + hit.primitiveId * 3u;
    let ia = indices[firstIndex];
    let ib = indices[firstIndex + 1u];
    let ic = indices[firstIndex + 2u];

    // Barycentric weights for (ia, ib, ic); hit.attribs carries the last
    // two, the first is whatever remains of 1.
    let weights = vec3<f32>(1.0 - hit.attribs.x - hit.attribs.y, hit.attribs.x, hit.attribs.y);

    // Interpolated UV. V is flipped (OBJ V runs bottom-up, the sampler
    // top-down) and both axes are wrapped with fract for manual tiling.
    let uv = fetchUV(rec, ia) * weights.x + fetchUV(rec, ib) * weights.y + fetchUV(rec, ic) * weights.z;
    let tiledUV = vec2<f32>(fract(uv.x), fract(1.0 - uv.y));

    // hit.customIndex selects the albedo array layer.
    let albedo = textureSampleLevel(albedos, samp, tiledUV, i32(hit.customIndex), 0.0).rgb;

    // Smooth shading normal: interpolate in object space, then rotate to
    // world space through the object-to-world rows so instance rotations
    // are honoured.
    let objNormal = normalize(fetchNormal(rec, ia) * weights.x + fetchNormal(rec, ib) * weights.y + fetchNormal(rec, ic) * weights.z);
    let worldNormal = normalize(vec3<f32>(
        dot(hit.objectToWorldR0.xyz, objNormal),
        dot(hit.objectToWorldR1.xyz, objNormal),
        dot(hit.objectToWorldR2.xyz, objNormal)));

    (*payload).hit = 1u;
    (*payload).color = albedo;
    (*payload).worldNormal = worldNormal;
    (*payload).worldPos = ray.origin + ray.direction * hit.t;
}

445
examples/Sponza/main.cpp Normal file
View file

@ -0,0 +1,445 @@
// Sponza on Vulkan + WebGPU. Same example source, two backends — picked
// by CRAFTER_GRAPHICS_WINDOW_DOM. Both paths:
// 1. Load a Sponza .cmesh (positions + indices, optional per-vertex
// data region) and a single albedo .ctex from disk. The source
// assets are fetched once by project.cpp (Crafter.Build::GitFetch)
// from https://github.com/jimmiebergmann/Sponza and compressed
// into the bin dir at build time — they don't live in this repo.
// 2. Build BLAS + TLAS via the existing Mesh / RenderingElement3D
// flow. The on-disk format is identical between backends; only
// the decompression path differs (VK_EXT_memory_decompression
// on Vulkan, CPU GDeflate on WebGPU).
// 3. Upload the albedo as Image2D<RGBA8>, register it in the
// backend descriptor heap, and run the RT pipeline. On Vulkan the
// closest-hit shader samples the texture at the hit's barycentric
// coords — proof-of-binding rather than UV-correct shading;
// per-vertex UV interpolation there is follow-up work (it needs a
// sibling data buffer exposed off Mesh). The WebGPU closest-hit
// already interpolates per-vertex UVs from the attribs heap.
//
// Sponza model: CC BY 3.0 — Frank Meinl (Crytek), packaged by Jimmie
// Bergmann and Morgan McGuire. https://casual-effects.com/data
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
#include "vulkan/vulkan.h"
#endif
import Crafter.Graphics;
import Crafter.Asset;
import Crafter.Math;
import Crafter.Event;
import std;
using namespace Crafter;
namespace fs = std::filesystem;
namespace {
    // One 8-bit-per-channel RGBA texel; the pixel type used for
    // Image2D<RGBA8>.
    struct RGBA8 {
        std::uint8_t r;
        std::uint8_t g;
        std::uint8_t b;
        std::uint8_t a;
    };

    /// Aborts the process with a diagnostic on stderr unless both asset
    /// files exist.
    /// @param mesh path of the compressed mesh file.
    /// @param tex  path of the compressed albedo texture file.
    void RequireAssets(const fs::path& mesh, const fs::path& tex) {
        const bool meshPresent = fs::exists(mesh);
        const bool texPresent = fs::exists(tex);
        if (!meshPresent || !texPresent) {
            const auto status = [](bool present) { return present ? "OK" : "MISSING"; };
            std::println(std::cerr,
                "[Sponza] missing asset(s):\n"
                " mesh: {} {}\n"
                " albedo: {} {}\n"
                "The build should have populated these via cfg.assets +\n"
                "GitFetch (see examples/Sponza/project.cpp). If you ran\n"
                "the binary from outside its bin dir, cd into the bin dir\n"
                "first — asset paths are relative to cwd.",
                mesh.string(), status(meshPresent),
                tex.string(), status(texPresent));
            std::abort();
        }
    }
}
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
// Native (Vulkan) entry point: loads one mesh and one albedo texture,
// builds a ray-tracing pipeline with a raygen / miss / closest-hit group,
// writes the per-frame descriptors into the descriptor heap and hands an
// RTPass to the window.
int main() {
    // Native Vulkan path is single-material for now (see file header) —
    // pick up just the first per-material output the build emits. The
    // WebGPU branch below uses every mesh + a texture array.
    const fs::path meshPath = "mesh_0.cmesh";
    const fs::path texPath = "tex_0.ctex";
    RequireAssets(meshPath, texPath);

    CompressedMeshAsset loadedMesh = LoadCompressedMesh(meshPath);
    CompressedTextureAsset loadedTex = LoadCompressedTexture(texPath);
    std::println("[Sponza] loaded {} verts, {} idx, {}x{} albedo",
                 loadedMesh.vertexCount, loadedMesh.indexCount,
                 loadedTex.sizeX, loadedTex.sizeY);

    Device::Initialize();
    Window window(1280, 720, "Sponza");
    VkCommandBuffer cmd = window.StartInit();

    DescriptorHeapVulkan descriptorHeap;
    descriptorHeap.Initialize(/*images*/ 2, /*buffers*/ 1, /*samplers*/ 0);

    // Two specialization constants: the TLAS slot offset (shared with
    // VulkanTriangle pattern) and the albedo slot index for closesthit.
    // Each shader gets its own single-entry VkSpecializationInfo with
    // constant_id 0.
    VkSpecializationMapEntry raygenEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) };
    VkSpecializationInfo raygenSpec = {
        .mapEntryCount = 1, .pMapEntries = &raygenEntry,
        .dataSize = sizeof(std::uint16_t), .pData = &descriptorHeap.bufferStartElement,
    };

    // Allocate the albedo slot first so its index is known when we
    // compile closesthit.spv. Image slot 0 of the range is the output
    // image, slot 1 is the albedo.
    auto imgSlots = descriptorHeap.AllocateImageSlots(2);
    auto bufSlots = descriptorHeap.AllocateBufferSlots(1);
    std::uint16_t albedoHeapSlot = static_cast<std::uint16_t>(imgSlots.firstElement + 1);
    VkSpecializationMapEntry hitEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) };
    VkSpecializationInfo hitSpec = {
        .mapEntryCount = 1, .pMapEntries = &hitEntry,
        .dataSize = sizeof(std::uint16_t), .pData = &albedoHeapSlot,
    };

    // Shader indices below (0 = raygen, 1 = miss, 2 = closest hit) are
    // what the group descriptions refer to.
    std::array<VulkanShader, 3> shaders {{
        { "raygen.spv", "main", VK_SHADER_STAGE_RAYGEN_BIT_KHR, &raygenSpec },
        { "miss.spv", "main", VK_SHADER_STAGE_MISS_BIT_KHR, nullptr },
        { "closesthit.spv", "main", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, &hitSpec },
    }};
    ShaderBindingTableVulkan shaderTable;
    shaderTable.Init(shaders);

    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> raygenGroups {{ {
        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
        .generalShader = 0, .closestHitShader = VK_SHADER_UNUSED_KHR,
        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
    } }};
    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> missGroups {{ {
        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
        .generalShader = 1, .closestHitShader = VK_SHADER_UNUSED_KHR,
        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
    } }};
    std::array<VkRayTracingShaderGroupCreateInfoKHR, 1> hitGroups {{ {
        .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
        .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR,
        .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = 2,
        .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR,
    } }};
    PipelineRTVulkan pipeline;
    pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, shaderTable);

    Mesh sponzaMesh;
    sponzaMesh.Build(loadedMesh, cmd);

    Image2D<RGBA8> albedo;
    albedo.Create(loadedTex.sizeX, loadedTex.sizeY, /*mipLevels*/ 1, cmd,
                  VK_FORMAT_R8G8B8A8_UNORM,
                  VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    albedo.Update(loadedTex, cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    SamplerVulkan<RGBA8> sampler;

    // Single scene instance with an identity transform, referencing the
    // BLAS built for the mesh above.
    static RenderingElement3D renderer;
    renderer.instance = {
        .transform = {},
        .instanceCustomIndex = 0,
        .mask = 0xFF,
        .instanceShaderBindingTableRecordOffset = 0,
        .flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR,
        .accelerationStructureReference = sponzaMesh.blasAddr,
    };
    MatrixRowMajor<float, 4, 3, 1>::Identity()
        .Store(reinterpret_cast<float*>(renderer.instance.transform.matrix));
    RenderingElement3D::elements.emplace_back(&renderer);

    // One TLAS per in-flight frame. The descriptor writes below read
    // RenderingElement3D::tlases[f] for every f < Window::numFrames, so
    // build exactly that many rather than hard-coding the frame indices.
    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
        RenderingElement3D::BuildTLAS(cmd, f);
    }
    window.FinishInit();

    // Write descriptors: TLAS at bufSlots[0], output image at imgSlots[0],
    // albedo (combined image+sampler) at imgSlots[1]. Per-frame replicated.
    VkDeviceAddressRangeKHR tlasRanges[Window::numFrames];
    VkImageDescriptorInfoEXT outImgInfos[Window::numFrames];
    VkDescriptorImageInfo albedoInfo {
        .sampler = sampler.textureSampler,
        .imageView = albedo.imageView,
        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
    };
    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
        tlasRanges[f] = { .address = RenderingElement3D::tlases[f].address };
        outImgInfos[f] = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT,
            .pView = &window.imageViews[f],
            .layout = VK_IMAGE_LAYOUT_GENERAL,
        };
    }

    // resources[i] is written to destinations[i]; three entries per frame
    // (TLAS, storage image, albedo), each targeting that frame's heap.
    std::vector<VkResourceDescriptorInfoEXT> resources;
    std::vector<VkHostAddressRangeEXT> destinations;
    resources.reserve(Window::numFrames * 3);
    destinations.reserve(Window::numFrames * 3);
    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
        resources.push_back({
            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
            .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
            .data = { .pAddressRange = &tlasRanges[f] },
        });
        destinations.push_back({
            .address = descriptorHeap.resourceHeap[f].value
                     + descriptorHeap.BufferByteOffset(bufSlots.firstElement),
            .size = Device::descriptorHeapProperties.bufferDescriptorSize,
        });
        resources.push_back({
            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .data = { .pImage = &outImgInfos[f] },
        });
        destinations.push_back({
            .address = descriptorHeap.resourceHeap[f].value
                     + descriptorHeap.ImageByteOffset(imgSlots.firstElement),
            .size = Device::descriptorHeapProperties.imageDescriptorSize,
        });
        resources.push_back({
            .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT,
            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .data = { .pCombinedImageSampler = &albedoInfo },
        });
        destinations.push_back({
            .address = descriptorHeap.resourceHeap[f].value
                     + descriptorHeap.ImageByteOffset(albedoHeapSlot),
            .size = Device::descriptorHeapProperties.imageDescriptorSize,
        });
    }
    Device::vkWriteResourceDescriptorsEXT(Device::device,
        static_cast<std::uint32_t>(resources.size()),
        resources.data(), destinations.data());
    for (std::uint32_t f = 0; f < Window::numFrames; ++f) {
        descriptorHeap.resourceHeap[f].FlushDevice();
    }

    window.descriptorHeap = &descriptorHeap;
    RTPass rtPass(&pipeline);
    window.passes.push_back(&rtPass);
    window.Render();
    window.StartSync();
    return 0;
}
#else
int main() {
// ── Read scene manifest (produced by project.cpp's ImportSponzaBundle).
//
// line 1: albedoCount
// line 2: meshCount
// line 3..: per-mesh albedoIdx (-1 means "no albedo")
const fs::path manifestPath = "scene.txt";
if (!fs::exists(manifestPath)) {
std::println(std::cerr,
"[Sponza] missing scene.txt — the build should have produced "
"it (see examples/Sponza/project.cpp). If you ran the binary "
"from outside its bin dir, cd in first.");
std::abort();
}
std::ifstream manifest(manifestPath);
std::uint32_t albedoCount = 0, meshCount = 0;
manifest >> albedoCount >> meshCount;
std::vector<std::int32_t> meshAlbedo(meshCount);
for (std::uint32_t i = 0; i < meshCount; ++i) manifest >> meshAlbedo[i];
std::println("[Sponza] scene: {} albedos, {} meshes", albedoCount, meshCount);
Device::Initialize();
static Window window(1280, 720, "Sponza");
auto cmd = window.StartInit();
DescriptorHeapWebGPU heap;
heap.Initialize(/*images*/ 2, /*buffers*/ 2, /*samplers*/ 2);
std::array<WebGPUShader, 3> shaders {{
WebGPUShader(fs::path("raygen.wgsl"), "raygen_main", WebGPURTStage::Raygen),
WebGPUShader(fs::path("miss.wgsl"), "miss_main", WebGPURTStage::Miss),
WebGPUShader(fs::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit),
}};
ShaderBindingTableWebGPU sbt;
sbt.Init(shaders);
std::array<RTShaderGroup, 1> raygenGroups {{
{ .type = RTShaderGroupType::General, .generalShader = 0 },
}};
std::array<RTShaderGroup, 1> missGroups {{
{ .type = RTShaderGroupType::General, .generalShader = 1 },
}};
std::array<RTShaderGroup, 1> hitGroups {{
{ .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 },
}};
// Three user bindings at @group(2):
// binding 0 — albedo texture_2d_array (one layer per material)
// binding 1 — sampler (linear clamp)
// binding 2 — Camera storage buffer (host-driven, updated per frame)
std::array<UICustomBinding, 3> bindings {{
{ .group = 2, .binding = 0, .kind = UICustomBindingKind::SampledTextureArray, ._pad = 0, .pushOffset = 0 },
{ .group = 2, .binding = 1, .kind = UICustomBindingKind::Sampler, ._pad = 0, .pushOffset = 0 },
{ .group = 2, .binding = 2, .kind = UICustomBindingKind::Buffer, ._pad = 0, .pushOffset = 0 },
}};
PipelineRTWebGPU pipeline;
pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt, bindings);
// ── Albedo texture array — one rgba8unorm layer per material. ──────
//
// Probe layer 0 for the canonical layer dimensions; project.cpp
// already resized every albedo to the same square so any tex_N.ctex
// would do, layer 0 is just the first one we have.
Image2DArray<RGBA8> albedoArray;
{
CompressedTextureAsset probe = LoadCompressedTexture("tex_0.ctex");
albedoArray.Create(probe.sizeX, probe.sizeY, static_cast<std::uint16_t>(albedoCount));
albedoArray.UpdateLayer(0, probe);
for (std::uint32_t i = 1; i < albedoCount; ++i) {
CompressedTextureAsset tex = LoadCompressedTexture(std::format("tex_{}.ctex", i));
albedoArray.UpdateLayer(static_cast<std::uint16_t>(i), tex);
}
}
auto albedoArraySlot = albedoArray.AllocateSlot(heap);
SamplerSlot samplerSlot = AllocateLinearClampSampler(heap);
// Camera storage buffer — host writes (origin, right, up, forward,
// aspect, tanHalf) every frame from the input-driven free camera
// below. Layout matches the WGSL Camera struct in raygen.wgsl
// (vec3-aligned, std430). 64 bytes total.
struct CameraGPU {
float origin[3]; float pad0;
float right[3]; float tanHalf;
float up[3]; float aspect;
float forward[3]; float pad1;
};
static_assert(sizeof(CameraGPU) == 64);
WebGPUBuffer<CameraGPU, true> cameraBuf;
cameraBuf.Create(1);
// Handle array fed to RTPass — order matches the bindings declaration.
static std::array<std::uint32_t, 3> userHandles {
heap.imageTable [albedoArraySlot.firstElement],
heap.samplerTable[samplerSlot.firstElement],
cameraBuf.handle,
};
// ── Meshes + scene instances ───────────────────────────────────────
//
// One Mesh + one RenderingElement3D per material group from
// scene.txt. Meshes whose albedoIdx is -1 (the .obj's `usemtl` named
// something without a map_Kd in .mtl) get dropped — they're rare in
// Sponza and we'd have nothing to sample for them anyway.
//
// Vector capacity is reserved up-front: RenderingElement3D::Add
// takes a pointer that's stored in the static elements[] vector, so
// any later vector reallocation would dangle those pointers.
static std::vector<Mesh> meshes;
static std::vector<RenderingElement3D> renderers;
meshes.reserve(meshCount);
renderers.reserve(meshCount);
for (std::uint32_t i = 0; i < meshCount; ++i) {
if (meshAlbedo[i] < 0) continue;
CompressedMeshAsset loaded = LoadCompressedMesh(std::format("mesh_{}.cmesh", i));
meshes.emplace_back();
meshes.back().Build(loaded, cmd);
renderers.emplace_back();
RenderingElement3D& r = renderers.back();
auto& tx = r.instance.transform.matrix;
tx[0][0] = 1; tx[0][1] = 0; tx[0][2] = 0; tx[0][3] = 0;
tx[1][0] = 0; tx[1][1] = 1; tx[1][2] = 0; tx[1][3] = 0;
tx[2][0] = 0; tx[2][1] = 0; tx[2][2] = 1; tx[2][3] = 0;
// 24-bit instanceCustomIndex carries the albedo array layer that
// closesthit.wgsl reads as `hit.customIndex`.
r.instance.instanceCustomIndex = static_cast<std::uint32_t>(meshAlbedo[i]);
r.instance.mask = 0xFF;
r.instance.instanceShaderBindingTableRecordOffset = 0;
r.instance.flags = kRTGeometryInstanceForceOpaque;
r.instance.accelerationStructureReference = meshes.back().blasAddr;
RenderingElement3D::Add(&r);
}
RenderingElement3D::BuildTLAS(cmd, 0);
window.descriptorHeap = &heap;
window.FinishInit();
RTPass rtPass(&pipeline);
rtPass.handlesPtr = userHandles.data();
rtPass.handlesCount = static_cast<std::uint32_t>(userHandles.size());
window.passes.push_back(&rtPass);
// ── Free camera: WASD + mouse-delta look ───────────────────────────
//
// Initial pose puts the camera near one end of the atrium at eye
// height, looking +X down the long axis (bbox: X[-1921..1800],
// Y[-126..1429], Z[-1183..1105]). The user can fine-tune from there.
struct CamState {
Vector<float, 3, 4> position{ -1500.0f, 200.0f, 0.0f };
float yaw = 0.0f; // radians, around world +Y
float pitch = 0.0f; // radians, +pitch looks up
} cam;
Input::Map inputMap;
Input::Action& moveAct = inputMap.AddAction("Move", Input::ActionType::Vector2);
Input::Action& lookAct = inputMap.AddAction("Look", Input::ActionType::Vector2);
moveAct.bindings = {
Input::WASDBind{
Key(CrafterKeys::W), Key(CrafterKeys::S),
Key(CrafterKeys::A), Key(CrafterKeys::D),
},
};
lookAct.bindings = {
Input::MouseDeltaBind{ 1.0f },
};
inputMap.Attach(window);
constexpr float kMoveSpeed = 1200.0f; // Sponza units / second (room is ~3700 wide)
constexpr float kLookSens = 0.05f; // radians per mouse pixel
constexpr float kDt = 1.0f / 60.0f;
EventListener<void> camTick(&window.onBeforeUpdate, [&]() {
inputMap.Tick();
cam.yaw += lookAct.vector2.x * kLookSens;
cam.pitch -= lookAct.vector2.y * kLookSens;
// Keep pitch just shy of straight up/down so the basis vectors
// don't collapse (cross(forward, world_up) would go zero).
cam.pitch = std::clamp(cam.pitch, -1.55f, 1.55f);
const float cp = std::cos(cam.pitch), sp = std::sin(cam.pitch);
const float cy = std::cos(cam.yaw), sy = std::sin(cam.yaw);
Vector<float, 3, 4> forward { cp * cy, sp, cp * sy };
Vector<float, 3, 4> worldUp { 0.0f, 1.0f, 0.0f };
Vector<float, 3, 4> right { forward.y * worldUp.z - forward.z * worldUp.y,
forward.z * worldUp.x - forward.x * worldUp.z,
forward.x * worldUp.y - forward.y * worldUp.x };
const float rLen = std::sqrt(right.x*right.x + right.y*right.y + right.z*right.z);
right.x /= rLen; right.y /= rLen; right.z /= rLen;
Vector<float, 3, 4> up { right.y * forward.z - right.z * forward.y,
right.z * forward.x - right.x * forward.z,
right.x * forward.y - right.y * forward.x };
const float dx = moveAct.vector2.x * kMoveSpeed * kDt;
const float dy = moveAct.vector2.y * kMoveSpeed * kDt;
cam.position.x += right.x * dx + forward.x * dy;
cam.position.y += right.y * dx + forward.y * dy;
cam.position.z += right.z * dx + forward.z * dy;
CameraGPU& g = cameraBuf.value[0];
g.origin[0] = cam.position.x; g.origin[1] = cam.position.y; g.origin[2] = cam.position.z; g.pad0 = 0.0f;
g.right[0] = right.x; g.right[1] = right.y; g.right[2] = right.z;
g.up[0] = up.x; g.up[1] = up.y; g.up[2] = up.z;
g.forward[0] = forward.x; g.forward[1] = forward.y; g.forward[2] = forward.z;
g.aspect = float(window.width) / float(window.height);
g.tanHalf = std::tan(70.0f * 3.14159265f / 360.0f);
g.pad1 = 0.0f;
cameraBuf.FlushDevice();
});
window.Render();
window.StartUpdate();
window.StartSync();
return 0;
}
#endif

11
examples/Sponza/miss.glsl Normal file
View file

@ -0,0 +1,11 @@
#version 460
#extension GL_EXT_ray_tracing : enable

// Incoming payload at location 0; raygen.glsl declares the matching
// rayPayloadEXT and stores whatever ends up here into the output image.
layout(location = 0) rayPayloadInEXT vec3 hitValue;

// Flat warm tone chosen to match Sponza's interior lighting. No sky
// gradient is computed here.
// NOTE(review): GL_EXT_ray_tracing appears to expose
// gl_WorldRayDirectionEXT to miss shaders, which would allow a
// direction-based gradient without an extra payload field — confirm
// against the extension spec before relying on it.
const vec3 kMissColor = vec3(0.10, 0.08, 0.06);

void main() {
    hitValue = kMissColor;
}

16
examples/Sponza/miss.wgsl Normal file
View file

@ -0,0 +1,16 @@
// Miss shader. Writes the payload colour for rays that hit nothing:
// shadow rays get full visibility, primary rays get a sky gradient.
fn miss_main(ray: RayDesc, payload: ptr<function, Payload>) {
    if ((*payload).shadowRay != 1u) {
        // Primary miss: cheap two-stop sky gradient. (*payload).hit stays 0
        // so raygen knows to skip the lighting path and just use this color.
        // The blend factor maps direction.y from [-1, 1] onto [0, 1].
        let blend = clamp(ray.direction.y * 0.5 + 0.5, 0.0, 1.0);
        let lowColor = vec3<f32>(0.45, 0.65, 0.95);
        let highColor = vec3<f32>(0.95, 0.85, 0.65);
        (*payload).color = mix(lowColor, highColor, blend);
    } else {
        // Shadow ray escaped to infinity — the sun is visible from the
        // origin, so the surface there should pick up full direct light.
        // raygen reads color.x as the visibility coefficient.
        (*payload).color = vec3<f32>(1.0);
    }
}

View file

@ -0,0 +1,92 @@
import std;
import Crafter.Build;
namespace fs = std::filesystem;
using namespace Crafter;
// Sponza geometry + albedo: CC BY 3.0, Frank Meinl (Crytek), packaged by
// Jimmie Bergmann (https://github.com/jimmiebergmann/Sponza) and Morgan
// McGuire (https://casual-effects.com/data). The full asset bundle is
// ~280 MB — too large to live in this repo. GitFetch lands it in the
// per-user crafter-build cache on first build and reuses thereafter.
//
// Repository to fetch and the exact commit to pin it to.
constexpr std::string_view kSponzaGitUrl{"https://github.com/jimmiebergmann/Sponza.git"};
constexpr std::string_view kSponzaCommitSHA{"222338979d32f4f4818466291bdbc29f192b86ba"};

// Every albedo is normalized to this size so they can live as layers of
// one texture_2d_array on the GPU (WebGPU array textures require
// identical layer dimensions). 1024 matches the majority of Sponza's
// textures; the few outliers (256×1024 chain, 512² thorn, 2048² curtains)
// get bilinear-resized via stb_image_resize2.
constexpr std::uint16_t kAlbedoSize{1024u};
// Build-script entry point for the Sponza example.
//
// args — command-line style arguments. A "--target=" argument containing
//        "wasm" selects the browser build; all arguments are forwarded
//        to the graphics library project and to ApplyStandardArgs.
//
// Returns the executable's Configuration with the graphics library as a
// dependency, the processed Sponza bundle files attached, and either the
// WGSL sources (wasm) or the GLSL ray-tracing shaders (native) registered.
// Exits the process with status 1 if the bundle cannot be built.
extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> args) {
    const bool isWasm = std::ranges::any_of(args, [](std::string_view arg) {
        return arg.starts_with("--target=") && arg.find("wasm") != std::string_view::npos;
    });

    // The graphics library is built from the repository root project with
    // the same arguments this project received.
    std::vector<std::string> graphicsArgs(args.begin(), args.end());
    Configuration* graphics = LocalProject({
        .projectFile = "../../project.cpp",
        .args = graphicsArgs,
    });

    Configuration cfg;
    cfg.path = "./";
    cfg.name = "Sponza";
    cfg.outputName = "Sponza";
    cfg.type = ConfigurationType::Executable;
    if (isWasm) {
        cfg.target = "wasm32-wasip1";
        cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""});
        cfg.compileFlags.push_back("-msimd128");
    }
    ApplyStandardArgs(cfg, args);
    cfg.dependencies = { graphics };

    std::array<fs::path, 0> ifaces = {};
    std::array<fs::path, 1> impls = { "main" };
    cfg.GetInterfacesAndImplementations(ifaces, impls);

    // Fetch Sponza once into the shared crafter-build cache, then process
    // it into a per-material bundle under build/sponza-bundle-<hash>/.
    // Hashing on (sha, albedoSize) so changing either invalidates the
    // bundle without touching the rest of the example's build tree.
    // NOTE(review): std::hash is only required to be consistent within a
    // single program execution — confirm the toolchain's implementation
    // is deterministic across runs, otherwise the directory name drifts.
    const fs::path sponzaRoot = GitFetch({
        .url = std::string(kSponzaGitUrl),
        .commit = std::string(kSponzaCommitSHA),
    });
    const auto bundleHash =
        std::hash<std::string>{}(std::format("{}|{}", kSponzaCommitSHA, kAlbedoSize));
    const fs::path bundleDir =
        fs::path("build") / std::format("sponza-bundle-{:016x}", bundleHash);

    const auto bundleError = BuildOBJBundle(
        sponzaRoot / "sponza.obj",
        sponzaRoot / "sponza.mtl",
        bundleDir,
        kAlbedoSize);
    if (!bundleError.empty()) {
        std::println(std::cerr, "Sponza bundle error: {}", bundleError);
        std::exit(1);
    }

    // Forward every produced file (.cmesh, .ctex, scene.txt) as a
    // passthrough — they're already compressed by Crafter.Asset, no
    // further compression needed. cfg.files copies them flat into
    // the executable's bin dir.
    for (const auto& entry : fs::directory_iterator(bundleDir)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        cfg.files.push_back(entry.path());
    }

    if (isWasm) {
        // WGSL sources ship as plain files next to the executable.
        for (const char* wgslFile : { "raygen.wgsl", "closesthit.wgsl", "miss.wgsl" }) {
            cfg.files.emplace_back(fs::path(wgslFile));
        }
        EnableWasiBrowserRuntime(cfg);
    } else {
        // Native build registers the GLSL ray-tracing shaders instead.
        const std::pair<const char*, ShaderType> nativeShaders[] = {
            { "raygen.glsl",     ShaderType::RayGen },
            { "closesthit.glsl", ShaderType::ClosestHit },
            { "miss.glsl",       ShaderType::Miss },
        };
        for (const auto& [file, stage] : nativeShaders) {
            cfg.shaders.emplace_back(fs::path(file), std::string("main"), stage);
        }
    }
    return cfg;
}

View file

@ -0,0 +1,52 @@
#version 460
#extension GL_EXT_ray_tracing : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_descriptor_heap : enable
#extension GL_EXT_nonuniform_qualifier : enable

// Specialization constant set from descriptorHeap.bufferStartElement —
// shared with closesthit.glsl. The TLAS lives at descriptor_heap slot
// `bufferStart` (it's an SSBO-typed entry), the per-frame output image
// at heap slot 0.
layout(constant_id = 0) const uint16_t bufferStart = 0us;
layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[];
layout(descriptor_heap) uniform writeonly image2D image[];

// Payload at location 0: filled by the miss / closest-hit shader and
// written to the output image below.
layout(location = 0) rayPayloadEXT vec3 hitValue;

// Raygen: builds one pinhole-camera ray per launch pixel, traces it and
// stores the resulting colour.
void main() {
    uvec2 pixel = gl_LaunchIDEXT.xy;
    uvec2 resolution = gl_LaunchSizeEXT.xy;
    // Pixel centre in [0, 1], then remapped to [-1, 1].
    vec2 uv = (vec2(pixel) + 0.5) / vec2(resolution);
    vec2 ndc = uv * 2.0 - 1.0;

    // Camera positioned to look down the Sponza atrium axis. The fetched
    // Sponza OBJ is in its original large units (bbox roughly
    // X[-1921..1800], Y[-126..1429], Z[-1183..1105] — see the free-camera
    // comment in main.cpp), so sit near the -X end at eye height looking
    // +X. This matches the initial pose used by the WebGPU path.
    vec3 origin = vec3(-1500.0, 200.0, 0.0);
    float aspect = float(resolution.x) / float(resolution.y);
    float fov = radians(70.0);
    float tanHalf = tan(fov * 0.5);
    // Camera-space direction with +Z forward; ndc.y is negated so the
    // top of the image looks up.
    vec3 direction = normalize(vec3(
        ndc.x * aspect * tanHalf,
        -ndc.y * tanHalf,
        1.0));
    // Rotate +Z forward → +X forward (90° about Y).
    direction = vec3(direction.z, direction.y, -direction.x);

    traceRayEXT(
        topLevelAS[bufferStart],
        gl_RayFlagsNoneEXT,
        0xff,          // cull mask: accept every instance
        0, 0, 0,       // sbtRecordOffset, sbtRecordStride, missIndex
        origin,
        0.001,         // tMin
        direction,
        10000.0,       // tMax
        0);            // payload location
    imageStore(image[0], ivec2(pixel), vec4(hitValue, 1.0));
}

109
examples/Sponza/raygen.wgsl Normal file
View file

@ -0,0 +1,109 @@
// WebGPU raygen. Camera state comes from the host every frame via a
// storage buffer bound at @group(2) @binding(2); main.cpp drives that
// from WASD + mouse-delta through Crafter::Input.
//
// The shading + shadow trace all happens here because WGSL forbids
// recursive function call graphs — closesthit_main can't call traceRay
// (that would loop closesthit → traceRay → runClosestHit → closesthit).
// Raygen is the entry point and not called by anyone, so it can call
// traceRay twice (once primary, once shadow) without forming a cycle.
// Camera state written by the host every frame. Field order mirrors the
// CameraGPU struct in main.cpp: each vec3 is followed by one f32 that
// fills the fourth component of its 16-byte row.
struct Camera {
    origin:  vec3<f32>,  // ray origin
    pad0:    f32,        // unused
    right:   vec3<f32>,  // basis vector applied to ndc.x
    tanHalf: f32,        // tan(fov / 2)
    up:      vec3<f32>,  // basis vector applied to -ndc.y
    aspect:  f32,        // surface width / height
    forward: vec3<f32>,  // view direction
    pad1:    f32,        // unused
};

@group(2) @binding(2) var<storage, read> camera : Camera;
// Sun coming through Sponza's open roof. Y is up. The vector below points
// from a surface TOWARD the sun (it is normalized before use in
// raygen_main), so the light itself travels "down and slightly along +X"
// and grazes the colonnades on one side.
const SUN_DIR_TO_LIGHT: vec3<f32> = vec3<f32>(-0.35, 1.00, -0.20);

// Direct-light colour, scaled by nDotL * visibility.
const SUN_COLOR: vec3<f32> = vec3<f32>(1.10, 1.00, 0.85);

// Constant term added to every lit surface, shadowed or not.
const AMBIENT_COLOR: vec3<f32> = vec3<f32>(0.18, 0.20, 0.28);
// Raygen entry: one invocation per surface pixel. Traces a primary ray,
// shades a hit with ambient + shadowed sun light, tonemaps and stores
// the result in outImage.
fn raygen_main(gid: vec3<u32>) {
    // Invocations outside the surface have nothing to write.
    if (gid.x >= hdr.surfaceW || gid.y >= hdr.surfaceH) { return; }

    let pixel = vec2<f32>(f32(gid.x), f32(gid.y));
    let resolution = vec2<f32>(f32(hdr.surfaceW), f32(hdr.surfaceH));
    let uv = (pixel + vec2<f32>(0.5)) / resolution;
    let ndc = uv * 2.0 - vec2<f32>(1.0);

    // Pinhole camera reconstructed from the host basis. ndc.x runs left-
    // to-right across the screen → +right; ndc.y is top-down so we
    // negate before applying +up.
    let alongRight = ndc.x * camera.aspect * camera.tanHalf;
    let alongUp = -ndc.y * camera.tanHalf;
    let direction = normalize(
        camera.right * alongRight +
        camera.up * alongUp +
        camera.forward);

    // ── Primary ray ──
    var primary: Payload;
    primary.color = vec3<f32>(0.0);
    primary.shadowRay = 0u;
    primary.hit = 0u;
    traceRay(
        0u, 0u, 0xFFu,
        0u, 0u, 0u,
        camera.origin, 0.001,
        direction, 10000.0,
        &primary);

    // On a miss, miss_main has already written the sky colour into the
    // payload; that value is kept unless the hit branch overrides it.
    var finalColor: vec3<f32> = primary.color;
    if (primary.hit == 1u) {
        // Closesthit filled albedo/worldPos/worldNormal. Two-sided
        // shading: flip the normal toward the camera if we hit the back
        // face — Sponza's curtains in particular have inconsistent
        // winding, and without this half the surface would go black.
        let surfaceAlbedo = primary.color;
        let facesCamera = dot(primary.worldNormal, direction) < 0.0;
        let shadingNormal = select(-primary.worldNormal,
                                   primary.worldNormal,
                                   facesCamera);
        let toSun = normalize(SUN_DIR_TO_LIGHT);
        let cosSun = max(0.0, dot(shadingNormal, toSun));

        // ── Shadow ray ──
        // Only worth tracing if the surface faces the sun at all.
        var sunVisibility = 0.0;
        if (cosSun > 0.0) {
            // Normal-offset bias on Sponza's units (~3700 wide atrium)
            // is hefty; 0.5 keeps the shadow ray clear of the originating
            // triangle without producing visible "floating" shadows.
            let biasedOrigin = primary.worldPos + shadingNormal * 0.5;
            var occlusion: Payload;
            occlusion.color = vec3<f32>(0.0); // default: blocked
            occlusion.shadowRay = 1u;
            occlusion.hit = 0u;
            traceRay(
                0u,
                RT_FLAG_SKIP_CLOSEST_HIT | RT_FLAG_TERMINATE_ON_FIRST_HIT,
                0xFFu,
                0u, 0u, 0u,
                biasedOrigin, 0.001,
                toSun, 10000.0,
                &occlusion);
            // miss_main sets color to 1.0 when the shadow ray escapes.
            sunVisibility = occlusion.color.x;
        }
        let incoming = AMBIENT_COLOR + SUN_COLOR * (cosSun * sunVisibility);
        finalColor = surfaceAlbedo * incoming;
    }

    // Reinhard tonemap + gamma 2.2 so sun-lit albedos don't clip and
    // shadow detail stays readable.
    let tonemapped = finalColor / (finalColor + vec3<f32>(1.0));
    let encoded = pow(tonemapped, vec3<f32>(1.0 / 2.2));
    textureStore(outImage,
        vec2<i32>(i32(gid.x), i32(gid.y)),
        vec4<f32>(encoded, 1.0));
}

View file

@ -19,6 +19,7 @@ module Crafter.Graphics:Mesh_implWebGPU;
import :Mesh; import :Mesh;
import :WebGPU; import :WebGPU;
import Crafter.Asset;
import Crafter.Math; import Crafter.Math;
import std; import std;
@ -215,26 +216,59 @@ namespace {
}; };
} }
namespace {
    // Shared between the positions-only and the compressed-asset Build paths.
    // attribsBytes is empty for positions-only meshes; the JS bridge skips
    // the attribs-heap append in that case.
    //
    // Sets mesh.triangleCount from the index count, builds a BVH over the
    // triangles, and stores the handle returned by wgpuRegisterMeshBLAS in
    // mesh.blasAddr.
    void BuildBVHAndRegister(Mesh& mesh,
                             std::span<const Vector<float, 3, 3>> vertices,
                             std::span<const std::uint32_t> indices,
                             std::span<const std::byte> attribsBytes) {
        mesh.triangleCount = static_cast<std::uint32_t>(indices.size()) / 3;

        Builder bvh;
        bvh.Build(vertices, indices);

        // Flatten the builder's primitive order into a plain index table:
        // entry k is the triIndex of the builder's k-th primitive.
        std::vector<std::uint32_t> triOrder;
        triOrder.reserve(mesh.triangleCount);
        for (std::uint32_t slot = 0; slot < mesh.triangleCount; ++slot) {
            triOrder.push_back(bvh.prims[slot].triIndex);
        }

        // Node 0's bounds are passed along as the mesh-level AABB.
        const BVHNode& rootNode = bvh.nodes[0];
        const auto& lo = rootNode.aabbMin;
        const auto& hi = rootNode.aabbMax;

        // The bridge takes element/byte counts as 32-bit signed integers.
        const auto asCount = [](std::size_t n) { return static_cast<std::int32_t>(n); };

        mesh.blasAddr = WebGPU::wgpuRegisterMeshBLAS(
            lo[0], lo[1], lo[2],
            hi[0], hi[1], hi[2],
            vertices.data(),     asCount(vertices.size()),
            indices.data(),      asCount(indices.size()),
            bvh.nodes.data(),    asCount(bvh.nodes.size()),
            triOrder.data(),     asCount(triOrder.size()),
            attribsBytes.data(), asCount(attribsBytes.size()));
    }
}
void Mesh::Build(std::span<Vector<float, 3, 3>> vertices, void Mesh::Build(std::span<Vector<float, 3, 3>> vertices,
std::span<std::uint32_t> indices, std::span<std::uint32_t> indices,
WebGPUCommandEncoderRef /*cmd*/) { WebGPUCommandEncoderRef /*cmd*/) {
triangleCount = static_cast<std::uint32_t>(indices.size()) / 3; BuildBVHAndRegister(*this, vertices, indices, {});
}
Builder builder; void Mesh::Build(const CompressedMeshAsset& asset,
builder.Build(vertices, indices); WebGPUCommandEncoderRef /*cmd*/) {
std::vector<Vector<float, 3, 3>> vertices(asset.vertexCount);
std::vector<std::uint32_t> indices(asset.indexCount);
std::vector<std::byte> dataBytes(
static_cast<std::size_t>(asset.dataCount) * asset.dataStride);
std::vector<std::uint32_t> primRemap(triangleCount); // CompressedBlob always carries 3 regions for MeshAsset (the data region
for (std::uint32_t i = 0; i < triangleCount; ++i) { // can have decompressedSize=0). DecompressCPU validates output sizes
primRemap[i] = builder.prims[i].triIndex; // against region sizes, so the empty-data path needs the empty span.
} std::array<std::span<std::byte>, 3> outputs = {
std::as_writable_bytes(std::span(vertices)),
std::as_writable_bytes(std::span(indices)),
std::span<std::byte>(dataBytes),
};
Compression::DecompressCPU(asset.blob,
std::span(outputs).first(asset.blob.regions.size()));
const BVHNode& root = builder.nodes[0]; BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes));
std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS(
root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
vertices.data(), static_cast<std::int32_t>(vertices.size()),
indices.data(), static_cast<std::int32_t>(indices.size()),
builder.nodes.data(), static_cast<std::int32_t>(builder.nodes.size()),
primRemap.data(), static_cast<std::int32_t>(primRemap.size()));
blasAddr = h;
} }

View file

@ -22,6 +22,7 @@ module Crafter.Graphics:PipelineRTWebGPU_impl;
import :PipelineRTWebGPU; import :PipelineRTWebGPU;
import :ShaderBindingTableWebGPU; import :ShaderBindingTableWebGPU;
import :WebGPUComputeShader;
import :RT; import :RT;
import :WebGPU; import :WebGPU;
import std; import std;
@ -65,7 +66,9 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
std::span<const RTShaderGroup> raygenGroups, std::span<const RTShaderGroup> raygenGroups,
std::span<const RTShaderGroup> missGroups, std::span<const RTShaderGroup> missGroups,
std::span<const RTShaderGroup> hitGroups, std::span<const RTShaderGroup> hitGroups,
const ShaderBindingTableWebGPU& sbt) { const ShaderBindingTableWebGPU& sbt,
std::span<const UICustomBinding> bindings) {
userBindings.assign(bindings.begin(), bindings.end());
std::string wgsl; std::string wgsl;
wgsl.reserve(8 * 1024); wgsl.reserve(8 * 1024);
@ -183,5 +186,7 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
pipelineHandle = WebGPU::wgpuLoadRTPipeline( pipelineHandle = WebGPU::wgpuLoadRTPipeline(
wgsl.data(), wgsl.data(),
static_cast<std::int32_t>(wgsl.size())); static_cast<std::int32_t>(wgsl.size()),
userBindings.empty() ? nullptr : userBindings.data(),
static_cast<std::int32_t>(userBindings.size()));
} }

View file

@ -181,5 +181,15 @@ export namespace Crafter {
} }
return *this; return *this;
} }
// Convenience: create the "standard" linear-filter clamp-to-edge sampler,
// allocate a slot for it, and return the slot. The wgpu* bridge call is
// intentionally kept inside the library — example code shouldn't need to
// reach into Crafter::WebGPU directly.
//
// heap — descriptor heap that receives the sampler; one sampler slot is
//        allocated from it and the new sampler handle is stored there.
inline SamplerSlot AllocateLinearClampSampler(DescriptorHeapWebGPU& heap) {
    DescriptorRange slotRange = heap.AllocateSamplerSlots(1);
    const auto samplerHandle = WebGPU::wgpuCreateLinearClampSampler();
    heap.samplerTable[slotRange.firstElement] = samplerHandle;
    return SamplerSlot(&heap, slotRange.firstElement);
}
} }
#endif // CRAFTER_GRAPHICS_WINDOW_DOM #endif // CRAFTER_GRAPHICS_WINDOW_DOM

View file

@ -0,0 +1,166 @@
/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
// Image2D<T> — portable 2D image type whose API surface is intentionally
// backend-specific via #ifdef. On Vulkan it aliases the existing
// ImageVulkan<T> (full VkFormat / usage / layout control). On WebGPU it's
// a thin handle around an rgba8unorm GPUTexture; sizes are u16 and the
// only update path is from a CompressedTextureAsset.
//
// The "no shared no-op signatures" principle is deliberate: callers do
// the same #ifdef the library does, and write the backend-specific
// invocation. The unified type name Image2D<T> is the only thing
// portable between the two — that's the whole point.
export module Crafter.Graphics:Image2D;
#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
import :ImageVulkan;
export namespace Crafter {
    // Vulkan target: Image2D is just the existing ImageVulkan. New name,
    // same shape — keeps existing ImageVulkan callers (e.g. examples/
    // Decompression) working without a churn-rename.
    template <class PixelType>
    using Image2D = ImageVulkan<PixelType>;
}
#endif // !CRAFTER_GRAPHICS_WINDOW_DOM
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std;
import Crafter.Asset;
import :DescriptorHeapWebGPU;
import :WebGPU;
export namespace Crafter {
// WebGPU-side 2D image: a handle to a texture created through the
// wgpuCreateImage2D bridge call plus the dimensions it was created with.
// PixelType is the CPU-side texel type; its size must equal the pixel
// stride of any asset uploaded through Update().
template <typename PixelType>
class Image2D {
public:
    WebGPUTextureRef handle = 0;   // 0 means "no texture"
    std::uint16_t width = 0;
    std::uint16_t height = 0;

    // Creates the texture and records its dimensions.
    void Create(std::uint16_t w, std::uint16_t h) {
        width = w;
        height = h;
        handle = WebGPU::wgpuCreateImage2D(w, h);
    }

    // CPU-decompress the .ctex blob (no GPU decompression on WebGPU)
    // and upload via wgpuWriteImage2D. The intermediate `pixels` vector
    // lives only for the duration of this call — the underlying
    // queue.writeTexture in JS makes its own copy.
    //
    // Aborts with a message on stderr if the asset's pixel stride does
    // not match PixelType, or if the asset is larger than the texture
    // created by Create() (the write would not fit inside the texture).
    void Update(const CompressedTextureAsset& asset) {
        if (asset.pixelStride != sizeof(PixelType)) {
            std::println(std::cerr,
                "Image2D::Update: pixel stride mismatch (got {}, expected {})",
                asset.pixelStride, sizeof(PixelType));
            std::abort();
        }
        if (asset.sizeX > width || asset.sizeY > height) {
            std::println(std::cerr,
                "Image2D::Update: asset dims {}x{} exceed texture dims {}x{}",
                asset.sizeX, asset.sizeY, width, height);
            std::abort();
        }
        std::vector<PixelType> pixels(
            static_cast<std::size_t>(asset.sizeX) * asset.sizeY);
        std::array<std::span<std::byte>, 1> outputs = {
            std::as_writable_bytes(std::span(pixels)),
        };
        Compression::DecompressCPU(asset.blob, outputs);
        WebGPU::wgpuWriteImage2D(
            handle,
            pixels.data(),
            static_cast<std::int32_t>(pixels.size() * sizeof(PixelType)),
            asset.sizeX, asset.sizeY);
    }

    // Register the texture in a descriptor heap slot so a custom RT
    // pipeline can bind it via UICustomBinding::SampledTexture.
    ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) {
        DescriptorRange r = heap.AllocateImageSlots(1);
        heap.imageTable[r.firstElement] = handle;
        return ImageSlot(&heap, r.firstElement);
    }

    // Destroys the texture if one exists and resets the handle; calling
    // it again afterwards does nothing.
    void Destroy() {
        if (handle != 0) {
            WebGPU::wgpuDestroyTexture(handle);
            handle = 0;
        }
    }
};
// 2D texture array — `layers` × (w × h) rgba8unorm. Each layer is
// populated independently from a CompressedTextureAsset whose dims
// must match the array's (w × h). Layer 0 is sampled at array
// index 0 in WGSL; bind through UICustomBindingKind::SampledTextureArray.
template <typename PixelType>
class Image2DArray {
public:
    // Texture handle returned by wgpuCreateImage2DArray; 0 means "nothing
    // to destroy".
    WebGPUTextureRef handle = 0;
    // Per-layer dimensions and layer count recorded by Create().
    std::uint16_t width = 0;
    std::uint16_t height = 0;
    std::uint16_t layers = 0;

    // Create a texture array of `layerCount` layers, each (w × h), and
    // remember its handle and shape for later validation in UpdateLayer.
    // Does not release a previously held handle.
    void Create(std::uint16_t w, std::uint16_t h, std::uint16_t layerCount) {
        width = w;
        height = h;
        layers = layerCount;
        handle = WebGPU::wgpuCreateImage2DArray(w, h, layerCount);
    }

    // Decompress `tex` on the CPU and upload it to `layer`.
    //
    // Preconditions (each violation prints a diagnostic to std::cerr and
    // aborts):
    //   - `layer` is a valid index, i.e. layer < layers;
    //   - tex.pixelStride equals sizeof(PixelType);
    //   - the asset's dims match the array's (w × h) — resize beforehand
    //     on the host with TextureAsset<RGBA8>::Resize() if they don't.
    void UpdateLayer(std::uint16_t layer, const CompressedTextureAsset& tex) {
        // Reject out-of-range layers up front: otherwise the bad index is
        // forwarded to wgpuWriteImage2DLayer and the upload is lost on the
        // JS side instead of being reported here like the other mismatches.
        if (layer >= layers) {
            std::println(std::cerr,
                "Image2DArray::UpdateLayer: layer {} out of range (array has {} layers)",
                layer, layers);
            std::abort();
        }
        if (tex.pixelStride != sizeof(PixelType)) {
            std::println(std::cerr,
                "Image2DArray::UpdateLayer: pixel stride mismatch (got {}, expected {})",
                tex.pixelStride, sizeof(PixelType));
            std::abort();
        }
        if (tex.sizeX != width || tex.sizeY != height) {
            std::println(std::cerr,
                "Image2DArray::UpdateLayer: layer {} dims {}x{} don't match array dims {}x{}",
                layer, tex.sizeX, tex.sizeY, width, height);
            std::abort();
        }

        // Staging buffer for exactly one layer; freed when this call returns.
        std::vector<PixelType> pixels(static_cast<std::size_t>(width) * height);
        // DecompressCPU writes into a list of byte spans; give it one span
        // aliasing the staging buffer.
        std::array<std::span<std::byte>, 1> outputs = {
            std::as_writable_bytes(std::span(pixels)),
        };
        Compression::DecompressCPU(tex.blob, outputs);
        WebGPU::wgpuWriteImage2DLayer(
            handle, layer,
            pixels.data(),
            static_cast<std::int32_t>(pixels.size() * sizeof(PixelType)),
            width, height);
    }

    // Reserve one image slot in `heap`, point it at this texture array's
    // handle, and return the slot for binding.
    ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) {
        DescriptorRange r = heap.AllocateImageSlots(1);
        heap.imageTable[r.firstElement] = handle;
        return ImageSlot(&heap, r.firstElement);
    }

    // Release the texture if one is held; the handle is reset to 0 so a
    // second call is a no-op.
    void Destroy() {
        if (handle != 0) {
            WebGPU::wgpuDestroyTexture(handle);
            handle = 0;
        }
    }
};
}
#endif // CRAFTER_GRAPHICS_WINDOW_DOM

View file

@ -64,6 +64,7 @@ export namespace Crafter {
#ifdef CRAFTER_GRAPHICS_WINDOW_DOM #ifdef CRAFTER_GRAPHICS_WINDOW_DOM
import std; import std;
import Crafter.Math; import Crafter.Math;
import Crafter.Asset;
import :WebGPU; import :WebGPU;
export namespace Crafter { export namespace Crafter {
@ -108,6 +109,15 @@ export namespace Crafter {
void Build(std::span<Crafter::Vector<float, 3, 3>> vertices, void Build(std::span<Crafter::Vector<float, 3, 3>> vertices,
std::span<std::uint32_t> indices, std::span<std::uint32_t> indices,
WebGPUCommandEncoderRef cmd = 0); WebGPUCommandEncoderRef cmd = 0);
// CPU-decompress the .cmesh blob (no VK_EXT_memory_decompression
// equivalent in WebGPU) and forward to the positions+indices path,
// plus push the optional `data` region into the per-vertex attribs
// heap so closest-hit shaders can sample UVs / normals / tangents.
// The data layout is example-defined — the heap is exposed in WGSL
// as `vertexAttribs : array<u32>` with a per-mesh u32-word offset.
void Build(const ::Crafter::CompressedMeshAsset& asset,
WebGPUCommandEncoderRef cmd = 0);
}; };
} }
#endif // CRAFTER_GRAPHICS_WINDOW_DOM #endif // CRAFTER_GRAPHICS_WINDOW_DOM

View file

@ -26,22 +26,31 @@ import std;
import :RT; import :RT;
import :WebGPU; import :WebGPU;
import :ShaderBindingTableWebGPU; import :ShaderBindingTableWebGPU;
import :WebGPUComputeShader;
export namespace Crafter { export namespace Crafter {
class PipelineRTWebGPU { class PipelineRTWebGPU {
public: public:
std::uint32_t pipelineHandle = 0; std::uint32_t pipelineHandle = 0;
// Mirror of the bindings handed to Init. Kept for the example /
// RTPass to consult when packing the handles[] array at dispatch
// time (one resolved u32 handle per binding, in declaration order).
std::vector<UICustomBinding> userBindings;
// Build the megakernel pipeline. Groups carry indices into // Build the megakernel pipeline. Groups carry indices into
// `sbt.shaders`. The library generates one `case` per registered // `sbt.shaders`. The library generates one `case` per registered
// group: closest-hit groups dispatch to their closestHitShader's // group: closest-hit groups dispatch to their closestHitShader's
// entryFn, miss groups to their generalShader's entryFn, etc. // entryFn, miss groups to their generalShader's entryFn, etc.
// The `cmd` parameter is unused on WebGPU; kept for API symmetry. // The `cmd` parameter is unused on WebGPU; kept for API symmetry.
// `userBindings` declares extra @group(2)+ resources the user's
// closest-hit / miss / raygen WGSL touches (material SSBOs,
// albedo textures, samplers).
void Init(WebGPUCommandEncoderRef cmd, void Init(WebGPUCommandEncoderRef cmd,
std::span<const RTShaderGroup> raygenGroups, std::span<const RTShaderGroup> raygenGroups,
std::span<const RTShaderGroup> missGroups, std::span<const RTShaderGroup> missGroups,
std::span<const RTShaderGroup> hitGroups, std::span<const RTShaderGroup> hitGroups,
const ShaderBindingTableWebGPU& sbt); const ShaderBindingTableWebGPU& sbt,
std::span<const UICustomBinding> bindings = {});
PipelineRTWebGPU() = default; PipelineRTWebGPU() = default;
PipelineRTWebGPU(const PipelineRTWebGPU&) = delete; PipelineRTWebGPU(const PipelineRTWebGPU&) = delete;

View file

@ -66,6 +66,12 @@ export namespace Crafter {
// RTDispatchHeader. Null means "no extra data". // RTDispatchHeader. Null means "no extra data".
const void* pushPtr = nullptr; const void* pushPtr = nullptr;
std::uint32_t pushBytes = 0; std::uint32_t pushBytes = 0;
// Resolved WebGPU resource handles for each user binding the
// pipeline was loaded with, in declaration order. The example
// owns the storage (typically a small std::array of u32). Null /
// 0 means "no user bindings".
const void* handlesPtr = nullptr;
std::uint32_t handlesCount = 0;
RTPass(PipelineRTWebGPU* p) : pipeline(p) {} RTPass(PipelineRTWebGPU* p) : pipeline(p) {}
@ -80,7 +86,9 @@ export namespace Crafter {
tlas.buffer.handle, tlas.buffer.handle,
static_cast<std::int32_t>(tlas.builtInstanceCount), static_cast<std::int32_t>(tlas.builtInstanceCount),
static_cast<std::int32_t>(gx), static_cast<std::int32_t>(gx),
static_cast<std::int32_t>(gy)); static_cast<std::int32_t>(gy),
handlesPtr,
static_cast<std::int32_t>(handlesCount));
} }
}; };
} }

View file

@ -49,6 +49,27 @@ namespace Crafter::WebGPU {
__attribute__((import_module("env"), import_name("wgpuDestroyTexture"))) __attribute__((import_module("env"), import_name("wgpuDestroyTexture")))
extern "C" void wgpuDestroyTexture(std::uint32_t handle); extern "C" void wgpuDestroyTexture(std::uint32_t handle);
// General-purpose rgba8unorm 2D texture for material albedo etc.
// Separate from the atlas path because atlas uses r8unorm + sub-region
// writes; this one takes the whole image in one shot.
__attribute__((import_module("env"), import_name("wgpuCreateImage2D")))
extern "C" std::uint32_t wgpuCreateImage2D(std::int32_t w, std::int32_t h);
__attribute__((import_module("env"), import_name("wgpuWriteImage2D")))
extern "C" void wgpuWriteImage2D(std::uint32_t handle, const void* srcPtr,
std::int32_t byteSize,
std::int32_t w, std::int32_t h);
// 2D texture array — `layerCount` rgba8unorm layers of identical (w × h).
// Sampled via `texture_2d_array<f32>` in WGSL (UICustomBindingKind 3).
// Used by Image2DArray<RGBA8> to stack per-material albedos for one
// multi-material scene.
__attribute__((import_module("env"), import_name("wgpuCreateImage2DArray")))
extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h, std::int32_t layerCount);
__attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer")))
extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer,
const void* srcPtr, std::int32_t byteSize,
std::int32_t w, std::int32_t h);
__attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler"))) __attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler")))
extern "C" std::uint32_t wgpuCreateLinearClampSampler(); extern "C" std::uint32_t wgpuCreateLinearClampSampler();
@ -96,6 +117,11 @@ namespace Crafter::WebGPU {
// stores in RTInstance::accelerationStructureReference; the WebGPU // stores in RTInstance::accelerationStructureReference; the WebGPU
// TLAS-build compute shader resolves it back to root AABB + heap // TLAS-build compute shader resolves it back to root AABB + heap
// offsets at dispatch time. Returns 0 on failure. // offsets at dispatch time. Returns 0 on failure.
// The optional `attribsPtr` / `attribsByteCount` carry per-vertex
// attribute payload (normals, UVs, etc. — layout is example-defined)
// that gets appended to a global attribs heap and exposed to RT
// closest-hit shaders as `vertexAttribs : array<u32>` at
// @group(1) @binding(7). Pass (nullptr, 0) for positions-only meshes.
__attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS"))) __attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS")))
extern "C" std::uint32_t wgpuRegisterMeshBLAS( extern "C" std::uint32_t wgpuRegisterMeshBLAS(
float minX, float minY, float minZ, float minX, float minY, float minZ,
@ -103,25 +129,34 @@ namespace Crafter::WebGPU {
const void* verticesPtr, std::int32_t vertexCount, const void* verticesPtr, std::int32_t vertexCount,
const void* indicesPtr, std::int32_t indexCount, const void* indicesPtr, std::int32_t indexCount,
const void* bvhNodesPtr, std::int32_t bvhNodeCount, const void* bvhNodesPtr, std::int32_t bvhNodeCount,
const void* primRemapPtr, std::int32_t primRemapCount); const void* primRemapPtr, std::int32_t primRemapCount,
const void* attribsPtr, std::int32_t attribsByteCount);
// RT pipeline build. The library composes WGSL by concatenating the // RT pipeline build. The library composes WGSL by concatenating the
// traversal library, generated hit-group switches, and the user- // traversal library, generated hit-group switches, and the user-
// supplied raygen / miss / closesthit / anyhit bodies. Returns an // supplied raygen / miss / closesthit / anyhit bodies. `bindings` is
// opaque pipeline handle. // UICustomBinding-shaped (8 bytes each) declaring extra @group(2)+
// resources the user's closest-hit / miss / raygen WGSL references.
// Pass (nullptr, 0) for a pipeline with no user-declared bindings.
// Returns an opaque pipeline handle.
__attribute__((import_module("env"), import_name("wgpuLoadRTPipeline"))) __attribute__((import_module("env"), import_name("wgpuLoadRTPipeline")))
extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen); extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen,
const void* bindingsPtr, std::int32_t bindingsCount);
// Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched // Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched
// over a (gx, gy) tile grid; the library writes the push data (camera, // over a (gx, gy) tile grid; the library writes the push data (camera,
// payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS // payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS
// + global mesh heap, and runs one workgroup per 8x8 screen tile. // + global mesh heap, and runs one workgroup per 8x8 screen tile.
// `handles[]` carries resolved WebGPU resource handles for every user
// binding declared at pipeline-load time, in the same order. Pass
// (nullptr, 0) for a pipeline with no user bindings.
__attribute__((import_module("env"), import_name("wgpuDispatchRT"))) __attribute__((import_module("env"), import_name("wgpuDispatchRT")))
extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle, extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle,
const void* pushPtr, std::int32_t pushBytes, const void* pushPtr, std::int32_t pushBytes,
std::uint32_t tlasBufHandle, std::uint32_t tlasBufHandle,
std::int32_t instanceCount, std::int32_t instanceCount,
std::int32_t gx, std::int32_t gy); std::int32_t gx, std::int32_t gy,
const void* handlesPtr, std::int32_t handlesCount);
// GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or
// GPU-written), produces per-instance world-space AABBs + per-instance // GPU-written), produces per-instance world-space AABBs + per-instance

View file

@ -32,9 +32,10 @@ import :WebGPU;
export namespace Crafter { export namespace Crafter {
enum class UICustomBindingKind : std::uint8_t { enum class UICustomBindingKind : std::uint8_t {
Buffer = 0, // read-only-storage SSBO, handle is a slot into heap.bufferTable Buffer = 0, // read-only-storage SSBO, handle is a slot into heap.bufferTable
SampledTexture = 1, // sampled texture_2d<f32>, handle is a slot into heap.imageTable SampledTexture = 1, // sampled texture_2d<f32>, handle is a slot into heap.imageTable
Sampler = 2, // filtering sampler, handle is a slot into heap.samplerTable Sampler = 2, // filtering sampler, handle is a slot into heap.samplerTable
SampledTextureArray = 3, // sampled texture_2d_array<f32>, handle is a slot into heap.imageTable
}; };
struct UICustomBinding { struct UICustomBinding {

View file

@ -47,6 +47,7 @@ export import :ShaderBindingTableVulkan;
export import :PipelineRTVulkan; export import :PipelineRTVulkan;
export import :RenderingElement3D; export import :RenderingElement3D;
export import :ImageVulkan; export import :ImageVulkan;
export import :Image2D;
export import :SamplerVulkan; export import :SamplerVulkan;
export import :DescriptorHeapVulkan; export import :DescriptorHeapVulkan;
export import :RenderPass; export import :RenderPass;

View file

@ -31,23 +31,9 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
}); });
}; };
// Sniff the requested target from args before any deps resolve — the
// Crafter.Asset dependency is heavy and not wasm-ready (uses `throw`
// under -fno-exceptions, references `_Float16`). The DOM build stubs
// the renderer entirely so the dep doesn't apply anyway.
bool isWasm = false;
for (std::string_view a : args) {
if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) {
isWasm = true;
break;
}
}
Configuration* event = resolveDep("Crafter.Event", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Event.git"); Configuration* event = resolveDep("Crafter.Event", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Event.git");
Configuration* math = resolveDep("Crafter.Math", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git"); Configuration* math = resolveDep("Crafter.Math", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git");
Configuration* asset = isWasm Configuration* asset = resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git");
? nullptr
: resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git");
Configuration cfg; Configuration cfg;
cfg.path = "./"; cfg.path = "./";
@ -55,11 +41,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
cfg.outputName = "Crafter.Graphics"; cfg.outputName = "Crafter.Graphics";
cfg.type = ConfigurationType::LibraryStatic; cfg.type = ConfigurationType::LibraryStatic;
auto opts = ApplyStandardArgs(cfg, args); auto opts = ApplyStandardArgs(cfg, args);
if (asset) { cfg.dependencies = { event, math, asset };
cfg.dependencies = { event, math, asset };
} else {
cfg.dependencies = { event, math };
}
// Window backend follows the target triple. V1 had separate lib-wayland / // Window backend follows the target triple. V1 had separate lib-wayland /
// lib-win32 configurations; V2 picks the right one automatically based on // lib-win32 configurations; V2 picks the right one automatically based on
@ -78,6 +60,16 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
// strips -march/-mtune from the clang command line for any wasm32-* // strips -march/-mtune from the clang command line for any wasm32-*
// triple, so cfg.march/mtune can stay at their defaults — keeping them // triple, so cfg.march/mtune can stay at their defaults — keeping them
// matches the VariantId of dependency PCMs. // matches the VariantId of dependency PCMs.
//
// WasmAlloc / WasmFree live in Crafter.Graphics-Dom.cpp and back
// dom-env.js's __writeUtf8 path (every keyboard / text-input event
// routes through them). The TU defines no symbols main.cpp would
// reference, so wasm-ld dead-strips it from libCrafter.Graphics.a
// for examples that don't touch the `Dom::HtmlElement*` API (like
// Sponza). `--export=` both forces the export AND pulls the
// defining .o in — solving both halves of the dead-strip problem.
cfg.linkFlags.push_back("-Wl,--export=WasmAlloc");
cfg.linkFlags.push_back("-Wl,--export=WasmFree");
} else if (windows) { } else if (windows) {
cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_WIN32", ""}); cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_WIN32", ""});
cfg.linkFlags.push_back("-lkernel32"); cfg.linkFlags.push_back("-lkernel32");
@ -131,7 +123,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
// when its body is gated out. Vulkan-typed partitions stub to empty // when its body is gated out. Vulkan-typed partitions stub to empty
// modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router // modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router
// partitions stub to empty modules in the opposite direction. // partitions stub to empty modules in the opposite direction.
std::array<fs::path, 40> ifaces = { std::array<fs::path, 41> ifaces = {
"interfaces/Crafter.Graphics", "interfaces/Crafter.Graphics",
"interfaces/Crafter.Graphics-Animation", "interfaces/Crafter.Graphics-Animation",
"interfaces/Crafter.Graphics-Clipboard", "interfaces/Crafter.Graphics-Clipboard",
@ -147,6 +139,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
"interfaces/Crafter.Graphics-ForwardDeclarations", "interfaces/Crafter.Graphics-ForwardDeclarations",
"interfaces/Crafter.Graphics-Gamepad", "interfaces/Crafter.Graphics-Gamepad",
"interfaces/Crafter.Graphics-GraphicsTypes", "interfaces/Crafter.Graphics-GraphicsTypes",
"interfaces/Crafter.Graphics-Image2D",
"interfaces/Crafter.Graphics-ImageVulkan", "interfaces/Crafter.Graphics-ImageVulkan",
"interfaces/Crafter.Graphics-Input", "interfaces/Crafter.Graphics-Input",
"interfaces/Crafter.Graphics-InputField", "interfaces/Crafter.Graphics-InputField",