From b5d0f52da0b9a4c170863f215d0289ac4c1602d0 Mon Sep 17 00:00:00 2001 From: Jorijn van der Graaf Date: Tue, 19 May 2026 00:27:09 +0200 Subject: [PATCH] webgpu sponza --- additional/dom-webgpu.js | 218 ++++++++- examples/Sponza/README.md | 58 +++ examples/Sponza/closesthit.glsl | 23 + examples/Sponza/closesthit.wgsl | 90 ++++ examples/Sponza/main.cpp | 445 ++++++++++++++++++ examples/Sponza/miss.glsl | 11 + examples/Sponza/miss.wgsl | 16 + examples/Sponza/project.cpp | 92 ++++ examples/Sponza/raygen.glsl | 52 ++ examples/Sponza/raygen.wgsl | 109 +++++ .../Crafter.Graphics-Mesh-WebGPU.cpp | 66 ++- .../Crafter.Graphics-PipelineRTWebGPU.cpp | 9 +- ...Crafter.Graphics-DescriptorHeapWebGPU.cppm | 10 + interfaces/Crafter.Graphics-Image2D.cppm | 166 +++++++ interfaces/Crafter.Graphics-Mesh.cppm | 10 + .../Crafter.Graphics-PipelineRTWebGPU.cppm | 11 +- interfaces/Crafter.Graphics-RTPass.cppm | 10 +- interfaces/Crafter.Graphics-WebGPU.cppm | 45 +- .../Crafter.Graphics-WebGPUComputeShader.cppm | 7 +- interfaces/Crafter.Graphics.cppm | 1 + project.cpp | 35 +- 21 files changed, 1426 insertions(+), 58 deletions(-) create mode 100644 examples/Sponza/README.md create mode 100644 examples/Sponza/closesthit.glsl create mode 100644 examples/Sponza/closesthit.wgsl create mode 100644 examples/Sponza/main.cpp create mode 100644 examples/Sponza/miss.glsl create mode 100644 examples/Sponza/miss.wgsl create mode 100644 examples/Sponza/project.cpp create mode 100644 examples/Sponza/raygen.glsl create mode 100644 examples/Sponza/raygen.wgsl create mode 100644 interfaces/Crafter.Graphics-Image2D.cppm diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js index 834f320..6e35c21 100644 --- a/additional/dom-webgpu.js +++ b/additional/dom-webgpu.js @@ -43,6 +43,8 @@ function stub(name) { "wgpuGetCanvasWidth", "wgpuGetCanvasHeight", "wgpuSurfaceWidth", "wgpuSurfaceHeight", "wgpuInit", "wgpuCreateBuffer", "wgpuWriteBuffer", "wgpuDestroyBuffer", "wgpuCreateAtlasTexture", 
"wgpuWriteAtlasRegion", "wgpuDestroyTexture", + "wgpuCreateImage2D", "wgpuWriteImage2D", + "wgpuCreateImage2DArray", "wgpuWriteImage2DLayer", "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd", "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText", "wgpuLoadCustomShader", "wgpuDispatchCustom", @@ -580,6 +582,99 @@ env.wgpuDestroyTexture = (handle) => { if (tex) { tex.destroy(); textures.delete(handle); textureViews.delete(handle); } }; +// General-purpose 2D rgba8unorm texture, used by Image2D. Distinct +// from the atlas path (r8unorm, sub-region writes) — this one's a one-shot +// upload of a whole image, sized to the pixel data the caller hands over. +env.wgpuCreateImage2D = (w, h) => { + const handle = newHandle(); + const tex = device.createTexture({ + size: [w, h], + format: "rgba8unorm", + usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, + }); + textures.set(handle, tex); + textureViews.set(handle, tex.createView()); + return handle; +}; +// 2D texture array — N layers of identical (w × h) rgba8unorm. Used by +// Image2DArray to back one material albedo per layer; shaders +// sample with `textureSampleLevel(tex, samp, uv, layerIdx, 0.0)`. 
+env.wgpuCreateImage2DArray = (w, h, layerCount) => { + const handle = newHandle(); + const tex = device.createTexture({ + size: [w, h, layerCount], + dimension: "2d", + format: "rgba8unorm", + usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, + }); + textures.set(handle, tex); + textureViews.set(handle, tex.createView({ + dimension: "2d-array", + arrayLayerCount: layerCount, + })); + return handle; +}; +env.wgpuWriteImage2DLayer = (handle, layer, srcPtr, byteSize, w, h) => { + const tex = textures.get(handle); + if (!tex) return; + const srcBPR = w * 4; + const alignedBPR = (srcBPR + 255) & ~255; + if (alignedBPR === srcBPR) { + queue.writeTexture( + { texture: tex, origin: [0, 0, layer] }, + memU8().subarray(srcPtr, srcPtr + byteSize), + { bytesPerRow: srcBPR, rowsPerImage: h }, + { width: w, height: h, depthOrArrayLayers: 1 } + ); + } else { + const staging = new Uint8Array(alignedBPR * h); + const src = memU8(); + for (let y = 0; y < h; y++) { + staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR), + y * alignedBPR); + } + queue.writeTexture( + { texture: tex, origin: [0, 0, layer] }, + staging, + { bytesPerRow: alignedBPR, rowsPerImage: h }, + { width: w, height: h, depthOrArrayLayers: 1 } + ); + } +}; + +env.wgpuWriteImage2D = (handle, srcPtr, byteSize, w, h) => { + const tex = textures.get(handle); + if (!tex) return; + // queue.writeTexture wants bytesPerRow as a multiple of 256, OR == width*bpp + // when the source is contiguous. RGBA8 = 4 bpp, so bytesPerRow = w*4. + const srcBPR = w * 4; + const alignedBPR = (srcBPR + 255) & ~255; + if (alignedBPR === srcBPR) { + // Already aligned (w * 4 is a multiple of 256 → w is a multiple of 64). + queue.writeTexture( + { texture: tex }, + memU8().subarray(srcPtr, srcPtr + byteSize), + { bytesPerRow: srcBPR, rowsPerImage: h }, + { width: w, height: h } + ); + } else { + // Repack into a 256-aligned staging buffer. 
One alloc per Update, + // freed when the function returns — fine for asset-load time use. + const staging = new Uint8Array(alignedBPR * h); + const src = memU8(); + for (let y = 0; y < h; y++) { + staging.set(src.subarray(srcPtr + y * srcBPR, srcPtr + (y + 1) * srcBPR), + y * alignedBPR); + } + queue.writeTexture( + { texture: tex }, + staging, + { bytesPerRow: alignedBPR, rowsPerImage: h }, + { width: w, height: h } + ); + } +}; + env.wgpuCreateLinearClampSampler = () => { const handle = newHandle(); samplers.set(handle, device.createSampler({ @@ -756,6 +851,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, { binding: 6, visibility: GPUShaderStage.COMPUTE, storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, + { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, ]}) : device.createBindGroupLayout({ entries: [ { binding: 0, visibility: GPUShaderStage.COMPUTE, @@ -773,9 +869,10 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQue if (byGroup.has(g)) { const entries = byGroup.get(g).map(b => { const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE }; - if (b.kind === 0) e.buffer = { type: "read-only-storage" }; + if (b.kind === 0) e.buffer = { type: "read-only-storage" }; else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" }; else if (b.kind === 2) e.sampler = { type: "filtering" }; + else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" }; return e; }); bgls.push(device.createBindGroupLayout({ entries })); @@ -839,6 +936,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle { binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, { binding: 6, resource: outView }, + { 
binding: 7, resource: { buffer: rtState.attribsHeap.gpu } }, ], }); state.pass.setBindGroup(1, rtBG); @@ -858,6 +956,7 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle if (b.kind === 0) resource = { buffer: buffers.get(h) }; else if (b.kind === 1) resource = textureViews.get(h); else if (b.kind === 2) resource = samplers.get(h); + else if (b.kind === 3) resource = textureViews.get(h); return { binding: b.binding, resource }; }); const bg = device.createBindGroup({ layout: pipe.bgls[g], entries }); @@ -981,6 +1080,12 @@ struct BVHNode { }; // Per-mesh record. Indexed by RTInstance::accelerationStructureReference. +// attribsOffset is the per-mesh base index (in u32 words) into the +// vertexAttribs heap; meshes registered without per-vertex attribs still +// get the current heap cursor but own no entries there, so never read it. The +// per-vertex stride lives in the user's WGSL — the library doesn't store +// it because the layout is example-defined (Sponza's closesthit.wgsl uses +// 12 u32 / vertex for VertexNormalTangentUVPacked). struct MeshRecord { rootAabbMin: vec3, vertexOffset: u32, @@ -989,7 +1094,7 @@ struct MeshRecord { bvhOffset: u32, primRemapOffset: u32, triangleCount: u32, - _pad: u32, + attribsOffset: u32, }; // Per-instance TLAS record built by the TLAS-build compute pass. 
@@ -1048,6 +1153,7 @@ const rtWgslMegakernelBindings = String.raw` @group(1) @binding(4) var indices : array; @group(1) @binding(5) var primRemap : array; @group(1) @binding(6) var outImage : texture_storage_2d; +@group(1) @binding(7) var vertexAttribs : array; `; const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings; @@ -1565,6 +1671,7 @@ const rtState = { indexHeap: null, // u32 stream bvhHeap: null, // BVHNode stream (32 bytes per node) primRemapHeap: null, // u32 stream + attribsHeap: null, // u32 stream (per-vertex attribute payload; example-defined stride) meshRecordsBuffer: null, // GPUBuffer of MeshRecord[] meshRecordsCapacity: 0, @@ -1588,6 +1695,7 @@ function rtInit() { rtState.indexHeap = makeRtHeap(); rtState.bvhHeap = makeRtHeap(); rtState.primRemapHeap = makeRtHeap(); + rtState.attribsHeap = makeRtHeap(); rtState.meshRecordsCapacity = 16; rtState.meshRecordsBuffer = device.createBuffer({ size: rtState.meshRecordsCapacity * 48, @@ -1634,23 +1742,30 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ, verticesPtr, vertexCount, indicesPtr, indexCount, bvhNodesPtr, bvhNodeCount, - primRemapPtr, primRemapCount) => { + primRemapPtr, primRemapCount, + attribsPtr, attribsByteCount) => { if (!rtState.vertHeap) rtInit(); + console.log(`[crafter-wgpu] mesh BLAS: bbox=(${minX.toFixed(1)}..${maxX.toFixed(1)}, ${minY.toFixed(1)}..${maxY.toFixed(1)}, ${minZ.toFixed(1)}..${maxZ.toFixed(1)}), ${vertexCount} verts, ${indexCount/3} tris, attribs=${attribsByteCount}B`); const vBytes = vertexCount * 12; const iBytes = indexCount * 4; const nBytes = bvhNodeCount * 32; const rBytes = primRemapCount * 4; + // attribsByteCount must be a multiple of 4 (the heap is array). + // Round up the upload size; the in-MeshRecord offset is in u32 words. 
+ const aBytes = (attribsByteCount + 3) & ~3; rtHeapEnsure(rtState.vertHeap, vBytes); rtHeapEnsure(rtState.indexHeap, iBytes); rtHeapEnsure(rtState.bvhHeap, nBytes); rtHeapEnsure(rtState.primRemapHeap, rBytes); + if (aBytes > 0) rtHeapEnsure(rtState.attribsHeap, aBytes); const vOff = rtState.vertHeap.cursor / 12; // in vec3 units const iOff = rtState.indexHeap.cursor / 4; // in u32 units const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units const rOff = rtState.primRemapHeap.cursor / 4; + const aOff = rtState.attribsHeap.cursor / 4; // in u32 units // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4. @@ -1662,11 +1777,16 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ, memU8().buffer, bvhNodesPtr, nBytes); queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor, memU8().buffer, primRemapPtr, rBytes); + if (aBytes > 0) { + queue.writeBuffer(rtState.attribsHeap.gpu, rtState.attribsHeap.cursor, + memU8().buffer, attribsPtr, aBytes); + } rtState.vertHeap.cursor += vBytes; rtState.indexHeap.cursor += iBytes; rtState.bvhHeap.cursor += nBytes; rtState.primRemapHeap.cursor += rBytes; + rtState.attribsHeap.cursor += aBytes; const handle = rtState.nextMeshHandle++; rtMeshRecordsEnsure(handle + 1); @@ -1682,7 +1802,7 @@ env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ, u32[8] = nOff; u32[9] = rOff; u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0; - u32[11] = 0; + u32[11] = aOff; queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec); return handle; @@ -1734,9 +1854,13 @@ env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => { // RT pipeline loader — wraps user-supplied WGSL (sources + generated mega // switches + raygen + @compute entry) with the library prelude/helpers. 
-const rtPipelines = new Map(); // handle → { pipeline, bgls } +// `bindingsPtr` / `bindingsCount` are UICustomBinding entries (same 8-byte +// shape as wgpuLoadCustomShader) declaring extra @group(2)+ resources the +// closest-hit / miss / raygen WGSL touches (material SSBOs, albedo +// textures, samplers). Pass (0, 0) for a pipeline with no user bindings. +const rtPipelines = new Map(); // handle → { pipeline, bgls, byGroup, sortedGroups } -env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { +env.wgpuLoadRTPipeline = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => { if (!rtState.vertHeap) rtInit(); const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen)); @@ -1751,6 +1875,31 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { } const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers; + // Parse user bindings (same wire format as wgpuLoadCustomShader). + const userBindings = []; + if (bindingsCount > 0) { + const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8); + for (let i = 0; i < bindingsCount; i++) { + const g = dv.getUint8(i*8 + 0); + if (g < 2) { + console.error(`[crafter-wgpu] RT pipeline: @group(${g}) reserved; user bindings need group >= 2`); + return 0; + } + userBindings.push({ + group: g, + binding: dv.getUint8(i*8 + 1), + kind: dv.getUint8(i*8 + 2), + pushOffset: dv.getUint32(i*8 + 4, true), + }); + } + } + const byGroup = new Map(); + for (const b of userBindings) { + if (!byGroup.has(b.group)) byGroup.set(b.group, []); + byGroup.get(b.group).push(b); + } + const sortedGroups = [...byGroup.keys()].sort((a, b) => a - b); + let pipeline; try { const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" }); @@ -1768,13 +1917,34 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, { binding: 6, visibility: GPUShaderStage.COMPUTE, storageTexture: { format: 
"rgba8unorm", access: "write-only", viewDimension: "2d" } }, + { binding: 7, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, ]}); + // User binding-group layouts. WebGPU pipeline layouts need a + // contiguous array up to the highest group used, so pad any gaps + // with empty bgls (same rule as wgpuLoadCustomShader). + const userBgls = []; + const highest = sortedGroups.length ? sortedGroups[sortedGroups.length - 1] : 1; + for (let g = 2; g <= highest; g++) { + if (byGroup.has(g)) { + const entries = byGroup.get(g).map(b => { + const e = { binding: b.binding, visibility: GPUShaderStage.COMPUTE }; + if (b.kind === 0) e.buffer = { type: "read-only-storage" }; + else if (b.kind === 1) e.texture = { sampleType: "float", viewDimension: "2d" }; + else if (b.kind === 2) e.sampler = { type: "filtering" }; + else if (b.kind === 3) e.texture = { sampleType: "float", viewDimension: "2d-array" }; + return e; + }); + userBgls.push(device.createBindGroupLayout({ entries })); + } else { + userBgls.push(device.createBindGroupLayout({ entries: [] })); + } + } pipeline = device.createComputePipeline({ - layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }), + layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl, ...userBgls] }), compute: { module: mod, entryPoint: "main" }, }); const handle = newHandle(); - rtPipelines.set(handle, { pipeline, headerBgl, dataBgl }); + rtPipelines.set(handle, { pipeline, headerBgl, dataBgl, userBgls, byGroup, sortedGroups }); return handle; } catch (e) { console.error("[crafter-wgpu] RT pipeline compile failed:", e); @@ -1784,7 +1954,8 @@ env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { }; env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes, - tlasBufHandle, instanceCount, gx, gy) => { + tlasBufHandle, instanceCount, gx, gy, + handlesPtr, handlesCount) => { if (!state.pass) return; const pipe = rtPipelines.get(pipelineHandle); const tlas = buffers.get(tlasBufHandle); 
@@ -1815,12 +1986,41 @@ env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes, { binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, { binding: 6, resource: outView }, + { binding: 7, resource: { buffer: rtState.attribsHeap.gpu } }, ], }); state.pass.setPipeline(pipe.pipeline); state.pass.setBindGroup(0, headerBg); state.pass.setBindGroup(1, dataBg); + + // User bindings: walk byGroup in the same sorted order the C++ side + // packed handles[], picking up indices linearly. + if (handlesCount > 0) { + const handles = new Uint32Array(memU8().buffer, handlesPtr, handlesCount); + let handleIdx = 0; + let bglIdx = 0; + for (let g = 2; g <= (pipe.sortedGroups[pipe.sortedGroups.length - 1] || 1); g++) { + if (pipe.byGroup.has(g)) { + const entries = pipe.byGroup.get(g).map(b => { + const h = handles[handleIdx++]; + let resource; + if (b.kind === 0) resource = { buffer: buffers.get(h) }; + else if (b.kind === 1) resource = textureViews.get(h); + else if (b.kind === 2) resource = samplers.get(h); + else if (b.kind === 3) resource = textureViews.get(h); + return { binding: b.binding, resource }; + }); + const bg = device.createBindGroup({ + layout: pipe.userBgls[bglIdx], + entries, + }); + state.pass.setBindGroup(g, bg); + } + bglIdx++; + } + } + state.pass.dispatchWorkgroups(gx, gy, 1); state.outIsPing = !state.outIsPing; }; diff --git a/examples/Sponza/README.md b/examples/Sponza/README.md new file mode 100644 index 0000000..5545764 --- /dev/null +++ b/examples/Sponza/README.md @@ -0,0 +1,58 @@ +# Sponza example + +Loads the Sponza atrium as a `.cmesh` + one albedo `.ctex` and renders +it via ray tracing on both Vulkan (native) and WebGPU (wasm). Same +`main.cpp`, `#ifdef CRAFTER_GRAPHICS_WINDOW_DOM` selects the backend. 
+ +## What this example proves + +- `.cmesh` and `.ctex` decompression round-trip on both backends + (GPU via `VK_EXT_memory_decompression` on Vulkan, CPU via + `Compression::DecompressCPU` on WebGPU). +- An albedo texture binding flowing through the RT pipeline's closest-hit + on both backends (`Image2D` on Vulkan, a 2D texture array on WebGPU). + The Vulkan closest-hit samples at the barycentric attribs as UVs — + proof-of-binding only; the WebGPU closest-hit interpolates per-vertex UVs. + +## Asset fetch + +`project.cpp` calls `Crafter::GitFetch(...)` on +[https://github.com/jimmiebergmann/Sponza](https://github.com/jimmiebergmann/Sponza) +(pinned to commit `222338979d32f4f4818466291bdbc29f192b86ba`). The +clone lands in the per-user crafter-build cache; first build pulls +~280 MB once, subsequent builds reuse it. + +`cfg.assets` then picks two files out of that clone: + +| Source | Compressed output | +|-----------------------------------------|-------------------------| +| `sponza.obj` | `sponza.cmesh` | +| `textures/sponza_arch_diff.tga` | `sponza_arch_diff.ctex` | + +Both land flat in the example's bin directory. + +## Building + +``` +crafter build # native Vulkan +crafter build --target=wasm32-wasip1 # WebGPU / wasm +``` + +## License & attribution + +Sponza geometry, materials, and textures are licensed under +[CC BY 3.0](https://creativecommons.org/licenses/by/3.0/). + +- **Original model:** Frank Meinl, Crytek (2010). +- **OBJ packaging / cleanup:** Morgan McGuire, McGuire Computer + Graphics Archive — https://casual-effects.com/data. +- **GitHub mirror used here:** Jimmie Bergmann's roof-material fixup — + https://github.com/jimmiebergmann/Sponza. + +When redistributing builds of this example that bundle the compressed +Sponza outputs (`*.cmesh`, `*.ctex`), the CC BY 3.0 attribution +requirement applies. Quoting the original credit somewhere visible to +end users (about-screen, credits page, etc.) is enough. 
+ +The Crafter.Graphics library code itself is LGPL-3.0; the two +licenses are compatible for data + code distribution. diff --git a/examples/Sponza/closesthit.glsl b/examples/Sponza/closesthit.glsl new file mode 100644 index 0000000..2355d9a --- /dev/null +++ b/examples/Sponza/closesthit.glsl @@ -0,0 +1,23 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_shader_image_load_formatted : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable +#extension GL_EXT_descriptor_heap : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Specialization constant: descriptor-heap slot of the albedo texture. +// Set from descriptorHeap.bufferStartElement + the slot allocated for +// the Image2D on the host side. Sampling uses gl_HitAttributeEXT +// barycentrics as UVs — proof-of-binding rather than UV-correct shading. +// Per-vertex UV interpolation lands when Mesh on Vulkan exposes the +// data-region buffer. +layout(constant_id = 0) const uint16_t albedoSlot = 0us; +layout(descriptor_heap) uniform sampler2D albedo[]; + +hitAttributeEXT vec2 hitAttrs; +layout(location = 0) rayPayloadInEXT vec3 hitValue; + +void main() { + vec2 bary = vec2(hitAttrs.x, hitAttrs.y); + hitValue = texture(albedo[albedoSlot], bary).rgb; +} diff --git a/examples/Sponza/closesthit.wgsl b/examples/Sponza/closesthit.wgsl new file mode 100644 index 0000000..9fa15a8 --- /dev/null +++ b/examples/Sponza/closesthit.wgsl @@ -0,0 +1,90 @@ +// Payload declared here so the WGSL assembler sees it before raygen +// (the assembler concatenates closesthit/anyhit/miss BEFORE raygen). +// +// WGSL forbids cycles in the function call graph, so closesthit_main +// CAN'T call traceRay (that would create closesthit → traceRay → +// runClosestHit → closesthit). The lighting + shadow trace therefore +// happens in raygen; closesthit's job is just to gather surface data +// into the payload. +// +// shadowRay = 0 (primary): closesthit fills albedo/worldPos/normal/hit. 
+// shadowRay = 1 (shadow): closesthit is skipped (RT_FLAG_SKIP_CLOSEST_HIT), +// miss flips color to white = "lit". +struct Payload { + color: vec3, + shadowRay: u32, + worldPos: vec3, + hit: u32, + worldNormal: vec3, + _pad: f32, +}; + +// User-bound resources at group(2). Matches the UICustomBinding span the +// host hands to PipelineRTWebGPU::Init. +// binding 0 — albedo texture_2d_array, one layer per Sponza material +// binding 1 — sampler (linear clamp) +// binding 2 — camera storage buffer (read by raygen only) +@group(2) @binding(0) var albedos : texture_2d_array; +@group(2) @binding(1) var samp : sampler; + +// VertexNormalTangentUVPacked is `packed` on the outer struct but each +// inner `Vector` is SIMD-aligned to a 16-byte stride. So +// each vertex is 12 u32 words: normal at 0..2, tangent at 4..6, uv at 8..9. +const ATTRIB_STRIDE_U32: u32 = 12u; +const ATTRIB_NORMAL_OFFSET: u32 = 0u; +const ATTRIB_UV_OFFSET: u32 = 8u; + +fn fetchUV(meshRec: MeshRecord, vertexIdx: u32) -> vec2 { + let base = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32 + ATTRIB_UV_OFFSET; + return vec2( + bitcast(vertexAttribs[base + 0u]), + bitcast(vertexAttribs[base + 1u]), + ); +} + +fn fetchNormal(meshRec: MeshRecord, vertexIdx: u32) -> vec3 { + let base = meshRec.attribsOffset + vertexIdx * ATTRIB_STRIDE_U32 + ATTRIB_NORMAL_OFFSET; + return vec3( + bitcast(vertexAttribs[base + 0u]), + bitcast(vertexAttribs[base + 1u]), + bitcast(vertexAttribs[base + 2u]), + ); +} + +fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr) { + // Resolve hit triangle → 3 vertex indices. + let meshIdx = tlasEntries[hit.instanceId].blasMeshIdx; + let meshRec = meshRecords[meshIdx]; + let baseIdx = meshRec.indexOffset + hit.primitiveId * 3u; + let i0 = indices[baseIdx + 0u]; + let i1 = indices[baseIdx + 1u]; + let i2 = indices[baseIdx + 2u]; + let bary = vec3(1.0 - hit.attribs.x - hit.attribs.y, hit.attribs.x, hit.attribs.y); + + // Albedo via barycentric UV interpolation. 
+ let uv0 = fetchUV(meshRec, i0); + let uv1 = fetchUV(meshRec, i1); + let uv2 = fetchUV(meshRec, i2); + let uv = uv0 * bary.x + uv1 * bary.y + uv2 * bary.z; + // OBJ V is bottom-up; sampler is top-down. fract for manual tiling. + let uvTiled = vec2(fract(uv.x), fract(1.0 - uv.y)); + let layer = i32(hit.customIndex); + let albedo = textureSampleLevel(albedos, samp, uvTiled, layer, 0.0).rgb; + + // World-space smooth shading normal. Multiply through the + // object-to-world rotation so this stays correct if a future scene + // rotates instances (Sponza itself is all identities). + let n0 = fetchNormal(meshRec, i0); + let n1 = fetchNormal(meshRec, i1); + let n2 = fetchNormal(meshRec, i2); + let nObj = normalize(n0 * bary.x + n1 * bary.y + n2 * bary.z); + let nWorld = normalize(vec3( + dot(hit.objectToWorldR0.xyz, nObj), + dot(hit.objectToWorldR1.xyz, nObj), + dot(hit.objectToWorldR2.xyz, nObj))); + + (*payload).color = albedo; + (*payload).worldPos = ray.origin + ray.direction * hit.t; + (*payload).worldNormal = nWorld; + (*payload).hit = 1u; +} diff --git a/examples/Sponza/main.cpp b/examples/Sponza/main.cpp new file mode 100644 index 0000000..73db99e --- /dev/null +++ b/examples/Sponza/main.cpp @@ -0,0 +1,445 @@ +// Sponza on Vulkan + WebGPU. Same example source, two backends — picked +// by CRAFTER_GRAPHICS_WINDOW_DOM. Both paths: +// 1. Load a Sponza .cmesh (positions + indices, optional per-vertex +// data region) and a single albedo .ctex from disk. The source +// assets are fetched once by project.cpp (Crafter.Build::GitFetch) +// from https://github.com/jimmiebergmann/Sponza and compressed +// into the bin dir at build time — they don't live in this repo. +// 2. Build BLAS + TLAS via the existing Mesh / RenderingElement3D +// flow. The on-disk format is identical between backends; only +// the decompression path differs (VK_EXT_memory_decompression +// on Vulkan, CPU GDeflate on WebGPU). +// 3. 
Upload the albedo as Image2D, register it in the +// backend descriptor heap, and run the RT pipeline. The Vulkan +// closest-hit samples the texture at the hit's barycentric coords — +// proof-of-binding rather than UV-correct shading; the WebGPU +// closest-hit interpolates per-vertex UVs from the attribs heap. +// Doing the same on Vulkan needs a sibling data buffer +// exposed off Mesh. +// +// Sponza model: CC BY 3.0 — Frank Meinl (Crytek), packaged by Jimmie +// Bergmann and Morgan McGuire. https://casual-effects.com/data + +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM +#include "vulkan/vulkan.h" +#endif + +import Crafter.Graphics; +import Crafter.Asset; +import Crafter.Math; +import Crafter.Event; +import std; + +using namespace Crafter; +namespace fs = std::filesystem; + +namespace { + struct RGBA8 { std::uint8_t r, g, b, a; }; + + void RequireAssets(const fs::path& mesh, const fs::path& tex) { + const bool haveMesh = fs::exists(mesh); + const bool haveTex = fs::exists(tex); + if (haveMesh && haveTex) return; + std::println(std::cerr, + "[Sponza] missing asset(s):\n" + " mesh: {} {}\n" + " albedo: {} {}\n" + "The build should have populated these via cfg.assets +\n" + "GitFetch (see examples/Sponza/project.cpp). If you ran\n" + "the binary from outside its bin dir, cd into the bin dir\n" + "first — asset paths are relative to cwd.", + mesh.string(), haveMesh ? "OK" : "MISSING", + tex.string(), haveTex ? "OK" : "MISSING"); + std::abort(); + } +} + +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM +int main() { + // Native Vulkan path is single-material for now (see file header) — + // pick up just the first per-material output the build emits. The + // WebGPU branch below uses every mesh + a texture array. 
+ const fs::path meshPath = "mesh_0.cmesh"; + const fs::path texPath = "tex_0.ctex"; + RequireAssets(meshPath, texPath); + + CompressedMeshAsset loadedMesh = LoadCompressedMesh(meshPath); + CompressedTextureAsset loadedTex = LoadCompressedTexture(texPath); + std::println("[Sponza] loaded {} verts, {} idx, {}x{} albedo", + loadedMesh.vertexCount, loadedMesh.indexCount, + loadedTex.sizeX, loadedTex.sizeY); + + Device::Initialize(); + Window window(1280, 720, "Sponza"); + VkCommandBuffer cmd = window.StartInit(); + + DescriptorHeapVulkan descriptorHeap; + descriptorHeap.Initialize(/*images*/ 2, /*buffers*/ 1, /*samplers*/ 0); + + // Two specialization constants: the TLAS slot offset (shared with + // VulkanTriangle pattern) and the albedo slot index for closesthit. + VkSpecializationMapEntry raygenEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) }; + VkSpecializationInfo raygenSpec = { + .mapEntryCount = 1, .pMapEntries = &raygenEntry, + .dataSize = sizeof(std::uint16_t), .pData = &descriptorHeap.bufferStartElement, + }; + + // Allocate the albedo slot first so its index is known when we + // compile closesthit.spv. 
+ auto imgSlots = descriptorHeap.AllocateImageSlots(2); + auto bufSlots = descriptorHeap.AllocateBufferSlots(1); + std::uint16_t albedoHeapSlot = static_cast(imgSlots.firstElement + 1); + + VkSpecializationMapEntry hitEntry = { .constantID = 0, .offset = 0, .size = sizeof(std::uint16_t) }; + VkSpecializationInfo hitSpec = { + .mapEntryCount = 1, .pMapEntries = &hitEntry, + .dataSize = sizeof(std::uint16_t), .pData = &albedoHeapSlot, + }; + + std::array shaders {{ + { "raygen.spv", "main", VK_SHADER_STAGE_RAYGEN_BIT_KHR, &raygenSpec }, + { "miss.spv", "main", VK_SHADER_STAGE_MISS_BIT_KHR, nullptr }, + { "closesthit.spv", "main", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, &hitSpec }, + }}; + ShaderBindingTableVulkan shaderTable; + shaderTable.Init(shaders); + + std::array raygenGroups {{ { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 0, .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR, + } }}; + std::array missGroups {{ { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 1, .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR, + } }}; + std::array hitGroups {{ { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = 2, + .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR, + } }}; + + PipelineRTVulkan pipeline; + pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, shaderTable); + + Mesh sponzaMesh; + sponzaMesh.Build(loadedMesh, cmd); + + Image2D albedo; + albedo.Create(loadedTex.sizeX, loadedTex.sizeY, /*mipLevels*/ 1, cmd, + VK_FORMAT_R8G8B8A8_UNORM, + 
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + albedo.Update(loadedTex, cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SamplerVulkan sampler; + + static RenderingElement3D renderer; + renderer.instance = { + .transform = {}, + .instanceCustomIndex = 0, + .mask = 0xFF, + .instanceShaderBindingTableRecordOffset = 0, + .flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, + .accelerationStructureReference = sponzaMesh.blasAddr, + }; + MatrixRowMajor::Identity() + .Store(reinterpret_cast(renderer.instance.transform.matrix)); + RenderingElement3D::elements.emplace_back(&renderer); + RenderingElement3D::BuildTLAS(cmd, 0); + RenderingElement3D::BuildTLAS(cmd, 1); + RenderingElement3D::BuildTLAS(cmd, 2); + + window.FinishInit(); + + // Write descriptors: TLAS at bufSlots[0], output image at imgSlots[0], + // albedo (combined image+sampler) at imgSlots[1]. Per-frame replicated. + VkDeviceAddressRangeKHR tlasRanges[Window::numFrames]; + VkImageDescriptorInfoEXT outImgInfos[Window::numFrames]; + VkDescriptorImageInfo albedoInfo { + .sampler = sampler.textureSampler, + .imageView = albedo.imageView, + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }; + for (std::uint32_t f = 0; f < Window::numFrames; ++f) { + tlasRanges[f] = { .address = RenderingElement3D::tlases[f].address }; + outImgInfos[f] = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DESCRIPTOR_INFO_EXT, + .pView = &window.imageViews[f], + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + } + + std::vector resources; + std::vector destinations; + resources.reserve(Window::numFrames * 3); + destinations.reserve(Window::numFrames * 3); + for (std::uint32_t f = 0; f < Window::numFrames; ++f) { + resources.push_back({ + .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT, + .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, + .data = { .pAddressRange = &tlasRanges[f] }, + }); + destinations.push_back({ + .address = descriptorHeap.resourceHeap[f].value + + 
descriptorHeap.BufferByteOffset(bufSlots.firstElement), + .size = Device::descriptorHeapProperties.bufferDescriptorSize, + }); + resources.push_back({ + .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT, + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .data = { .pImage = &outImgInfos[f] }, + }); + destinations.push_back({ + .address = descriptorHeap.resourceHeap[f].value + + descriptorHeap.ImageByteOffset(imgSlots.firstElement), + .size = Device::descriptorHeapProperties.imageDescriptorSize, + }); + resources.push_back({ + .sType = VK_STRUCTURE_TYPE_RESOURCE_DESCRIPTOR_INFO_EXT, + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .data = { .pCombinedImageSampler = &albedoInfo }, + }); + destinations.push_back({ + .address = descriptorHeap.resourceHeap[f].value + + descriptorHeap.ImageByteOffset(albedoHeapSlot), + .size = Device::descriptorHeapProperties.imageDescriptorSize, + }); + } + Device::vkWriteResourceDescriptorsEXT(Device::device, + static_cast(resources.size()), + resources.data(), destinations.data()); + for (std::uint32_t f = 0; f < Window::numFrames; ++f) { + descriptorHeap.resourceHeap[f].FlushDevice(); + } + + window.descriptorHeap = &descriptorHeap; + RTPass rtPass(&pipeline); + window.passes.push_back(&rtPass); + + window.Render(); + window.StartSync(); + return 0; +} +#else +int main() { + // ── Read scene manifest (produced by project.cpp's ImportSponzaBundle). + // + // line 1: albedoCount + // line 2: meshCount + // line 3..: per-mesh albedoIdx (-1 means "no albedo") + const fs::path manifestPath = "scene.txt"; + if (!fs::exists(manifestPath)) { + std::println(std::cerr, + "[Sponza] missing scene.txt — the build should have produced " + "it (see examples/Sponza/project.cpp). 
If you ran the binary " + "from outside its bin dir, cd in first."); + std::abort(); + } + std::ifstream manifest(manifestPath); + std::uint32_t albedoCount = 0, meshCount = 0; + manifest >> albedoCount >> meshCount; + std::vector meshAlbedo(meshCount); + for (std::uint32_t i = 0; i < meshCount; ++i) manifest >> meshAlbedo[i]; + std::println("[Sponza] scene: {} albedos, {} meshes", albedoCount, meshCount); + + Device::Initialize(); + static Window window(1280, 720, "Sponza"); + auto cmd = window.StartInit(); + + DescriptorHeapWebGPU heap; + heap.Initialize(/*images*/ 2, /*buffers*/ 2, /*samplers*/ 2); + + std::array shaders {{ + WebGPUShader(fs::path("raygen.wgsl"), "raygen_main", WebGPURTStage::Raygen), + WebGPUShader(fs::path("miss.wgsl"), "miss_main", WebGPURTStage::Miss), + WebGPUShader(fs::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit), + }}; + ShaderBindingTableWebGPU sbt; + sbt.Init(shaders); + + std::array raygenGroups {{ + { .type = RTShaderGroupType::General, .generalShader = 0 }, + }}; + std::array missGroups {{ + { .type = RTShaderGroupType::General, .generalShader = 1 }, + }}; + std::array hitGroups {{ + { .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 }, + }}; + + // Three user bindings at @group(2): + // binding 0 — albedo texture_2d_array (one layer per material) + // binding 1 — sampler (linear clamp) + // binding 2 — Camera storage buffer (host-driven, updated per frame) + std::array bindings {{ + { .group = 2, .binding = 0, .kind = UICustomBindingKind::SampledTextureArray, ._pad = 0, .pushOffset = 0 }, + { .group = 2, .binding = 1, .kind = UICustomBindingKind::Sampler, ._pad = 0, .pushOffset = 0 }, + { .group = 2, .binding = 2, .kind = UICustomBindingKind::Buffer, ._pad = 0, .pushOffset = 0 }, + }}; + + PipelineRTWebGPU pipeline; + pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt, bindings); + + // ── Albedo texture array — one rgba8unorm layer per material. 
────── + // + // Probe layer 0 for the canonical layer dimensions; project.cpp + // already resized every albedo to the same square so any tex_N.ctex + // would do, layer 0 is just the first one we have. + Image2DArray albedoArray; + { + CompressedTextureAsset probe = LoadCompressedTexture("tex_0.ctex"); + albedoArray.Create(probe.sizeX, probe.sizeY, static_cast(albedoCount)); + albedoArray.UpdateLayer(0, probe); + for (std::uint32_t i = 1; i < albedoCount; ++i) { + CompressedTextureAsset tex = LoadCompressedTexture(std::format("tex_{}.ctex", i)); + albedoArray.UpdateLayer(static_cast(i), tex); + } + } + auto albedoArraySlot = albedoArray.AllocateSlot(heap); + SamplerSlot samplerSlot = AllocateLinearClampSampler(heap); + + // Camera storage buffer — host writes (origin, right, up, forward, + // aspect, tanHalf) every frame from the input-driven free camera + // below. Layout matches the WGSL Camera struct in raygen.wgsl + // (vec3-aligned, std430). 64 bytes total. + struct CameraGPU { + float origin[3]; float pad0; + float right[3]; float tanHalf; + float up[3]; float aspect; + float forward[3]; float pad1; + }; + static_assert(sizeof(CameraGPU) == 64); + WebGPUBuffer cameraBuf; + cameraBuf.Create(1); + + // Handle array fed to RTPass — order matches the bindings declaration. + static std::array userHandles { + heap.imageTable [albedoArraySlot.firstElement], + heap.samplerTable[samplerSlot.firstElement], + cameraBuf.handle, + }; + + // ── Meshes + scene instances ─────────────────────────────────────── + // + // One Mesh + one RenderingElement3D per material group from + // scene.txt. Meshes whose albedoIdx is -1 (the .obj's `usemtl` named + // something without a map_Kd in .mtl) get dropped — they're rare in + // Sponza and we'd have nothing to sample for them anyway. 
+ // + // Vector capacity is reserved up-front: RenderingElement3D::Add + // takes a pointer that's stored in the static elements[] vector, so + // any later vector reallocation would dangle those pointers. + static std::vector meshes; + static std::vector renderers; + meshes.reserve(meshCount); + renderers.reserve(meshCount); + + for (std::uint32_t i = 0; i < meshCount; ++i) { + if (meshAlbedo[i] < 0) continue; + CompressedMeshAsset loaded = LoadCompressedMesh(std::format("mesh_{}.cmesh", i)); + meshes.emplace_back(); + meshes.back().Build(loaded, cmd); + + renderers.emplace_back(); + RenderingElement3D& r = renderers.back(); + auto& tx = r.instance.transform.matrix; + tx[0][0] = 1; tx[0][1] = 0; tx[0][2] = 0; tx[0][3] = 0; + tx[1][0] = 0; tx[1][1] = 1; tx[1][2] = 0; tx[1][3] = 0; + tx[2][0] = 0; tx[2][1] = 0; tx[2][2] = 1; tx[2][3] = 0; + // 24-bit instanceCustomIndex carries the albedo array layer that + // closesthit.wgsl reads as `hit.customIndex`. + r.instance.instanceCustomIndex = static_cast(meshAlbedo[i]); + r.instance.mask = 0xFF; + r.instance.instanceShaderBindingTableRecordOffset = 0; + r.instance.flags = kRTGeometryInstanceForceOpaque; + r.instance.accelerationStructureReference = meshes.back().blasAddr; + RenderingElement3D::Add(&r); + } + RenderingElement3D::BuildTLAS(cmd, 0); + + window.descriptorHeap = &heap; + window.FinishInit(); + + RTPass rtPass(&pipeline); + rtPass.handlesPtr = userHandles.data(); + rtPass.handlesCount = static_cast(userHandles.size()); + window.passes.push_back(&rtPass); + + // ── Free camera: WASD + mouse-delta look ─────────────────────────── + // + // Initial pose puts the camera near one end of the atrium at eye + // height, looking +X down the long axis (bbox: X[-1921..1800], + // Y[-126..1429], Z[-1183..1105]). The user can fine-tune from there. 
+ struct CamState { + Vector position{ -1500.0f, 200.0f, 0.0f }; + float yaw = 0.0f; // radians, around world +Y + float pitch = 0.0f; // radians, +pitch looks up + } cam; + + Input::Map inputMap; + Input::Action& moveAct = inputMap.AddAction("Move", Input::ActionType::Vector2); + Input::Action& lookAct = inputMap.AddAction("Look", Input::ActionType::Vector2); + moveAct.bindings = { + Input::WASDBind{ + Key(CrafterKeys::W), Key(CrafterKeys::S), + Key(CrafterKeys::A), Key(CrafterKeys::D), + }, + }; + lookAct.bindings = { + Input::MouseDeltaBind{ 1.0f }, + }; + inputMap.Attach(window); + + constexpr float kMoveSpeed = 1200.0f; // Sponza units / second (room is ~3700 wide) + constexpr float kLookSens = 0.05f; // radians per mouse pixel + constexpr float kDt = 1.0f / 60.0f; + + EventListener camTick(&window.onBeforeUpdate, [&]() { + inputMap.Tick(); + + cam.yaw += lookAct.vector2.x * kLookSens; + cam.pitch -= lookAct.vector2.y * kLookSens; + // Keep pitch just shy of straight up/down so the basis vectors + // don't collapse (cross(forward, world_up) would go zero). 
+ cam.pitch = std::clamp(cam.pitch, -1.55f, 1.55f); + + const float cp = std::cos(cam.pitch), sp = std::sin(cam.pitch); + const float cy = std::cos(cam.yaw), sy = std::sin(cam.yaw); + Vector forward { cp * cy, sp, cp * sy }; + Vector worldUp { 0.0f, 1.0f, 0.0f }; + Vector right { forward.y * worldUp.z - forward.z * worldUp.y, + forward.z * worldUp.x - forward.x * worldUp.z, + forward.x * worldUp.y - forward.y * worldUp.x }; + const float rLen = std::sqrt(right.x*right.x + right.y*right.y + right.z*right.z); + right.x /= rLen; right.y /= rLen; right.z /= rLen; + Vector up { right.y * forward.z - right.z * forward.y, + right.z * forward.x - right.x * forward.z, + right.x * forward.y - right.y * forward.x }; + + const float dx = moveAct.vector2.x * kMoveSpeed * kDt; + const float dy = moveAct.vector2.y * kMoveSpeed * kDt; + cam.position.x += right.x * dx + forward.x * dy; + cam.position.y += right.y * dx + forward.y * dy; + cam.position.z += right.z * dx + forward.z * dy; + + CameraGPU& g = cameraBuf.value[0]; + g.origin[0] = cam.position.x; g.origin[1] = cam.position.y; g.origin[2] = cam.position.z; g.pad0 = 0.0f; + g.right[0] = right.x; g.right[1] = right.y; g.right[2] = right.z; + g.up[0] = up.x; g.up[1] = up.y; g.up[2] = up.z; + g.forward[0] = forward.x; g.forward[1] = forward.y; g.forward[2] = forward.z; + g.aspect = float(window.width) / float(window.height); + g.tanHalf = std::tan(70.0f * 3.14159265f / 360.0f); + g.pad1 = 0.0f; + cameraBuf.FlushDevice(); + }); + + window.Render(); + window.StartUpdate(); + window.StartSync(); + return 0; +} +#endif diff --git a/examples/Sponza/miss.glsl b/examples/Sponza/miss.glsl new file mode 100644 index 0000000..909d9ca --- /dev/null +++ b/examples/Sponza/miss.glsl @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable + +layout(location = 0) rayPayloadInEXT vec3 hitValue; + +void main() { + // Soft sky gradient based on ray direction Y. 
Not implemented yet — the ray dir + // is available as gl_WorldRayDirectionEXT, but for now we use a flat warm + // tone that matches Sponza's interior lighting. + hitValue = vec3(0.10, 0.08, 0.06); +} diff --git a/examples/Sponza/miss.wgsl b/examples/Sponza/miss.wgsl new file mode 100644 index 0000000..39ff71d --- /dev/null +++ b/examples/Sponza/miss.wgsl @@ -0,0 +1,16 @@ +fn miss_main(ray: RayDesc, payload: ptr) { + if ((*payload).shadowRay == 1u) { + // Shadow ray escaped to infinity — the sun is visible from the + // origin, so the surface there should pick up full direct light. + // raygen reads color.x as the visibility coefficient. + (*payload).color = vec3(1.0); + return; + } + + // Primary miss: cheap two-stop sky gradient. (*payload).hit stays 0 + // so raygen knows to skip the lighting path and just use this color. + let t = clamp(ray.direction.y * 0.5 + 0.5, 0.0, 1.0); + let sky = vec3(0.45, 0.65, 0.95); + let zenith = vec3(0.95, 0.85, 0.65); + (*payload).color = mix(sky, zenith, t); +} diff --git a/examples/Sponza/project.cpp b/examples/Sponza/project.cpp new file mode 100644 index 0000000..b850c90 --- /dev/null +++ b/examples/Sponza/project.cpp @@ -0,0 +1,92 @@ +import std; +import Crafter.Build; +namespace fs = std::filesystem; +using namespace Crafter; + +// Sponza geometry + albedo: CC BY 3.0, Frank Meinl (Crytek), packaged by +// Jimmie Bergmann (https://github.com/jimmiebergmann/Sponza) and Morgan +// McGuire (https://casual-effects.com/data). The full asset bundle is +// ~280 MB — too large to live in this repo. GitFetch lands it in the +// per-user crafter-build cache on first build and reuses thereafter. +constexpr std::string_view kSponzaGitUrl = "https://github.com/jimmiebergmann/Sponza.git"; +constexpr std::string_view kSponzaCommitSHA = "222338979d32f4f4818466291bdbc29f192b86ba"; +// Every albedo is normalized to this size so they can live as layers of +// one texture_2d_array on the GPU (WebGPU array textures require +// identical layer dimensions). 
1024 matches the majority of Sponza's +// textures; the few outliers (256×1024 chain, 512² thorn, 2048² curtains) +// get bilinear-resized via stb_image_resize2. +constexpr std::uint16_t kAlbedoSize = 1024u; + +extern "C" Configuration CrafterBuildProject(std::span args) { + bool isWasm = false; + for (std::string_view a : args) { + if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) { + isWasm = true; + break; + } + } + + std::vector graphicsArgs(args.begin(), args.end()); + Configuration* graphics = LocalProject({ + .projectFile = "../../project.cpp", + .args = graphicsArgs, + }); + + Configuration cfg; + cfg.path = "./"; + cfg.name = "Sponza"; + cfg.outputName = "Sponza"; + cfg.type = ConfigurationType::Executable; + if (isWasm) { + cfg.target = "wasm32-wasip1"; + cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""}); + cfg.compileFlags.push_back("-msimd128"); + } + ApplyStandardArgs(cfg, args); + cfg.dependencies = { graphics }; + + std::array ifaces = {}; + std::array impls = { "main" }; + cfg.GetInterfacesAndImplementations(ifaces, impls); + + // Fetch Sponza once into the shared crafter-build cache, then process + // it into a per-material bundle under build/sponza-bundle-{hash}/. + // Hashing on (sha, albedoSize) so changing either invalidates the + // bundle without touching the rest of the example's build tree. 
+ fs::path sponzaRoot = GitFetch({ + .url = std::string(kSponzaGitUrl), + .commit = std::string(kSponzaCommitSHA), + }); + std::string bundleKey = std::format("{}|{}", kSponzaCommitSHA, kAlbedoSize); + auto bundleHash = std::hash{}(bundleKey); + fs::path bundleDir = fs::path("build") / std::format("sponza-bundle-{:016x}", bundleHash); + + if (auto err = BuildOBJBundle( + sponzaRoot / "sponza.obj", + sponzaRoot / "sponza.mtl", + bundleDir, + kAlbedoSize); !err.empty()) { + std::println(std::cerr, "Sponza bundle error: {}", err); + std::exit(1); + } + + // Forward every produced file (.cmesh, .ctex, scene.txt) as a + // passthrough — they're already compressed by Crafter.Asset, no + // further compression needed. cfg.files copies them flat into + // the executable's bin dir. + for (const auto& entry : fs::directory_iterator(bundleDir)) { + if (entry.is_regular_file()) cfg.files.push_back(entry.path()); + } + + if (isWasm) { + cfg.files.emplace_back(fs::path("raygen.wgsl")); + cfg.files.emplace_back(fs::path("closesthit.wgsl")); + cfg.files.emplace_back(fs::path("miss.wgsl")); + EnableWasiBrowserRuntime(cfg); + } else { + cfg.shaders.emplace_back(fs::path("raygen.glsl"), std::string("main"), ShaderType::RayGen); + cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit); + cfg.shaders.emplace_back(fs::path("miss.glsl"), std::string("main"), ShaderType::Miss); + } + return cfg; +} diff --git a/examples/Sponza/raygen.glsl b/examples/Sponza/raygen.glsl new file mode 100644 index 0000000..7eafa4b --- /dev/null +++ b/examples/Sponza/raygen.glsl @@ -0,0 +1,52 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_shader_image_load_formatted : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable +#extension GL_EXT_descriptor_heap : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Specialization constant set from descriptorHeap.bufferStartElement — +// shared with closesthit.glsl. 
The TLAS lives at descriptor_heap slot +// `bufferStart` (it's an SSBO-typed entry), the per-frame output image +// at heap slot 0. +layout(constant_id = 0) const uint16_t bufferStart = 0us; +layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[]; +layout(descriptor_heap) uniform writeonly image2D image[]; + +layout(location = 0) rayPayloadEXT vec3 hitValue; + +void main() { + uvec2 pixel = gl_LaunchIDEXT.xy; + uvec2 resolution = gl_LaunchSizeEXT.xy; + vec2 uv = (vec2(pixel) + 0.5) / vec2(resolution); + vec2 ndc = uv * 2.0 - 1.0; + + // Camera positioned to look down the Sponza atrium axis. Sponza-OBJ + // from McGuire's archive is roughly 30 units wide × 13 tall × 18 deep, + // axis-aligned, with the floor near y=0 and the atrium centered on + // origin. -X faces the long end, so we sit inside looking +X. + vec3 origin = vec3(-10.0, 5.0, 0.0); + float aspect = float(resolution.x) / float(resolution.y); + float fov = radians(70.0); + float tanHalf = tan(fov * 0.5); + vec3 direction = normalize(vec3( + ndc.x * aspect * tanHalf, + -ndc.y * tanHalf, + 1.0)); + + // Rotate +Z forward → +X forward (90° about Y). + direction = vec3(direction.z, direction.y, -direction.x); + + traceRayEXT( + topLevelAS[bufferStart], + gl_RayFlagsNoneEXT, + 0xff, + 0, 0, 0, + origin, + 0.001, + direction, + 10000.0, + 0); + + imageStore(image[0], ivec2(pixel), vec4(hitValue, 1.0)); +} diff --git a/examples/Sponza/raygen.wgsl b/examples/Sponza/raygen.wgsl new file mode 100644 index 0000000..a90ca84 --- /dev/null +++ b/examples/Sponza/raygen.wgsl @@ -0,0 +1,109 @@ +// WebGPU raygen. Camera state comes from the host every frame via a +// storage buffer bound at @group(2) @binding(2); main.cpp drives that +// from WASD + mouse-delta through Crafter::Input. +// +// The shading + shadow trace all happens here because WGSL forbids +// recursive function call graphs — closesthit_main can't call traceRay +// (that would loop closesthit → traceRay → runClosestHit → closesthit). 
+// Raygen is the entry point and not called by anyone, so it can call +// traceRay twice (once primary, once shadow) without forming a cycle. + +struct Camera { + origin: vec3, + pad0: f32, + right: vec3, + tanHalf: f32, + up: vec3, + aspect: f32, + forward: vec3, + pad1: f32, +}; +@group(2) @binding(2) var camera : Camera; + +// Sun through Sponza's open roof. Y is up; the light travels "down and +// slightly along +X" to graze the colonnades; this is the reverse (surface → sun) vector. +const SUN_DIR_TO_LIGHT: vec3 = vec3(-0.35, 1.00, -0.20); +const SUN_COLOR: vec3 = vec3( 1.10, 1.00, 0.85); +const AMBIENT_COLOR: vec3 = vec3( 0.18, 0.20, 0.28); + +fn raygen_main(gid: vec3) { + if (gid.x >= hdr.surfaceW || gid.y >= hdr.surfaceH) { return; } + + let pixel = vec2(f32(gid.x), f32(gid.y)); + let resolution = vec2(f32(hdr.surfaceW), f32(hdr.surfaceH)); + let uv = (pixel + vec2(0.5)) / resolution; + let ndc = uv * 2.0 - vec2(1.0); + + // Pinhole camera reconstructed from the host basis. ndc.x runs left- + // to-right across the screen → +right; ndc.y is top-down so we + // negate before applying +up. + let direction = normalize( + camera.right * (ndc.x * camera.aspect * camera.tanHalf) + + camera.up * (-ndc.y * camera.tanHalf) + + camera.forward); + + // ── Primary ray ──────────────────────────────────────────────────── + var payload: Payload; + payload.color = vec3(0.0); + payload.shadowRay = 0u; + payload.hit = 0u; + + traceRay( + 0u, 0u, 0xFFu, + 0u, 0u, 0u, + camera.origin, 0.001, + direction, 10000.0, + &payload); + + var finalColor: vec3; + if (payload.hit == 1u) { + // Closesthit filled albedo/worldPos/worldNormal. Two-sided + // shading: flip the normal toward the camera if we hit the back + // face — Sponza's curtains in particular have inconsistent + // winding, and without this half the surface would go black. 
+ let albedo = payload.color; + let nFacing = select(-payload.worldNormal, + payload.worldNormal, + dot(payload.worldNormal, direction) < 0.0); + let lightDir = normalize(SUN_DIR_TO_LIGHT); + let nDotL = max(0.0, dot(nFacing, lightDir)); + + // ── Shadow ray ──────────────────────────────────────────────── + // Only worth tracing if the surface faces the sun at all. + var visibility = 0.0; + if (nDotL > 0.0) { + // Normal-offset bias on Sponza's units (~3700 wide atrium) + // is hefty; 0.5 keeps the shadow ray clear of the originating + // triangle without producing visible "floating" shadows. + let shadowOrigin = payload.worldPos + nFacing * 0.5; + + var shadowPayload: Payload; + shadowPayload.color = vec3(0.0); // default: blocked + shadowPayload.shadowRay = 1u; + shadowPayload.hit = 0u; + traceRay( + 0u, + RT_FLAG_SKIP_CLOSEST_HIT | RT_FLAG_TERMINATE_ON_FIRST_HIT, + 0xFFu, + 0u, 0u, 0u, + shadowOrigin, 0.001, + lightDir, 10000.0, + &shadowPayload); + visibility = shadowPayload.color.x; + } + + let lit = AMBIENT_COLOR + SUN_COLOR * (nDotL * visibility); + finalColor = albedo * lit; + } else { + // Sky color was filled by miss_main. + finalColor = payload.color; + } + + // Reinhard tonemap + gamma 2.2 so sun-lit albedos don't clip and + // shadow detail stays readable. + let mapped = finalColor / (finalColor + vec3(1.0)); + let gamma = pow(mapped, vec3(1.0 / 2.2)); + textureStore(outImage, + vec2(i32(gid.x), i32(gid.y)), + vec4(gamma, 1.0)); +} diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp index 4c86ad7..5a61871 100644 --- a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp +++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp @@ -19,6 +19,7 @@ module Crafter.Graphics:Mesh_implWebGPU; import :Mesh; import :WebGPU; +import Crafter.Asset; import Crafter.Math; import std; @@ -215,26 +216,59 @@ namespace { }; } +namespace { + // Shared between the positions-only and the compressed-asset Build paths. 
+ // attribsBytes is empty for positions-only meshes; the JS bridge skips + // the attribs-heap append in that case. + void BuildBVHAndRegister(Mesh& mesh, + std::span> vertices, + std::span indices, + std::span attribsBytes) { + mesh.triangleCount = static_cast(indices.size()) / 3; + + Builder builder; + builder.Build(vertices, indices); + + std::vector primRemap(mesh.triangleCount); + for (std::uint32_t i = 0; i < mesh.triangleCount; ++i) { + primRemap[i] = builder.prims[i].triIndex; + } + + const BVHNode& root = builder.nodes[0]; + mesh.blasAddr = WebGPU::wgpuRegisterMeshBLAS( + root.aabbMin[0], root.aabbMin[1], root.aabbMin[2], + root.aabbMax[0], root.aabbMax[1], root.aabbMax[2], + vertices.data(), static_cast(vertices.size()), + indices.data(), static_cast(indices.size()), + builder.nodes.data(), static_cast(builder.nodes.size()), + primRemap.data(), static_cast(primRemap.size()), + attribsBytes.data(), static_cast(attribsBytes.size())); + } +} + void Mesh::Build(std::span> vertices, std::span indices, WebGPUCommandEncoderRef /*cmd*/) { - triangleCount = static_cast(indices.size()) / 3; + BuildBVHAndRegister(*this, vertices, indices, {}); +} - Builder builder; - builder.Build(vertices, indices); +void Mesh::Build(const CompressedMeshAsset& asset, + WebGPUCommandEncoderRef /*cmd*/) { + std::vector> vertices(asset.vertexCount); + std::vector indices(asset.indexCount); + std::vector dataBytes( + static_cast(asset.dataCount) * asset.dataStride); - std::vector primRemap(triangleCount); - for (std::uint32_t i = 0; i < triangleCount; ++i) { - primRemap[i] = builder.prims[i].triIndex; - } + // CompressedBlob always carries 3 regions for MeshAsset (the data region + // can have decompressedSize=0). DecompressCPU validates output sizes + // against region sizes, so the empty-data path needs the empty span. 
+ std::array, 3> outputs = { + std::as_writable_bytes(std::span(vertices)), + std::as_writable_bytes(std::span(indices)), + std::span(dataBytes), + }; + Compression::DecompressCPU(asset.blob, + std::span(outputs).first(asset.blob.regions.size())); - const BVHNode& root = builder.nodes[0]; - std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS( - root.aabbMin[0], root.aabbMin[1], root.aabbMin[2], - root.aabbMax[0], root.aabbMax[1], root.aabbMax[2], - vertices.data(), static_cast(vertices.size()), - indices.data(), static_cast(indices.size()), - builder.nodes.data(), static_cast(builder.nodes.size()), - primRemap.data(), static_cast(primRemap.size())); - blasAddr = h; + BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes)); } diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp index bf9e115..c4265da 100644 --- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp +++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp @@ -22,6 +22,7 @@ module Crafter.Graphics:PipelineRTWebGPU_impl; import :PipelineRTWebGPU; import :ShaderBindingTableWebGPU; +import :WebGPUComputeShader; import :RT; import :WebGPU; import std; @@ -65,7 +66,9 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, std::span raygenGroups, std::span missGroups, std::span hitGroups, - const ShaderBindingTableWebGPU& sbt) { + const ShaderBindingTableWebGPU& sbt, + std::span bindings) { + userBindings.assign(bindings.begin(), bindings.end()); std::string wgsl; wgsl.reserve(8 * 1024); @@ -183,5 +186,7 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, pipelineHandle = WebGPU::wgpuLoadRTPipeline( wgsl.data(), - static_cast(wgsl.size())); + static_cast(wgsl.size()), + userBindings.empty() ? 
nullptr : userBindings.data(), + static_cast(userBindings.size())); } diff --git a/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm b/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm index e36003c..9e50d45 100644 --- a/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm +++ b/interfaces/Crafter.Graphics-DescriptorHeapWebGPU.cppm @@ -181,5 +181,15 @@ export namespace Crafter { } return *this; } + + // Convenience: create the "standard" linear-filter clamp-to-edge sampler, + // allocate a slot for it, and return the slot. The wgpu* bridge call is + // intentionally kept inside the library — example code shouldn't need to + // reach into Crafter::WebGPU directly. + inline SamplerSlot AllocateLinearClampSampler(DescriptorHeapWebGPU& heap) { + DescriptorRange r = heap.AllocateSamplerSlots(1); + heap.samplerTable[r.firstElement] = WebGPU::wgpuCreateLinearClampSampler(); + return SamplerSlot(&heap, r.firstElement); + } } #endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-Image2D.cppm b/interfaces/Crafter.Graphics-Image2D.cppm new file mode 100644 index 0000000..01643e2 --- /dev/null +++ b/interfaces/Crafter.Graphics-Image2D.cppm @@ -0,0 +1,166 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License version 3.0 as published by the Free Software Foundation; + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +// Image2D — portable 2D image type whose API surface is intentionally +// backend-specific via #ifdef. On Vulkan it aliases the existing +// ImageVulkan (full VkFormat / usage / layout control). On WebGPU it's +// a thin handle around an rgba8unorm GPUTexture; sizes are u16 and the +// only update path is from a CompressedTextureAsset. +// +// The "no shared no-op signatures" principle is deliberate: callers do +// the same #ifdef the library does, and write the backend-specific +// invocation. The unified type name Image2D is the only thing +// portable between the two — that's the whole point. + +export module Crafter.Graphics:Image2D; + +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM +import :ImageVulkan; + +export namespace Crafter { + // Vulkan target: Image2D is just the existing ImageVulkan. New name, + // same shape — keeps existing ImageVulkan callers (e.g. examples/ + // Decompression) working without a churn-rename. + template + using Image2D = ImageVulkan; +} +#endif // !CRAFTER_GRAPHICS_WINDOW_DOM + +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; +import Crafter.Asset; +import :DescriptorHeapWebGPU; +import :WebGPU; + +export namespace Crafter { + template + class Image2D { + public: + WebGPUTextureRef handle = 0; + std::uint16_t width = 0; + std::uint16_t height = 0; + + void Create(std::uint16_t w, std::uint16_t h) { + width = w; + height = h; + handle = WebGPU::wgpuCreateImage2D(w, h); + } + + // CPU-decompress the .ctex blob (no GPU decompression on WebGPU) + // and upload via wgpuWriteImage2D. The intermediate `pixels` vector + // lives only for the duration of this call — the underlying + // queue.writeTexture in JS makes its own copy. 
+ void Update(const CompressedTextureAsset& asset) { + if (asset.pixelStride != sizeof(PixelType)) { + std::println(std::cerr, + "Image2D::Update: pixel stride mismatch (got {}, expected {})", + asset.pixelStride, sizeof(PixelType)); + std::abort(); + } + std::vector pixels( + static_cast(asset.sizeX) * asset.sizeY); + std::array, 1> outputs = { + std::as_writable_bytes(std::span(pixels)), + }; + Compression::DecompressCPU(asset.blob, outputs); + WebGPU::wgpuWriteImage2D( + handle, + pixels.data(), + static_cast(pixels.size() * sizeof(PixelType)), + asset.sizeX, asset.sizeY); + } + + // Register the texture in a descriptor heap slot so a custom RT + // pipeline can bind it via UICustomBinding::SampledTexture. + ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) { + DescriptorRange r = heap.AllocateImageSlots(1); + heap.imageTable[r.firstElement] = handle; + return ImageSlot(&heap, r.firstElement); + } + + void Destroy() { + if (handle != 0) { + WebGPU::wgpuDestroyTexture(handle); + handle = 0; + } + } + }; + + // 2D texture array — `layers` × (w × h) rgba8unorm. Each layer is + // populated independently from a CompressedTextureAsset whose dims + // must match the array's (w × h). Layer 0 is sampled at array + // index 0 in WGSL; bind through UICustomBindingKind::SampledTextureArray. + template + class Image2DArray { + public: + WebGPUTextureRef handle = 0; + std::uint16_t width = 0; + std::uint16_t height = 0; + std::uint16_t layers = 0; + + void Create(std::uint16_t w, std::uint16_t h, std::uint16_t layerCount) { + width = w; + height = h; + layers = layerCount; + handle = WebGPU::wgpuCreateImage2DArray(w, h, layerCount); + } + + // Decompress `tex` and upload to `layer`. The asset's dims must + // match the array's (w × h) — resize beforehand on the host with + // TextureAsset::Resize() if they don't. 
+ void UpdateLayer(std::uint16_t layer, const CompressedTextureAsset& tex) { + if (tex.pixelStride != sizeof(PixelType)) { + std::println(std::cerr, + "Image2DArray::UpdateLayer: pixel stride mismatch (got {}, expected {})", + tex.pixelStride, sizeof(PixelType)); + std::abort(); + } + if (tex.sizeX != width || tex.sizeY != height) { + std::println(std::cerr, + "Image2DArray::UpdateLayer: layer {} dims {}x{} don't match array dims {}x{}", + layer, tex.sizeX, tex.sizeY, width, height); + std::abort(); + } + std::vector pixels(static_cast(width) * height); + std::array, 1> outputs = { + std::as_writable_bytes(std::span(pixels)), + }; + Compression::DecompressCPU(tex.blob, outputs); + WebGPU::wgpuWriteImage2DLayer( + handle, layer, + pixels.data(), + static_cast(pixels.size() * sizeof(PixelType)), + width, height); + } + + ImageSlot AllocateSlot(DescriptorHeapWebGPU& heap) { + DescriptorRange r = heap.AllocateImageSlots(1); + heap.imageTable[r.firstElement] = handle; + return ImageSlot(&heap, r.firstElement); + } + + void Destroy() { + if (handle != 0) { + WebGPU::wgpuDestroyTexture(handle); + handle = 0; + } + } + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-Mesh.cppm b/interfaces/Crafter.Graphics-Mesh.cppm index d8385ad..087d94f 100644 --- a/interfaces/Crafter.Graphics-Mesh.cppm +++ b/interfaces/Crafter.Graphics-Mesh.cppm @@ -64,6 +64,7 @@ export namespace Crafter { #ifdef CRAFTER_GRAPHICS_WINDOW_DOM import std; import Crafter.Math; +import Crafter.Asset; import :WebGPU; export namespace Crafter { @@ -108,6 +109,15 @@ export namespace Crafter { void Build(std::span> vertices, std::span indices, WebGPUCommandEncoderRef cmd = 0); + + // CPU-decompress the .cmesh blob (no VK_EXT_memory_decompression + // equivalent in WebGPU) and forward to the positions+indices path, + // plus push the optional `data` region into the per-vertex attribs + // heap so closest-hit shaders can sample UVs / normals / tangents. 
+ // The data layout is example-defined — the heap is exposed in WGSL + // as `vertexAttribs : array` with a per-mesh u32-word offset. + void Build(const ::Crafter::CompressedMeshAsset& asset, + WebGPUCommandEncoderRef cmd = 0); }; } #endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm index b3df012..6825fbd 100644 --- a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm +++ b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm @@ -26,22 +26,31 @@ import std; import :RT; import :WebGPU; import :ShaderBindingTableWebGPU; +import :WebGPUComputeShader; export namespace Crafter { class PipelineRTWebGPU { public: std::uint32_t pipelineHandle = 0; + // Mirror of the bindings handed to Init. Kept for the example / + // RTPass to consult when packing the handles[] array at dispatch + // time (one resolved u32 handle per binding, in declaration order). + std::vector userBindings; // Build the megakernel pipeline. Groups carry indices into // `sbt.shaders`. The library generates one `case` per registered // group: closest-hit groups dispatch to their closestHitShader's // entryFn, miss groups to their generalShader's entryFn, etc. // The `cmd` parameter is unused on WebGPU; kept for API symmetry. + // `userBindings` declares extra @group(2)+ resources the user's + // closest-hit / miss / raygen WGSL touches (material SSBOs, + // albedo textures, samplers). 
void Init(WebGPUCommandEncoderRef cmd, std::span raygenGroups, std::span missGroups, std::span hitGroups, - const ShaderBindingTableWebGPU& sbt); + const ShaderBindingTableWebGPU& sbt, + std::span bindings = {}); PipelineRTWebGPU() = default; PipelineRTWebGPU(const PipelineRTWebGPU&) = delete; diff --git a/interfaces/Crafter.Graphics-RTPass.cppm b/interfaces/Crafter.Graphics-RTPass.cppm index 78064c0..1397a68 100644 --- a/interfaces/Crafter.Graphics-RTPass.cppm +++ b/interfaces/Crafter.Graphics-RTPass.cppm @@ -66,6 +66,12 @@ export namespace Crafter { // RTDispatchHeader. Null means "no extra data". const void* pushPtr = nullptr; std::uint32_t pushBytes = 0; + // Resolved WebGPU resource handles for each user binding the + // pipeline was loaded with, in declaration order. The example + // owns the storage (typically a small std::array of u32). Null / + // 0 means "no user bindings". + const void* handlesPtr = nullptr; + std::uint32_t handlesCount = 0; RTPass(PipelineRTWebGPU* p) : pipeline(p) {} @@ -80,7 +86,9 @@ export namespace Crafter { tlas.buffer.handle, static_cast(tlas.builtInstanceCount), static_cast(gx), - static_cast(gy)); + static_cast(gy), + handlesPtr, + static_cast(handlesCount)); } }; } diff --git a/interfaces/Crafter.Graphics-WebGPU.cppm b/interfaces/Crafter.Graphics-WebGPU.cppm index 616c8fe..799c8c0 100644 --- a/interfaces/Crafter.Graphics-WebGPU.cppm +++ b/interfaces/Crafter.Graphics-WebGPU.cppm @@ -49,6 +49,27 @@ namespace Crafter::WebGPU { __attribute__((import_module("env"), import_name("wgpuDestroyTexture"))) extern "C" void wgpuDestroyTexture(std::uint32_t handle); + // General-purpose rgba8unorm 2D texture for material albedo etc. + // Separate from the atlas path because atlas uses r8unorm + sub-region + // writes; this one takes the whole image in one shot. 
+ __attribute__((import_module("env"), import_name("wgpuCreateImage2D"))) + extern "C" std::uint32_t wgpuCreateImage2D(std::int32_t w, std::int32_t h); + __attribute__((import_module("env"), import_name("wgpuWriteImage2D"))) + extern "C" void wgpuWriteImage2D(std::uint32_t handle, const void* srcPtr, + std::int32_t byteSize, + std::int32_t w, std::int32_t h); + + // 2D texture array — `layerCount` rgba8unorm layers of identical (w × h). + // Sampled via `texture_2d_array` in WGSL (UICustomBindingKind 3). + // Used by Image2DArray to stack per-material albedos for one + // multi-material scene. + __attribute__((import_module("env"), import_name("wgpuCreateImage2DArray"))) + extern "C" std::uint32_t wgpuCreateImage2DArray(std::int32_t w, std::int32_t h, std::int32_t layerCount); + __attribute__((import_module("env"), import_name("wgpuWriteImage2DLayer"))) + extern "C" void wgpuWriteImage2DLayer(std::uint32_t handle, std::int32_t layer, + const void* srcPtr, std::int32_t byteSize, + std::int32_t w, std::int32_t h); + __attribute__((import_module("env"), import_name("wgpuCreateLinearClampSampler"))) extern "C" std::uint32_t wgpuCreateLinearClampSampler(); @@ -96,6 +117,11 @@ namespace Crafter::WebGPU { // stores in RTInstance::accelerationStructureReference; the WebGPU // TLAS-build compute shader resolves it back to root AABB + heap // offsets at dispatch time. Returns 0 on failure. + // The optional `attribsPtr` / `attribsByteCount` carry per-vertex + // attribute payload (normals, UVs, etc. — layout is example-defined) + // that gets appended to a global attribs heap and exposed to RT + // closest-hit shaders as `vertexAttribs : array` at + // @group(1) @binding(7). Pass (nullptr, 0) for positions-only meshes. 
__attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS"))) extern "C" std::uint32_t wgpuRegisterMeshBLAS( float minX, float minY, float minZ, @@ -103,25 +129,34 @@ namespace Crafter::WebGPU { const void* verticesPtr, std::int32_t vertexCount, const void* indicesPtr, std::int32_t indexCount, const void* bvhNodesPtr, std::int32_t bvhNodeCount, - const void* primRemapPtr, std::int32_t primRemapCount); + const void* primRemapPtr, std::int32_t primRemapCount, + const void* attribsPtr, std::int32_t attribsByteCount); // RT pipeline build. The library composes WGSL by concatenating the // traversal library, generated hit-group switches, and the user- - // supplied raygen / miss / closesthit / anyhit bodies. Returns an - // opaque pipeline handle. + // supplied raygen / miss / closesthit / anyhit bodies. `bindingsPtr` + // points at `bindingsCount` UICustomBinding-shaped entries (8 bytes + // each) declaring extra @group(2)+ resources the user's closest-hit / + // miss / raygen WGSL references. Pass (nullptr, 0) for a pipeline + // with no user-declared bindings. Returns an opaque pipeline handle. __attribute__((import_module("env"), import_name("wgpuLoadRTPipeline"))) - extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen); + extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen, + const void* bindingsPtr, std::int32_t bindingsCount); // Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched // over a (gx, gy) tile grid; the library writes the push data (camera, // payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS // + global mesh heap, and runs one workgroup per 8x8 screen tile. + // `handlesPtr` / `handlesCount` carry resolved WebGPU resource handles + // for every user binding declared at pipeline-load time, in the same + // order. Pass (nullptr, 0) for a pipeline with no user bindings.
__attribute__((import_module("env"), import_name("wgpuDispatchRT"))) extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle, const void* pushPtr, std::int32_t pushBytes, std::uint32_t tlasBufHandle, std::int32_t instanceCount, - std::int32_t gx, std::int32_t gy); + std::int32_t gx, std::int32_t gy, + const void* handlesPtr, std::int32_t handlesCount); // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or // GPU-written), produces per-instance world-space AABBs + per-instance diff --git a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm index 867bb26..9726d76 100644 --- a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm +++ b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm @@ -32,9 +32,10 @@ import :WebGPU; export namespace Crafter { enum class UICustomBindingKind : std::uint8_t { - Buffer = 0, // read-only-storage SSBO, handle is a slot into heap.bufferTable - SampledTexture = 1, // sampled texture_2d, handle is a slot into heap.imageTable - Sampler = 2, // filtering sampler, handle is a slot into heap.samplerTable + Buffer = 0, // read-only-storage SSBO, handle is a slot into heap.bufferTable + SampledTexture = 1, // sampled texture_2d, handle is a slot into heap.imageTable + Sampler = 2, // filtering sampler, handle is a slot into heap.samplerTable + SampledTextureArray = 3, // sampled texture_2d_array, handle is a slot into heap.imageTable }; struct UICustomBinding { diff --git a/interfaces/Crafter.Graphics.cppm b/interfaces/Crafter.Graphics.cppm index b48f029..5c94db9 100644 --- a/interfaces/Crafter.Graphics.cppm +++ b/interfaces/Crafter.Graphics.cppm @@ -47,6 +47,7 @@ export import :ShaderBindingTableVulkan; export import :PipelineRTVulkan; export import :RenderingElement3D; export import :ImageVulkan; +export import :Image2D; export import :SamplerVulkan; export import :DescriptorHeapVulkan; export import :RenderPass; diff --git a/project.cpp b/project.cpp index 
c1a162c..e1fd83e 100644 --- a/project.cpp +++ b/project.cpp @@ -31,23 +31,9 @@ extern "C" Configuration CrafterBuildProject(std::span a }); }; - // Sniff the requested target from args before any deps resolve — the - // Crafter.Asset dependency is heavy and not wasm-ready (uses `throw` - // under -fno-exceptions, references `_Float16`). The DOM build stubs - // the renderer entirely so the dep doesn't apply anyway. - bool isWasm = false; - for (std::string_view a : args) { - if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) { - isWasm = true; - break; - } - } - Configuration* event = resolveDep("Crafter.Event", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Event.git"); Configuration* math = resolveDep("Crafter.Math", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Math.git"); - Configuration* asset = isWasm - ? nullptr - : resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git"); + Configuration* asset = resolveDep("Crafter.Asset", "https://forgejo.catcrafts.net/Catcrafts/Crafter.Asset.git"); Configuration cfg; cfg.path = "./"; @@ -55,11 +41,7 @@ extern "C" Configuration CrafterBuildProject(std::span a cfg.outputName = "Crafter.Graphics"; cfg.type = ConfigurationType::LibraryStatic; auto opts = ApplyStandardArgs(cfg, args); - if (asset) { - cfg.dependencies = { event, math, asset }; - } else { - cfg.dependencies = { event, math }; - } + cfg.dependencies = { event, math, asset }; // Window backend follows the target triple. V1 had separate lib-wayland / // lib-win32 configurations; V2 picks the right one automatically based on @@ -78,6 +60,16 @@ extern "C" Configuration CrafterBuildProject(std::span a // strips -march/-mtune from the clang command line for any wasm32-* // triple, so cfg.march/mtune can stay at their defaults — keeping them // matches the VariantId of dependency PCMs. 
+ // + // WasmAlloc / WasmFree live in Crafter.Graphics-Dom.cpp and back + // dom-env.js's __writeUtf8 path (every keyboard / text-input event + // routes through them). The TU defines no symbols main.cpp would + // reference, so wasm-ld dead-strips it from libCrafter.Graphics.a + // for examples that don't touch the `Dom::HtmlElement*` API (like + // Sponza). `--export=` both forces the export AND pulls the + // defining .o in — solving both halves of the dead-strip problem. + cfg.linkFlags.push_back("-Wl,--export=WasmAlloc"); + cfg.linkFlags.push_back("-Wl,--export=WasmFree"); } else if (windows) { cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_WIN32", ""}); cfg.linkFlags.push_back("-lkernel32"); @@ -131,7 +123,7 @@ extern "C" Configuration CrafterBuildProject(std::span a // when its body is gated out. Vulkan-typed partitions stub to empty // modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router // partitions stub to empty modules in the opposite direction. - std::array ifaces = { + std::array ifaces = { "interfaces/Crafter.Graphics", "interfaces/Crafter.Graphics-Animation", "interfaces/Crafter.Graphics-Clipboard", @@ -147,6 +139,7 @@ extern "C" Configuration CrafterBuildProject(std::span a "interfaces/Crafter.Graphics-ForwardDeclarations", "interfaces/Crafter.Graphics-Gamepad", "interfaces/Crafter.Graphics-GraphicsTypes", + "interfaces/Crafter.Graphics-Image2D", "interfaces/Crafter.Graphics-ImageVulkan", "interfaces/Crafter.Graphics-Input", "interfaces/Crafter.Graphics-InputField",