From 5553ded476bf747f407cbfc61723a2fc7e59f4f4 Mon Sep 17 00:00:00 2001 From: Jorijn van der Graaf Date: Mon, 18 May 2026 18:43:30 +0200 Subject: [PATCH] webgpu triangle --- additional/dom-webgpu.js | 976 +++++++++++++++++- examples/VulkanTriangle/closesthit.wgsl | 12 + examples/VulkanTriangle/main.cpp | 68 +- examples/VulkanTriangle/miss.wgsl | 5 + examples/VulkanTriangle/project.cpp | 27 +- examples/VulkanTriangle/raygen.glsl | 22 +- examples/VulkanTriangle/raygen.wgsl | 39 + .../Crafter.Graphics-Mesh-WebGPU.cpp | 240 +++++ .../Crafter.Graphics-PipelineRTWebGPU.cpp | 187 ++++ ...ter.Graphics-RenderingElement3D-WebGPU.cpp | 91 ++ ...fter.Graphics-ShaderBindingTableWebGPU.cpp | 32 + .../Crafter.Graphics-WebGPUComputeShader.cpp | 12 +- interfaces/Crafter.Graphics-Mesh.cppm | 51 + .../Crafter.Graphics-PipelineRTWebGPU.cppm | 51 + interfaces/Crafter.Graphics-RT.cppm | 83 ++ interfaces/Crafter.Graphics-RTPass.cppm | 39 + .../Crafter.Graphics-RenderingElement3D.cppm | 63 +- ...ter.Graphics-ShaderBindingTableWebGPU.cppm | 64 ++ interfaces/Crafter.Graphics-WebGPU.cppm | 51 +- .../Crafter.Graphics-WebGPUComputeShader.cppm | 19 +- interfaces/Crafter.Graphics.cppm | 6 + project.cpp | 11 +- 22 files changed, 2107 insertions(+), 42 deletions(-) create mode 100644 examples/VulkanTriangle/closesthit.wgsl create mode 100644 examples/VulkanTriangle/miss.wgsl create mode 100644 examples/VulkanTriangle/raygen.wgsl create mode 100644 implementations/Crafter.Graphics-Mesh-WebGPU.cpp create mode 100644 implementations/Crafter.Graphics-PipelineRTWebGPU.cpp create mode 100644 implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp create mode 100644 implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp create mode 100644 interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm create mode 100644 interfaces/Crafter.Graphics-RT.cppm create mode 100644 interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js index 8706e90..834f320 100644 --- a/additional/dom-webgpu.js +++ b/additional/dom-webgpu.js @@ -45,9 +45,13 @@ function stub(name) { "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture", "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd", "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText", + "wgpuLoadCustomShader", "wgpuDispatchCustom", + "wgpuRegisterMeshBLAS", "wgpuLoadRTPipeline", "wgpuDispatchRT", "wgpuBuildTLAS", ]) { // Read-write ints don't need a stub-throw; return 0 for the size queries. - e[n] = n.endsWith("Width") || n.endsWith("Height") ? () => 0 : stub(n); + e[n] = n.endsWith("Width") || n.endsWith("Height") + ? () => 0 + : (n === "wgpuRegisterMeshBLAS" ? () => 0 : stub(n)); } } @@ -702,8 +706,16 @@ env.wgpuDispatchText = (itemsHandle, headerPtr, gx, gy, atlasHandle, sampHandle) const customPipelines = new Map(); // handle → { pipeline, bgls, hdrBG, byGroup } -env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => { - const wgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen)); +env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQueryFlag) => { + if (!rtState.vertHeap && rayQueryFlag) rtInit(); + const userWgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen)); + // For rayQuery-capable shaders, prepend the RT prelude + ray-query + // library. The user shader can declare its own group 0 / 2+ bindings + // but MUST NOT redeclare group(1) — that's reserved for RT data. + const wgsl = rayQueryFlag + ? (rtWgslTypes + rtWgslMegakernelBindings + rtWgslRayQueryLib + "\n" + userWgsl) + : userWgsl; + const bindings = []; const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8); for (let i = 0; i < bindingsCount; i++) { @@ -726,18 +738,31 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => { byGroup.get(b.group).push(b); } - // Group 0 = header uniform, Group 1 = ping-pong out+prev — always injected. + // Group 0 = header uniform (same for both paths). + // Group 1 = ping-pong out+prev OR RT data (TLAS, BVH, meshRecs, verts, + // idx, primRemap, outImage) when rayQuery flag is on. const bgls = [ device.createBindGroupLayout({ entries: [ { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } }, ]}), - device.createBindGroupLayout({ entries: [ - { binding: 0, visibility: GPUShaderStage.COMPUTE, - storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, - { binding: 1, visibility: GPUShaderStage.COMPUTE, - texture: { sampleType: "float", viewDimension: "2d" } }, - ]}), + rayQueryFlag + ? device.createBindGroupLayout({ entries: [ + { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 6, visibility: GPUShaderStage.COMPUTE, + storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, + ]}) + : device.createBindGroupLayout({ entries: [ + { binding: 0, visibility: GPUShaderStage.COMPUTE, + storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, + { binding: 1, visibility: GPUShaderStage.COMPUTE, + texture: { sampleType: "float", viewDimension: "2d" } }, + ]}), ]; // Sorted custom groups. Pad any gaps with empty bgls (WebGPU pipeline // layouts require a contiguous array of GPUBindGroupLayout per group @@ -766,6 +791,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => { pipeline = device.createComputePipeline({ layout, compute: { module: mod, entryPoint: "main" } }); } catch (e) { console.error("[crafter-wgpu] custom shader compile failed:", e); + if (rayQueryFlag) console.error("[crafter-wgpu] WGSL was:\n", wgsl); return 0; } @@ -775,7 +801,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => { }); const handle = newHandle(); - customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups }); + customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups, rayQueryCapable: !!rayQueryFlag }); return handle; }; @@ -794,7 +820,31 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle state.pass.setPipeline(pipe.pipeline); state.pass.setBindGroup(0, pipe.hdrBG, [off]); - state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1])); + // Group 1: rayQuery-capable shaders get the RT data heaps + the most + // recently built TLAS; everyone else gets the standard ping-pong pair. + if (pipe.rayQueryCapable) { + const tlasBuf = buffers.get(rtState.currentTlas); + if (!tlasBuf) { + console.error("[crafter-wgpu] rayQuery dispatch but no TLAS built yet"); + return; + } + const outView = state.outIsPing ? state.pingView : state.pongView; + const rtBG = device.createBindGroup({ + layout: pipe.bgls[1], + entries: [ + { binding: 0, resource: { buffer: tlasBuf } }, + { binding: 1, resource: { buffer: rtState.bvhHeap.gpu } }, + { binding: 2, resource: { buffer: rtState.meshRecordsBuffer } }, + { binding: 3, resource: { buffer: rtState.vertHeap.gpu } }, + { binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, + { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, + { binding: 6, resource: outView }, + ], + }); + state.pass.setBindGroup(1, rtBG); + } else { + state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1])); + } // Walk bindings in declaration order and assemble bind groups. // handles[] from wasm is in the SAME order as customBindings, so we @@ -873,6 +923,908 @@ env.wgpuInit = () => { // listens to. We trigger sizing on next frame begin; no work here. window.addEventListener("resize", () => { /* ensureSized in wgpuFrameBegin */ }); +// ───────────────────────────────────────────────────────────────────── +// ── Software raytracing subsystem ──────────────────────────────────── +// ───────────────────────────────────────────────────────────────────── +// +// WebGPU has no hardware RT. The library emulates DXR/VKRT semantics in +// compute: a megakernel raygen pipeline traverses a CPU-built BLAS BVH + +// GPU-built TLAS, dispatches user closesthit / anyhit / miss via a +// generated `switch`. The same traversal kernel is also exposed as a +// rayQuery* function set for regular compute shaders (see the +// rayQueryCapable path under wgpuLoadCustomShader). +// +// The four mesh data streams live in *shared* GPU heaps; each Mesh::Build +// appends to them and gets a u32 handle back. The handle is what the +// application stores in RTInstance::accelerationStructureReference. + +// ── WGSL library: shared types + constants (no bindings) ───────────── +// Used by both the megakernel pipeline (which adds group(0..1) bindings) +// and the TLAS-build pipeline (which only uses group(2)). Keeping bindings +// out of the shared block avoids inflating storage-buffer count past the +// 8-per-stage baseline limit on pipelines that don't actually use them. +const rtWgslTypes = String.raw` +struct RTDispatchHeader { + surfaceW: u32, + surfaceH: u32, + instanceCount: u32, + flags: u32, +}; + +struct RayDesc { + origin: vec3, + tMin: f32, + direction: vec3, + tMax: f32, +}; + +struct HitInfo { + t: f32, + instanceId: u32, + primitiveId: u32, + hitGroupIndex: u32, + attribs: vec2, + objectRayOrigin: vec3, + objectRayDirection: vec3, + objectToWorldR0: vec4, + objectToWorldR1: vec4, + objectToWorldR2: vec4, + customIndex: u32, +}; + +// Matches Crafter::BVHNode in interfaces/Crafter.Graphics-Mesh.cppm. +struct BVHNode { + aabbMin: vec3, + firstChildOrPrim: u32, + aabbMax: vec3, + primCount: u32, +}; + +// Per-mesh record. Indexed by RTInstance::accelerationStructureReference. +struct MeshRecord { + rootAabbMin: vec3, + vertexOffset: u32, + rootAabbMax: vec3, + indexOffset: u32, + bvhOffset: u32, + primRemapOffset: u32, + triangleCount: u32, + _pad: u32, +}; + +// Per-instance TLAS record built by the TLAS-build compute pass. +struct TLASEntry { + aabbMin: vec3, + maskHGOffset: u32, + aabbMax: vec3, + blasMeshIdx: u32, + objectToWorldR0: vec4, + objectToWorldR1: vec4, + objectToWorldR2: vec4, + worldToObjectR0: vec4, + worldToObjectR1: vec4, + worldToObjectR2: vec4, + customIndex: u32, + instanceFlags: u32, + _pad0: u32, + _pad1: u32, +}; + +// ── Ray flag mirror of VkGeometryInstanceFlagBitsKHR + DXR ray flags ── +const RT_FLAG_OPAQUE: u32 = 0x1u; +const RT_FLAG_NO_OPAQUE: u32 = 0x2u; +const RT_FLAG_TERMINATE_ON_FIRST_HIT: u32 = 0x4u; +const RT_FLAG_SKIP_CLOSEST_HIT: u32 = 0x8u; +const RT_FLAG_CULL_BACK_FACING_TRIANGLES: u32 = 0x10u; +const RT_FLAG_CULL_FRONT_FACING_TRIANGLES: u32 = 0x20u; +const RT_FLAG_CULL_OPAQUE: u32 = 0x40u; +const RT_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; +const RT_FLAG_SKIP_TRIANGLES: u32 = 0x100u; +const RT_FLAG_SKIP_AABBS: u32 = 0x200u; + +const RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE: u32 = 0x1u; +const RT_INSTANCE_TRIANGLE_FLIP_FACING: u32 = 0x2u; +const RT_INSTANCE_FORCE_OPAQUE: u32 = 0x4u; +const RT_INSTANCE_FORCE_NO_OPAQUE: u32 = 0x8u; + +const RT_ANYHIT_ACCEPT: u32 = 0u; +const RT_ANYHIT_IGNORE: u32 = 1u; +const RT_ANYHIT_END_SEARCH: u32 = 2u; + +const RT_INTERSECTION_NONE: u32 = 0u; +const RT_INTERSECTION_TRIANGLE: u32 = 1u; +`; + +// Megakernel-only bindings. Concatenated after rtWgslTypes for the +// raygen pipeline; the TLAS-build pipeline omits these because it doesn't +// touch them — declaring them would push it past 8 storage buffers per +// stage on the WebGPU baseline. +const rtWgslMegakernelBindings = String.raw` +@group(0) @binding(0) var hdr : RTDispatchHeader; +@group(1) @binding(0) var tlasEntries : array; +@group(1) @binding(1) var bvhNodes : array; +@group(1) @binding(2) var meshRecords : array; +@group(1) @binding(3) var vertices : array; +@group(1) @binding(4) var indices : array; +@group(1) @binding(5) var primRemap : array; +@group(1) @binding(6) var outImage : texture_storage_2d; +`; + +const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings; + +// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ─── +// Injected after the user-supplied closesthit/anyhit/miss sources + +// mega-switch dispatchers (which PipelineRTWebGPU emits). User raygen +// sources sit after this block so they can call traceRay. +const rtWgslHelpers = String.raw` +fn _rtFetchTri(meshRec: MeshRecord, triIndex: u32) -> array, 3> { + let baseIdx = meshRec.indexOffset + triIndex * 3u; + let i0 = indices[baseIdx + 0u]; + let i1 = indices[baseIdx + 1u]; + let i2 = indices[baseIdx + 2u]; + let baseV = meshRec.vertexOffset; + let v0i = (baseV + i0) * 3u; + let v1i = (baseV + i1) * 3u; + let v2i = (baseV + i2) * 3u; + return array, 3>( + vec3(vertices[v0i + 0u], vertices[v0i + 1u], vertices[v0i + 2u]), + vec3(vertices[v1i + 0u], vertices[v1i + 1u], vertices[v1i + 2u]), + vec3(vertices[v2i + 0u], vertices[v2i + 1u], vertices[v2i + 2u]), + ); +} + +fn _rtAabb(ro: vec3, invRd: vec3, mn: vec3, mx: vec3, tMax: f32) -> bool { + let t0 = (mn - ro) * invRd; + let t1 = (mx - ro) * invRd; + let tmin = min(t0, t1); + let tmax = max(t0, t1); + let tEnter = max(max(tmin.x, tmin.y), tmin.z); + let tExit = min(min(tmax.x, tmax.y), tmax.z); + return tExit >= max(tEnter, 0.0) && tEnter <= tMax; +} + +struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 }; +fn _rtTri(ro: vec3, rd: vec3, p0: vec3, p1: vec3, p2: vec3, + tMin: f32, tMax: f32) -> _RtTriHit { + var r: _RtTriHit; + r.hit = false; + let e1 = p1 - p0; + let e2 = p2 - p0; + let h = cross(rd, e2); + let a = dot(e1, h); + if (abs(a) < 1e-8) { return r; } + let f = 1.0 / a; + let s = ro - p0; + let u = f * dot(s, h); + if (u < 0.0 || u > 1.0) { return r; } + let q = cross(s, e1); + let v = f * dot(rd, q); + if (v < 0.0 || u + v > 1.0) { return r; } + let t = f * dot(e2, q); + if (t < tMin || t > tMax) { return r; } + r.hit = true; r.t = t; r.u = u; r.v = v; + return r; +} + +// Iterative stack-based BLAS traversal. Returns true if traversal was +// terminated by an END_SEARCH from anyhit (caller should stop entirely). +fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord, + instanceId: u32, hitGroupBase: u32, + bestHit: ptr, + bestT: ptr, + payload: ptr) -> bool { + let invD = vec3(1.0) / rayObj.direction; + var stack: array; + var sp: u32 = 0u; + var nodeRel: u32 = 0u; + + loop { + let abs = meshRec.bvhOffset + nodeRel; + let node = bvhNodes[abs]; + if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, *bestT)) { + if (sp == 0u) { break; } + sp = sp - 1u; nodeRel = stack[sp]; continue; + } + if (node.primCount > 0u) { + for (var i: u32 = 0u; i < node.primCount; i = i + 1u) { + let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i]; + let verts = _rtFetchTri(meshRec, triIndex); + let tr = _rtTri(rayObj.origin, rayObj.direction, + verts[0], verts[1], verts[2], + rayObj.tMin, *bestT); + if (!tr.hit) { continue; } + + let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]); + let facing = dot(geomNormal, rayObj.direction); + if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; } + if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; } + + var candidate: HitInfo; + candidate.t = tr.t; + candidate.instanceId = instanceId; + candidate.primitiveId = triIndex; + candidate.hitGroupIndex = hitGroupBase; + candidate.attribs = vec2(tr.u, tr.v); + candidate.objectRayOrigin = rayObj.origin; + candidate.objectRayDirection = rayObj.direction; + + let opaque = (flags & RT_FLAG_OPAQUE) != 0u + || (flags & RT_FLAG_NO_OPAQUE) == 0u; // default opaque + + if (opaque) { + *bestHit = candidate; + *bestT = tr.t; + if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; } + } else { + let r = runAnyHit(hitGroupBase, rayObj, candidate, payload); + if (r == RT_ANYHIT_END_SEARCH) { + *bestHit = candidate; + *bestT = tr.t; + return true; + } + if (r == RT_ANYHIT_ACCEPT) { + *bestHit = candidate; + *bestT = tr.t; + if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; } + } + } + } + if (sp == 0u) { break; } + sp = sp - 1u; nodeRel = stack[sp]; continue; + } + // inner node — push right, descend left + let left = node.firstChildOrPrim; + let right = left + 1u; + if (sp < 32u) { stack[sp] = right; sp = sp + 1u; } + nodeRel = left; + } + return false; +} + +fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32, + sbtRecordOffset: u32, sbtRecordStride: u32, + bestHit: ptr, + bestT: ptr, + payload: ptr) -> bool { + let invD = vec3(1.0) / rayWorld.direction; + let n = hdr.instanceCount; + for (var i: u32 = 0u; i < n; i = i + 1u) { + let inst = tlasEntries[i]; + let instanceMask = inst.maskHGOffset & 0xFFu; + if ((instanceMask & cullMask) == 0u) { continue; } + if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; } + + // Transform ray to object space. + let r0 = inst.worldToObjectR0; + let r1 = inst.worldToObjectR1; + let r2 = inst.worldToObjectR2; + var rayObj: RayDesc; + rayObj.origin = vec3( + dot(r0.xyz, rayWorld.origin) + r0.w, + dot(r1.xyz, rayWorld.origin) + r1.w, + dot(r2.xyz, rayWorld.origin) + r2.w, + ); + rayObj.direction = vec3( + dot(r0.xyz, rayWorld.direction), + dot(r1.xyz, rayWorld.direction), + dot(r2.xyz, rayWorld.direction), + ); + rayObj.tMin = rayWorld.tMin; + rayObj.tMax = *bestT; + + var effective = flags; + let iflags = inst.instanceFlags; + if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) { + effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE; + } + if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) { + effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE; + } + if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) { + effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES); + } + + let hitGroupOffset = inst.maskHGOffset >> 8u; + let hitGroupBase = sbtRecordOffset + hitGroupOffset; + let meshRec = meshRecords[inst.blasMeshIdx]; + + let pre = *bestT; + let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase, + bestHit, bestT, payload); + if (endSearch) { return true; } + if ((*bestT) < pre) { + // record world-space object-to-world for the closest-hit shader + (*bestHit).objectToWorldR0 = inst.objectToWorldR0; + (*bestHit).objectToWorldR1 = inst.objectToWorldR1; + (*bestHit).objectToWorldR2 = inst.objectToWorldR2; + (*bestHit).customIndex = inst.customIndex; + if ((effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; } + } + } + return false; +} + +fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32, + sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32, + rayOrigin: vec3, rayTMin: f32, + rayDir: vec3, rayTMax: f32, + payload: ptr) { + var ray: RayDesc; + ray.origin = rayOrigin; + ray.direction = rayDir; + ray.tMin = rayTMin; + ray.tMax = rayTMax; + var bestHit: HitInfo; + bestHit.t = rayTMax; + var bestT = rayTMax; + let ended = _rtTraverseTlas(ray, flags, cullMask & 0xFFu, + sbtRecordOffset, sbtRecordStride, + &bestHit, &bestT, payload); + if (bestT < rayTMax) { + if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) == 0u) { + runClosestHit(bestHit.hitGroupIndex, ray, bestHit, payload); + } + } else { + runMiss(missIndex, ray, payload); + } +} +`; + +// ── WGSL library: rayQuery API for non-megakernel compute shaders ──── +// +// Mirrors GL_EXT_ray_query semantics that 3DForts's physics shaders use +// (projectile-collide, splash, builder-pick). User WGSL: +// var rq: RayQuery; +// rayQueryInitialize(&rq, 0u, flags, mask, origin, tMin, dir, tMax); +// while (rayQueryProceed(&rq)) {} // run traversal to completion +// if (rayQueryGetCommittedIntersectionType(&rq) != RT_INTERSECTION_NONE) { +// let t = rayQueryGetCommittedT(&rq); +// ... +// } +// +// v1 simplification: traversal force-opaques every hit (no anyhit). The +// user can still test for triangle vs miss and read t/instance/bary. +// Anyhit-style candidate inspection is a future extension. +const rtWgslRayQueryLib = String.raw` +struct RayQuery { + ray: RayDesc, + flags: u32, + cullMask: u32, + committedType: u32, + committedT: f32, + committedInstanceId: u32, + committedInstanceCustomIndex: u32, + committedPrimitiveIndex: u32, + committedBarycentrics: vec2, + committedObjectRayOrigin: vec3, + committedObjectRayDirection: vec3, + committedObjectToWorldR0: vec4, + committedObjectToWorldR1: vec4, + committedObjectToWorldR2: vec4, + committedWorldToObjectR0: vec4, + committedWorldToObjectR1: vec4, + committedWorldToObjectR2: vec4, + done: u32, +}; + +fn _rqTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord, + instanceId: u32, customIndex: u32, + inst: TLASEntry, + rq: ptr) { + let invD = vec3(1.0) / rayObj.direction; + var stack: array; + var sp: u32 = 0u; + var nodeRel: u32 = 0u; + + loop { + let abs = meshRec.bvhOffset + nodeRel; + let node = bvhNodes[abs]; + if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, (*rq).committedT)) { + if (sp == 0u) { break; } + sp = sp - 1u; nodeRel = stack[sp]; continue; + } + if (node.primCount > 0u) { + for (var i: u32 = 0u; i < node.primCount; i = i + 1u) { + let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i]; + let verts = _rtFetchTri(meshRec, triIndex); + let tr = _rtTri(rayObj.origin, rayObj.direction, + verts[0], verts[1], verts[2], + rayObj.tMin, (*rq).committedT); + if (!tr.hit) { continue; } + + let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]); + let facing = dot(geomNormal, rayObj.direction); + if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; } + if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; } + + (*rq).committedType = RT_INTERSECTION_TRIANGLE; + (*rq).committedT = tr.t; + (*rq).committedInstanceId = instanceId; + (*rq).committedInstanceCustomIndex = customIndex; + (*rq).committedPrimitiveIndex = triIndex; + (*rq).committedBarycentrics = vec2(tr.u, tr.v); + (*rq).committedObjectRayOrigin = rayObj.origin; + (*rq).committedObjectRayDirection = rayObj.direction; + (*rq).committedObjectToWorldR0 = inst.objectToWorldR0; + (*rq).committedObjectToWorldR1 = inst.objectToWorldR1; + (*rq).committedObjectToWorldR2 = inst.objectToWorldR2; + (*rq).committedWorldToObjectR0 = inst.worldToObjectR0; + (*rq).committedWorldToObjectR1 = inst.worldToObjectR1; + (*rq).committedWorldToObjectR2 = inst.worldToObjectR2; + } + if (sp == 0u) { break; } + sp = sp - 1u; nodeRel = stack[sp]; continue; + } + let left = node.firstChildOrPrim; + let right = left + 1u; + if (sp < 32u) { stack[sp] = right; sp = sp + 1u; } + nodeRel = left; + } +} + +fn _rqTraverseTlas(rq: ptr) { + let rayWorld = (*rq).ray; + let invD = vec3(1.0) / rayWorld.direction; + let n = hdr.instanceCount; + let cullMask = (*rq).cullMask; + let rayFlags = (*rq).flags; + for (var i: u32 = 0u; i < n; i = i + 1u) { + let inst = tlasEntries[i]; + let instanceMask = inst.maskHGOffset & 0xFFu; + if ((instanceMask & cullMask) == 0u) { continue; } + if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, (*rq).committedT)) { continue; } + + let r0 = inst.worldToObjectR0; + let r1 = inst.worldToObjectR1; + let r2 = inst.worldToObjectR2; + var rayObj: RayDesc; + rayObj.origin = vec3( + dot(r0.xyz, rayWorld.origin) + r0.w, + dot(r1.xyz, rayWorld.origin) + r1.w, + dot(r2.xyz, rayWorld.origin) + r2.w, + ); + rayObj.direction = vec3( + dot(r0.xyz, rayWorld.direction), + dot(r1.xyz, rayWorld.direction), + dot(r2.xyz, rayWorld.direction), + ); + rayObj.tMin = rayWorld.tMin; + rayObj.tMax = (*rq).committedT; + + var effective = rayFlags; + let iflags = inst.instanceFlags; + if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) { + effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES); + } + + let meshRec = meshRecords[inst.blasMeshIdx]; + _rqTraverseBlas(rayObj, effective, meshRec, i, inst.customIndex, inst, rq); + + if ((rayFlags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u + && (*rq).committedType != RT_INTERSECTION_NONE) { + return; + } + } +} + +fn rayQueryInitialize(rq: ptr, tlasIdx: u32, flags: u32, cullMask: u32, + origin: vec3, tMin: f32, direction: vec3, tMax: f32) { + (*rq).ray.origin = origin; + (*rq).ray.tMin = tMin; + (*rq).ray.direction = direction; + (*rq).ray.tMax = tMax; + (*rq).flags = flags; + (*rq).cullMask = cullMask & 0xFFu; + (*rq).committedType = RT_INTERSECTION_NONE; + (*rq).committedT = tMax; + (*rq).done = 0u; +} + +fn rayQueryProceed(rq: ptr) -> bool { + if ((*rq).done != 0u) { return false; } + _rqTraverseTlas(rq); + (*rq).done = 1u; + return false; +} + +fn rayQueryTerminate(rq: ptr) { + (*rq).done = 1u; +} + +fn rayQueryGetCommittedIntersectionType(rq: ptr) -> u32 { return (*rq).committedType; } +fn rayQueryGetCommittedT(rq: ptr) -> f32 { return (*rq).committedT; } +fn rayQueryGetCommittedInstanceId(rq: ptr) -> u32 { return (*rq).committedInstanceId; } +fn rayQueryGetCommittedInstanceCustomIndex(rq: ptr) -> u32 { return (*rq).committedInstanceCustomIndex; } +fn rayQueryGetCommittedPrimitiveIndex(rq: ptr) -> u32 { return (*rq).committedPrimitiveIndex; } +fn rayQueryGetCommittedBarycentrics(rq: ptr) -> vec2 { return (*rq).committedBarycentrics; } +fn rayQueryGetCommittedObjectRayOrigin(rq: ptr) -> vec3 { return (*rq).committedObjectRayOrigin; } +fn rayQueryGetCommittedObjectRayDirection(rq: ptr) -> vec3 { return (*rq).committedObjectRayDirection; } +fn rayQueryGetCommittedWorldPosition(rq: ptr) -> vec3 { + return (*rq).ray.origin + (*rq).ray.direction * (*rq).committedT; +} +`; + +// ── Internal compute pipeline: builds TLASEntry[] from the RTInstance[] +// + the meshRecords table. One thread per instance. +// Uses only rtWgslTypes (no megakernel bindings) so it stays well under +// the 8-storage-buffer-per-stage baseline limit. +const tlasBuildWgsl = rtWgslTypes /* needs MeshRecord + TLASEntry */ + String.raw` +struct RTInstance { + transformR0: vec4, + transformR1: vec4, + transformR2: vec4, + customIndexMask: u32, // customIndex: low 24, mask: high 8 + sbtFlags: u32, // sbtOffset: low 24, flags: high 8 + accelStructureRef: vec2, +}; + +@group(0) @binding(0) var inInstances : array; +@group(0) @binding(1) var inMeshes : array; +@group(0) @binding(2) var outEntries : array; + +fn _invMat3(c0: vec3, c1: vec3, c2: vec3) -> array, 3> { + let m00 = c1.y*c2.z - c1.z*c2.y; + let m01 = c1.z*c2.x - c1.x*c2.z; + let m02 = c1.x*c2.y - c1.y*c2.x; + let det = c0.x*m00 + c0.y*m01 + c0.z*m02; + let inv = 1.0 / det; + return array, 3>( + vec3(m00, c0.z*c2.y - c0.y*c2.z, c0.y*c1.z - c0.z*c1.y) * inv, + vec3(m01, c0.x*c2.z - c0.z*c2.x, c0.z*c1.x - c0.x*c1.z) * inv, + vec3(m02, c0.y*c2.x - c0.x*c2.y, c0.x*c1.y - c0.y*c1.x) * inv, + ); +} + +@compute @workgroup_size(64, 1, 1) +fn tlasBuildMain(@builtin(global_invocation_id) gid: vec3) { + let i = gid.x; + if (i >= arrayLength(&inInstances)) { return; } + let inst = inInstances[i]; + let meshIdx = inst.accelStructureRef.x; + let custom = inst.customIndexMask & 0xFFFFFFu; + let mask = (inst.customIndexMask >> 24u) & 0xFFu; + let sbtOffset = inst.sbtFlags & 0xFFFFFFu; + let iflags = (inst.sbtFlags >> 24u) & 0xFFu; + + let rec = inMeshes[meshIdx]; + // 8 corners → world AABB. + var worldMin = vec3(1e30); + var worldMax = vec3(-1e30); + for (var c: u32 = 0u; c < 8u; c = c + 1u) { + let corner = vec3( + select(rec.rootAabbMin.x, rec.rootAabbMax.x, (c & 1u) != 0u), + select(rec.rootAabbMin.y, rec.rootAabbMax.y, (c & 2u) != 0u), + select(rec.rootAabbMin.z, rec.rootAabbMax.z, (c & 4u) != 0u), + ); + let wc = vec3( + dot(inst.transformR0.xyz, corner) + inst.transformR0.w, + dot(inst.transformR1.xyz, corner) + inst.transformR1.w, + dot(inst.transformR2.xyz, corner) + inst.transformR2.w, + ); + worldMin = min(worldMin, wc); + worldMax = max(worldMax, wc); + } + + // Inverse 3x4 affine. + let inv = _invMat3(inst.transformR0.xyz, inst.transformR1.xyz, inst.transformR2.xyz); + let T = vec3(inst.transformR0.w, inst.transformR1.w, inst.transformR2.w); + let invT = vec3(-dot(inv[0], T), -dot(inv[1], T), -dot(inv[2], T)); + + var e: TLASEntry; + e.aabbMin = worldMin; + e.aabbMax = worldMax; + e.maskHGOffset = mask | (sbtOffset << 8u); + e.blasMeshIdx = meshIdx; + e.objectToWorldR0 = inst.transformR0; + e.objectToWorldR1 = inst.transformR1; + e.objectToWorldR2 = inst.transformR2; + e.worldToObjectR0 = vec4(inv[0], invT.x); + e.worldToObjectR1 = vec4(inv[1], invT.y); + e.worldToObjectR2 = vec4(inv[2], invT.z); + e.customIndex = custom; + e.instanceFlags = iflags; + outEntries[i] = e; +} +`; + +// ── RT runtime state ────────────────────────────────────────────────── +// Mesh heaps grow geometrically; each Mesh::Build appends + records its +// offsets in meshRecordsCpu/Buffer. nextMeshHandle is what gets returned +// to C++ as RTInstance::accelerationStructureReference. +const RT_HEAP_INITIAL_BYTES = 64 * 1024; +function makeRtHeap() { + return { + gpu: device.createBuffer({ + size: RT_HEAP_INITIAL_BYTES, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, + }), + capacity: RT_HEAP_INITIAL_BYTES, + cursor: 0, + }; +} +function rtHeapEnsure(h, neededBytes) { + if (h.cursor + neededBytes <= h.capacity) return; + let cap = h.capacity; + while (h.cursor + neededBytes > cap) cap *= 2; + const ng = device.createBuffer({ + size: cap, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, + }); + const enc = device.createCommandEncoder(); + if (h.cursor > 0) enc.copyBufferToBuffer(h.gpu, 0, ng, 0, h.cursor); + queue.submit([enc.finish()]); + h.gpu.destroy(); + h.gpu = ng; + h.capacity = cap; + // Invalidate cached bind groups that referenced the heap. + rtState.bindGroupCache.clear(); +} + +const rtState = { + vertHeap: null, // f32 stream (3 floats per vertex) + indexHeap: null, // u32 stream + bvhHeap: null, // BVHNode stream (32 bytes per node) + primRemapHeap: null, // u32 stream + + meshRecordsBuffer: null, // GPUBuffer of MeshRecord[] + meshRecordsCapacity: 0, + nextMeshHandle: 1, + + rtHeader: null, // uniform buffer for RTDispatchHeader (256 B aligned) + + bindGroupCache: new Map(), // key → bind group + + tlasBuildPipeline: null, + tlasBuildBgl: null, + + // Latest TLAS buffer handle from wgpuBuildTLAS, used by rayQuery-capable + // compute shaders at dispatch time. + currentTlas: 0, + currentTlasInstanceCount: 0, +}; + +function rtInit() { + rtState.vertHeap = makeRtHeap(); + rtState.indexHeap = makeRtHeap(); + rtState.bvhHeap = makeRtHeap(); + rtState.primRemapHeap = makeRtHeap(); + rtState.meshRecordsCapacity = 16; + rtState.meshRecordsBuffer = device.createBuffer({ + size: rtState.meshRecordsCapacity * 48, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + rtState.rtHeader = device.createBuffer({ + size: 256, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }); + + // TLAS-build compute pipeline. Only group(0) is used (3 SSBOs). + const mod = device.createShaderModule({ code: tlasBuildWgsl, label: "rt-tlas-build" }); + const tlasBuildBgl = device.createBindGroupLayout({ entries: [ + { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }, + ]}); + rtState.tlasBuildBgl = tlasBuildBgl; + rtState.tlasBuildPipeline = device.createComputePipeline({ + layout: device.createPipelineLayout({ bindGroupLayouts: [tlasBuildBgl] }), + compute: { module: mod, entryPoint: "tlasBuildMain" }, + }); +} + +function rtMeshRecordsEnsure(meshCount) { + if (meshCount <= rtState.meshRecordsCapacity) return; + let cap = rtState.meshRecordsCapacity; + while (cap < meshCount) cap *= 2; + const ng = device.createBuffer({ + size: cap * 48, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + const enc = device.createCommandEncoder(); + enc.copyBufferToBuffer(rtState.meshRecordsBuffer, 0, ng, 0, + rtState.meshRecordsCapacity * 48); + queue.submit([enc.finish()]); + rtState.meshRecordsBuffer.destroy(); + rtState.meshRecordsBuffer = ng; + rtState.meshRecordsCapacity = cap; + rtState.bindGroupCache.clear(); +} + +env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ, + verticesPtr, vertexCount, + indicesPtr, indexCount, + bvhNodesPtr, bvhNodeCount, + primRemapPtr, primRemapCount) => { + if (!rtState.vertHeap) rtInit(); + + const vBytes = vertexCount * 12; + const iBytes = indexCount * 4; + const nBytes = bvhNodeCount * 32; + const rBytes = primRemapCount * 4; + + rtHeapEnsure(rtState.vertHeap, vBytes); + rtHeapEnsure(rtState.indexHeap, iBytes); + rtHeapEnsure(rtState.bvhHeap, nBytes); + rtHeapEnsure(rtState.primRemapHeap, rBytes); + + const vOff = rtState.vertHeap.cursor / 12; // in vec3 units + const iOff = rtState.indexHeap.cursor / 4; // in u32 units + const nOff = rtState.bvhHeap.cursor / 32; // in BVHNode units + const rOff = rtState.primRemapHeap.cursor / 4; + + // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is + // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4. + queue.writeBuffer(rtState.vertHeap.gpu, rtState.vertHeap.cursor, + memU8().buffer, verticesPtr, vBytes); + queue.writeBuffer(rtState.indexHeap.gpu, rtState.indexHeap.cursor, + memU8().buffer, indicesPtr, iBytes); + queue.writeBuffer(rtState.bvhHeap.gpu, rtState.bvhHeap.cursor, + memU8().buffer, bvhNodesPtr, nBytes); + queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor, + memU8().buffer, primRemapPtr, rBytes); + + rtState.vertHeap.cursor += vBytes; + rtState.indexHeap.cursor += iBytes; + rtState.bvhHeap.cursor += nBytes; + rtState.primRemapHeap.cursor += rBytes; + + const handle = rtState.nextMeshHandle++; + rtMeshRecordsEnsure(handle + 1); + + // Build the MeshRecord (48 bytes) and write it. + const rec = new ArrayBuffer(48); + const f32 = new Float32Array(rec); + const u32 = new Uint32Array(rec); + f32[0] = minX; f32[1] = minY; f32[2] = minZ; + u32[3] = vOff; + f32[4] = maxX; f32[5] = maxY; f32[6] = maxZ; + u32[7] = iOff; + u32[8] = nOff; + u32[9] = rOff; + u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0; + u32[11] = 0; + queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec); + + return handle; +}; + +env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => { + if (!rtState.tlasBuildPipeline) return; + const inst = buffers.get(instanceBufHandle); + const out = buffers.get(tlasOutBufHandle); + if (!inst || !out) { + console.error("[crafter-wgpu] wgpuBuildTLAS: unknown buffer handle"); + return; + } + + const bg = device.createBindGroup({ + layout: rtState.tlasBuildBgl, + entries: [ + { binding: 0, resource: { buffer: inst } }, + { binding: 1, resource: { buffer: rtState.meshRecordsBuffer } }, + { binding: 2, resource: { buffer: out } }, + ], + }); + + if (state.pass) { + // Mid-frame — close the user's compute pass, run our build pass + // on the same encoder, then reopen. + state.pass.end(); + state.pass = null; + } + const enc = state.encoder || device.createCommandEncoder(); + const ownEncoder = !state.encoder; + const pass = enc.beginComputePass({ label: "tlas-build" }); + pass.setPipeline(rtState.tlasBuildPipeline); + pass.setBindGroup(0, bg); + const groups = Math.ceil(instanceCount / 64); + pass.dispatchWorkgroups(groups, 1, 1); + pass.end(); + if (ownEncoder) { + queue.submit([enc.finish()]); + } else { + state.pass = state.encoder.beginComputePass(); + } + + // Publish so rayQuery-capable compute pipelines pick up the latest TLAS + // without each dispatch having to thread the handle explicitly. + rtState.currentTlas = tlasOutBufHandle; + rtState.currentTlasInstanceCount = instanceCount; +}; + +// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega +// switches + raygen + @compute entry) with the library prelude/helpers. +const rtPipelines = new Map(); // handle → { pipeline, bgls } + +env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => { + if (!rtState.vertHeap) rtInit(); + const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen)); + + // Insert helpers at the marker; prepend prelude. + const marker = "// @CRAFTER_RT_LIBRARY_HELPERS_HERE"; + let beforeHelpers = userPart; + let afterHelpers = ""; + const mi = userPart.indexOf(marker); + if (mi >= 0) { + beforeHelpers = userPart.substring(0, mi); + afterHelpers = userPart.substring(mi + marker.length); + } + const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers; + + let pipeline; + try { + const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" }); + // RTDispatchHeader is 16 bytes; bind exactly that. + const headerBgl = device.createBindGroupLayout({ entries: [ + { binding: 0, visibility: GPUShaderStage.COMPUTE, + buffer: { type: "uniform", minBindingSize: 16 } }, + ]}); + const dataBgl = device.createBindGroupLayout({ entries: [ + { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } }, + { binding: 6, visibility: GPUShaderStage.COMPUTE, + storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } }, + ]}); + pipeline = device.createComputePipeline({ + layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }), + compute: { module: mod, entryPoint: "main" }, + }); + const handle = newHandle(); + rtPipelines.set(handle, { pipeline, headerBgl, dataBgl }); + return handle; + } catch (e) { + console.error("[crafter-wgpu] RT pipeline compile failed:", e); + console.error("[crafter-wgpu] WGSL was:\n", fullWgsl); + return 0; + } +}; + +env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes, + tlasBufHandle, instanceCount, gx, gy) => { + if (!state.pass) return; + const pipe = rtPipelines.get(pipelineHandle); + const tlas = buffers.get(tlasBufHandle); + if (!pipe || !tlas) { + console.error("[crafter-wgpu] wgpuDispatchRT: unknown pipeline or tlas"); + return; + } + // Write RT header from push data (first 16 bytes). Surface dims + instance count + flags. + const hdr32 = new Uint32Array(4); + hdr32[0] = state.width; + hdr32[1] = state.height; + hdr32[2] = instanceCount; + hdr32[3] = 0; + queue.writeBuffer(rtState.rtHeader, 0, hdr32); + + const headerBg = device.createBindGroup({ + layout: pipe.headerBgl, + entries: [{ binding: 0, resource: { buffer: rtState.rtHeader, offset: 0, size: 16 } }], + }); + const outView = state.outIsPing ? state.pingView : state.pongView; + const dataBg = device.createBindGroup({ + layout: pipe.dataBgl, + entries: [ + { binding: 0, resource: { buffer: tlas } }, + { binding: 1, resource: { buffer: rtState.bvhHeap.gpu } }, + { binding: 2, resource: { buffer: rtState.meshRecordsBuffer } }, + { binding: 3, resource: { buffer: rtState.vertHeap.gpu } }, + { binding: 4, resource: { buffer: rtState.indexHeap.gpu } }, + { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } }, + { binding: 6, resource: outView }, + ], + }); + + state.pass.setPipeline(pipe.pipeline); + state.pass.setBindGroup(0, headerBg); + state.pass.setBindGroup(1, dataBg); + state.pass.dispatchWorkgroups(gx, gy, 1); + state.outIsPing = !state.outIsPing; +}; + console.log("[crafter-wgpu] init complete; env handlers wired"); } catch (e) { // Capture any throw so the stub error messages name the real cause diff --git a/examples/VulkanTriangle/closesthit.wgsl b/examples/VulkanTriangle/closesthit.wgsl new file mode 100644 index 0000000..e0d29ae --- /dev/null +++ b/examples/VulkanTriangle/closesthit.wgsl @@ -0,0 +1,12 @@ +// WebGPU port of closesthit.glsl. Library concatenates this BEFORE the +// library helpers, so `Payload` declared here is visible to traceRay, +// runClosestHit, the mega-switch, and the user's raygen source. + +struct Payload { + color: vec3, +}; + +fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr) { + let bary = vec3(1.0 - hit.attribs.x - hit.attribs.y, hit.attribs.x, hit.attribs.y); + (*payload).color = bary; +} diff --git a/examples/VulkanTriangle/main.cpp b/examples/VulkanTriangle/main.cpp index 1c5184c..3fcd287 100644 --- a/examples/VulkanTriangle/main.cpp +++ b/examples/VulkanTriangle/main.cpp @@ -1,4 +1,6 @@ +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM #include "vulkan/vulkan.h" +#endif #include import Crafter.Graphics; @@ -7,7 +9,7 @@ import std; import Crafter.Event; import Crafter.Math; - +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM int main() { Device::Initialize(); Window window(1280, 720, "HelloVulkan"); @@ -89,7 +91,7 @@ int main() { RenderingElement3D::elements.emplace_back(&renderer); MatrixRowMajor transform = MatrixRowMajor::Identity(); - std::memcpy(renderer.instance.transform.matrix, transform.m, sizeof(transform.m)); + transform.Store(reinterpret_cast(renderer.instance.transform.matrix)); RenderingElement3D::BuildTLAS(cmd, 0); RenderingElement3D::BuildTLAS(cmd, 1); RenderingElement3D::BuildTLAS(cmd, 2); @@ -202,3 +204,65 @@ int main() { window.Render(); window.StartSync(); } +#else +// DOM-mode port. Same scene (one triangle), software-emulated raytracing +// via compute. Shaders are read from .wgsl files shipped as static +// assets (see project.cpp). Renders barycentric colors via the +// hit/miss/raygen mega-switch in PipelineRTWebGPU. +int main() { + Device::Initialize(); + static Window window(1280, 720, "HelloVulkan"); + auto cmd = window.StartInit(); + + DescriptorHeapWebGPU heap; + heap.Initialize(/*images*/ 4, /*buffers*/ 4, /*samplers*/ 2); + + std::array shaders {{ + WebGPUShader(std::filesystem::path("raygen.wgsl"), "raygen_main", WebGPURTStage::Raygen), + WebGPUShader(std::filesystem::path("miss.wgsl"), "miss_main", WebGPURTStage::Miss), + WebGPUShader(std::filesystem::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit), + }}; + ShaderBindingTableWebGPU sbt; + sbt.Init(shaders); + + std::array raygenGroups {{ + { .type = RTShaderGroupType::General, .generalShader = 0 }, + }}; + std::array missGroups {{ + { .type = RTShaderGroupType::General, .generalShader = 1 }, + }}; + std::array hitGroups {{ + { .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 }, + }}; + + PipelineRTWebGPU pipeline; + pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt); + + Mesh triangleMesh; + std::array, 3> verts {{{-150, -150, 100}, {0, 150, 100}, {150, -150, 100}}}; + std::array index {{2, 1, 0}}; + triangleMesh.Build(verts, index, cmd); + + static RenderingElement3D renderer; + renderer.instance.transform.matrix[0][0] = 1; renderer.instance.transform.matrix[0][1] = 0; renderer.instance.transform.matrix[0][2] = 0; renderer.instance.transform.matrix[0][3] = 0; + renderer.instance.transform.matrix[1][0] = 0; renderer.instance.transform.matrix[1][1] = 1; renderer.instance.transform.matrix[1][2] = 0; renderer.instance.transform.matrix[1][3] = 0; + renderer.instance.transform.matrix[2][0] = 0; renderer.instance.transform.matrix[2][1] = 0; renderer.instance.transform.matrix[2][2] = 1; renderer.instance.transform.matrix[2][3] = 0; + renderer.instance.instanceCustomIndex = 0; + renderer.instance.mask = 0xFF; + renderer.instance.instanceShaderBindingTableRecordOffset = 0; + renderer.instance.flags = kRTGeometryInstanceForceOpaque; + renderer.instance.accelerationStructureReference = triangleMesh.blasAddr; + RenderingElement3D::Add(&renderer); + RenderingElement3D::BuildTLAS(cmd, 0); + + window.descriptorHeap = &heap; + window.FinishInit(); + + RTPass rtPass(&pipeline); + window.passes.push_back(&rtPass); + + window.Render(); + window.StartUpdate(); + window.StartSync(); +} +#endif diff --git a/examples/VulkanTriangle/miss.wgsl b/examples/VulkanTriangle/miss.wgsl new file mode 100644 index 0000000..a0ba944 --- /dev/null +++ b/examples/VulkanTriangle/miss.wgsl @@ -0,0 +1,5 @@ +// WebGPU port of miss.glsl. + +fn miss_main(ray: RayDesc, payload: ptr) { + (*payload).color = vec3(1.0, 1.0, 1.0); +} diff --git a/examples/VulkanTriangle/project.cpp b/examples/VulkanTriangle/project.cpp index fca4a1e..f5f7bb7 100644 --- a/examples/VulkanTriangle/project.cpp +++ b/examples/VulkanTriangle/project.cpp @@ -4,6 +4,14 @@ namespace fs = std::filesystem; using namespace Crafter; extern "C" Configuration CrafterBuildProject(std::span args) { + bool isWasm = false; + for (std::string_view a : args) { + if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) { + isWasm = true; + break; + } + } + std::vector graphicsArgs(args.begin(), args.end()); Configuration* graphics = LocalProject({ .projectFile = "../../project.cpp", @@ -14,6 +22,12 @@ extern "C" Configuration CrafterBuildProject(std::span a cfg.path = "./"; cfg.name = "VulkanTriangle"; cfg.outputName = "VulkanTriangle"; + cfg.type = ConfigurationType::Executable; + if (isWasm) { + cfg.target = "wasm32-wasip1"; + cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""}); + cfg.compileFlags.push_back("-msimd128"); + } ApplyStandardArgs(cfg, args); cfg.dependencies = { graphics }; @@ -21,8 +35,15 @@ extern "C" Configuration CrafterBuildProject(std::span a std::array impls = { "main" }; cfg.GetInterfacesAndImplementations(ifaces, impls); - cfg.shaders.emplace_back(fs::path("raygen.glsl"), std::string("main"), ShaderType::RayGen); - cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit); - cfg.shaders.emplace_back(fs::path("miss.glsl"), std::string("main"), ShaderType::Miss); + if (isWasm) { + cfg.files.emplace_back(fs::path("raygen.wgsl")); + cfg.files.emplace_back(fs::path("closesthit.wgsl")); + cfg.files.emplace_back(fs::path("miss.wgsl")); + EnableWasiBrowserRuntime(cfg); + } else { + cfg.shaders.emplace_back(fs::path("raygen.glsl"), std::string("main"), ShaderType::RayGen); + cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit); + cfg.shaders.emplace_back(fs::path("miss.glsl"), std::string("main"), ShaderType::Miss); + } return cfg; } diff --git a/examples/VulkanTriangle/raygen.glsl b/examples/VulkanTriangle/raygen.glsl index 1031ffd..eff2633 100644 --- a/examples/VulkanTriangle/raygen.glsl +++ b/examples/VulkanTriangle/raygen.glsl @@ -34,17 +34,17 @@ void main() { 1.0 )); - // traceRayEXT( - // topLevelAS[bufferStart], - // gl_RayFlagsNoneEXT, - // 0xff, - // 0, 0, 0, - // origin, - // 0.001, - // direction, - // 10000.0, - // 0 - // ); + traceRayEXT( + topLevelAS[bufferStart], + gl_RayFlagsNoneEXT, + 0xff, + 0, 0, 0, + origin, + 0.001, + direction, + 10000.0, + 0 + ); imageStore(image[0], ivec2(pixel), vec4(hitValue, 1)); } diff --git a/examples/VulkanTriangle/raygen.wgsl b/examples/VulkanTriangle/raygen.wgsl new file mode 100644 index 0000000..cdf66fd --- /dev/null +++ b/examples/VulkanTriangle/raygen.wgsl @@ -0,0 +1,39 @@ +// WebGPU port of raygen.glsl. Mirrors the pinhole camera setup — the +// Payload type is declared in closesthit.wgsl (concatenated earlier). + +fn raygen_main(gid: vec3) { + if (gid.x >= hdr.surfaceW || gid.y >= hdr.surfaceH) { return; } + + let pixel = vec2(f32(gid.x), f32(gid.y)); + let resolution = vec2(f32(hdr.surfaceW), f32(hdr.surfaceH)); + let uv = (pixel + vec2(0.5)) / resolution; + let ndc = uv * 2.0 - vec2(1.0); + + let origin = vec3(0.0, 0.0, -300.0); + let aspect = resolution.x / resolution.y; + let fov = 60.0 * 3.14159265 / 180.0; + let tanHalfFov = tan(fov * 0.5); + + let direction = normalize(vec3( + ndc.x * aspect * tanHalfFov, + -ndc.y * tanHalfFov, + 1.0, + )); + + var payload: Payload; + payload.color = vec3(0.0); + + traceRay( + 0u, // tlasIdx (unused) + 0u, // ray flags + 0xFFu, // cull mask + 0u, 0u, 0u, // sbtRecordOffset, sbtRecordStride, missIndex + origin, 0.001, + direction, 10000.0, + &payload, + ); + + textureStore(outImage, + vec2(i32(gid.x), i32(gid.y)), + vec4(payload.color, 1.0)); +} diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp new file mode 100644 index 0000000..4c86ad7 --- /dev/null +++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp @@ -0,0 +1,240 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +// DOM-mode Mesh implementation: SAH BVH2 built on the host, then +// forwarded to the JS bridge which appends the four data streams +// (vertices, indices, BVH nodes, primRemap) into the global RT mesh +// heaps. The handle returned by wgpuRegisterMeshBLAS goes into +// RTInstance::accelerationStructureReference and lets the TLAS-build +// compute pass and the traversal kernel find the BLAS data later. +// +// BVH layout must stay binary-identical to the WGSL `BVHNode` struct +// declared in additional/dom-webgpu.js (rtWgslPrelude). + +module; +module Crafter.Graphics:Mesh_implWebGPU; + +import :Mesh; +import :WebGPU; +import Crafter.Math; +import std; + +using namespace Crafter; + +namespace { + // ─── BVH builder (binned SAH, 8 bins, BVH2) ──────────────────────── + + constexpr std::uint32_t kBinCount = 8; + constexpr std::uint32_t kMaxLeafSize = 4; + constexpr float kTraversalCost = 1.0f; + constexpr float kIntersectCost = 1.0f; + + struct AABB { + float lo[3] { std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity() }; + float hi[3] {-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity() }; + + void Extend(const float p[3]) noexcept { + for (int a = 0; a < 3; ++a) { + if (p[a] < lo[a]) lo[a] = p[a]; + if (p[a] > hi[a]) hi[a] = p[a]; + } + } + void Extend(const AABB& o) noexcept { + for (int a = 0; a < 3; ++a) { + if (o.lo[a] < lo[a]) lo[a] = o.lo[a]; + if (o.hi[a] > hi[a]) hi[a] = o.hi[a]; + } + } + float SurfaceArea() const noexcept { + float dx = hi[0] - lo[0]; + float dy = hi[1] - lo[1]; + float dz = hi[2] - lo[2]; + if (dx < 0.0f || dy < 0.0f || dz < 0.0f) return 0.0f; + return 2.0f * (dx*dy + dx*dz + dy*dz); + } + }; + + struct PrimRef { + AABB box; + float centroid[3]; + std::uint32_t triIndex; + }; + + struct Bin { + AABB box; + std::uint32_t count = 0; + }; + + struct Builder { + std::vector prims; + std::vector nodes; + + std::pair AllocateChildren() { + std::uint32_t l = static_cast(nodes.size()); + nodes.emplace_back(); + nodes.emplace_back(); + return { l, l + 1 }; + } + + void BuildRecursive(std::uint32_t nodeIdx, + std::uint32_t first, + std::uint32_t count) { + AABB bounds, centroidBounds; + for (std::uint32_t i = 0; i < count; ++i) { + const auto& p = prims[first + i]; + bounds.Extend(p.box); + centroidBounds.Extend(p.centroid); + } + + auto emitLeaf = [&] { + BVHNode& n = nodes[nodeIdx]; + std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo)); + std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi)); + n.firstChildOrPrim = first; + n.primCount = count; + }; + + if (count <= kMaxLeafSize) { emitLeaf(); return; } + + int bestAxis = -1; + float bestCost = std::numeric_limits::infinity(); + std::uint32_t bestBin = 0; + + float parentArea = bounds.SurfaceArea(); + if (parentArea <= 0.0f) { emitLeaf(); return; } + + for (int axis = 0; axis < 3; ++axis) { + float extent = centroidBounds.hi[axis] - centroidBounds.lo[axis]; + if (extent <= 0.0f) continue; + float invExtent = static_cast(kBinCount) / extent; + + std::array bins{}; + for (std::uint32_t i = 0; i < count; ++i) { + const auto& p = prims[first + i]; + float t = (p.centroid[axis] - centroidBounds.lo[axis]) * invExtent; + std::uint32_t b = static_cast(t); + if (b >= kBinCount) b = kBinCount - 1; + bins[b].box.Extend(p.box); + bins[b].count += 1; + } + + std::array leftBox; + std::array leftCount{}; + { + AABB acc; std::uint32_t cnt = 0; + for (std::uint32_t i = 0; i < kBinCount - 1; ++i) { + acc.Extend(bins[i].box); + cnt += bins[i].count; + leftBox[i] = acc; + leftCount[i] = cnt; + } + } + { + AABB acc; std::uint32_t cnt = 0; + for (std::int32_t i = kBinCount - 1; i >= 1; --i) { + acc.Extend(bins[i].box); + cnt += bins[i].count; + std::uint32_t split = static_cast(i - 1); + if (leftCount[split] == 0 || cnt == 0) continue; + float cost = kTraversalCost + + (leftBox[split].SurfaceArea() * leftCount[split] + + acc.SurfaceArea() * cnt) * kIntersectCost / parentArea; + if (cost < bestCost) { + bestCost = cost; + bestAxis = axis; + bestBin = split; + } + } + } + } + + float leafCost = static_cast(count) * kIntersectCost; + if (bestAxis < 0 || bestCost >= leafCost) { emitLeaf(); return; } + + float invExtent = static_cast(kBinCount) + / (centroidBounds.hi[bestAxis] - centroidBounds.lo[bestAxis]); + float lo = centroidBounds.lo[bestAxis]; + auto mid = std::partition( + prims.begin() + first, prims.begin() + first + count, + [&](const PrimRef& p) { + float t = (p.centroid[bestAxis] - lo) * invExtent; + std::uint32_t b = static_cast(t); + if (b >= kBinCount) b = kBinCount - 1; + return b <= bestBin; + }); + std::uint32_t leftCount = + static_cast(mid - (prims.begin() + first)); + if (leftCount == 0 || leftCount == count) { emitLeaf(); return; } + + auto [leftIdx, rightIdx] = AllocateChildren(); + { + BVHNode& n = nodes[nodeIdx]; + std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo)); + std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi)); + n.firstChildOrPrim = leftIdx; + n.primCount = 0; + } + BuildRecursive(leftIdx, first, leftCount); + BuildRecursive(rightIdx, first + leftCount, count - leftCount); + } + + void Build(std::span> vertices, + std::span indices) { + std::uint32_t triCount = static_cast(indices.size()) / 3; + prims.resize(triCount); + for (std::uint32_t i = 0; i < triCount; ++i) { + std::uint32_t i0 = indices[i*3 + 0]; + std::uint32_t i1 = indices[i*3 + 1]; + std::uint32_t i2 = indices[i*3 + 2]; + const auto& v0 = vertices[i0]; + const auto& v1 = vertices[i1]; + const auto& v2 = vertices[i2]; + float p0[3] { v0.v[0], v0.v[1], v0.v[2] }; + float p1[3] { v1.v[0], v1.v[1], v1.v[2] }; + float p2[3] { v2.v[0], v2.v[1], v2.v[2] }; + auto& pr = prims[i]; + pr.box.Extend(p0); + pr.box.Extend(p1); + pr.box.Extend(p2); + pr.centroid[0] = (pr.box.lo[0] + pr.box.hi[0]) * 0.5f; + pr.centroid[1] = (pr.box.lo[1] + pr.box.hi[1]) * 0.5f; + pr.centroid[2] = (pr.box.lo[2] + pr.box.hi[2]) * 0.5f; + pr.triIndex = i; + } + nodes.reserve(triCount * 2); + nodes.emplace_back(); + BuildRecursive(0, 0, triCount); + } + }; +} + +void Mesh::Build(std::span> vertices, + std::span indices, + WebGPUCommandEncoderRef /*cmd*/) { + triangleCount = static_cast(indices.size()) / 3; + + Builder builder; + builder.Build(vertices, indices); + + std::vector primRemap(triangleCount); + for (std::uint32_t i = 0; i < triangleCount; ++i) { + primRemap[i] = builder.prims[i].triIndex; + } + + const BVHNode& root = builder.nodes[0]; + std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS( + root.aabbMin[0], root.aabbMin[1], root.aabbMin[2], + root.aabbMax[0], root.aabbMax[1], root.aabbMax[2], + vertices.data(), static_cast(vertices.size()), + indices.data(), static_cast(indices.size()), + builder.nodes.data(), static_cast(builder.nodes.size()), + primRemap.data(), static_cast(primRemap.size())); + blasAddr = h; +} diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp new file mode 100644 index 0000000..bf9e115 --- /dev/null +++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp @@ -0,0 +1,187 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +// Megakernel WGSL assembly. The library prelude lives JS-side +// (additional/dom-webgpu.js, rtWgslPrelude) — we don't have access to +// it from C++ — so this file emits only the *user-controlled* portions +// (concatenated SBT sources + the generated switch statements) and the +// stable entry-point glue. The JS side wraps these with the prelude +// before handing to device.createShaderModule. +// +// Wire format passed across the JS boundary is a single WGSL string +// containing the substitution markers `// @CRAFTER_RT_USER_SOURCES`, +// `// @CRAFTER_RT_CLOSESTHIT_CASES`, `// @CRAFTER_RT_ANYHIT_CASES`, +// `// @CRAFTER_RT_MISS_CASES`, `// @CRAFTER_RT_RAYGEN_BODY` already +// expanded; the JS side just concatenates prelude + this string. + +module; +module Crafter.Graphics:PipelineRTWebGPU_impl; + +import :PipelineRTWebGPU; +import :ShaderBindingTableWebGPU; +import :RT; +import :WebGPU; +import std; + +using namespace Crafter; + +namespace { + constexpr std::string_view kPlaceholderClosestHit = + "fn _crafter_default_closesthit(ray: RayDesc, hit: HitInfo, payload: ptr) {}"; + constexpr std::string_view kPlaceholderAnyHit = + "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr) -> u32 { return RT_ANYHIT_ACCEPT; }"; + constexpr std::string_view kPlaceholderMiss = + "fn _crafter_default_miss(ray: RayDesc, payload: ptr) {}"; + + void AppendCase(std::string& out, + std::uint32_t hitGroupIndex, + std::string_view entryFn, + std::string_view args) { + out += " case "; + out += std::to_string(hitGroupIndex); + out += "u: { "; + out += entryFn; + out += "("; + out += args; + out += "); }\n"; + } + + // anyhit has a return type — case body forwards the result. + void AppendAnyHitCase(std::string& out, + std::uint32_t hitGroupIndex, + std::string_view entryFn) { + out += " case "; + out += std::to_string(hitGroupIndex); + out += "u: { return "; + out += entryFn; + out += "(ray, hit, payload); }\n"; + } +} + +void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, + std::span raygenGroups, + std::span missGroups, + std::span hitGroups, + const ShaderBindingTableWebGPU& sbt) { + std::string wgsl; + wgsl.reserve(8 * 1024); + + // ── Section 1: user closesthit / anyhit / miss source files ──────── + // + // Raygens come later (after `traceRay` is declared) so we partition + // shaders by stage. Concatenating *all* non-raygen sources here lets + // them declare shared helpers, `struct Payload`, etc., in any order. + + wgsl += "// ── user closesthit / anyhit / miss sources ───────────────\n"; + for (const auto& shader : sbt.shaders) { + if (shader.stage == WebGPURTStage::Raygen) continue; + wgsl += shader.source; + wgsl += "\n"; + } + + // ── Section 2: mega-switch dispatchers ───────────────────────────── + // + // runClosestHit, runAnyHit, runMiss each dispatch on the per-hit / + // per-ray index registered against the appropriate group span. + // Indices match the user's expectations from VkRayTracingShaderGroup + // ordering: closest-hit group N (N from 0..hitGroups.size()-1) is + // selected by hitGroupIndex == N. + + wgsl += "\nfn runClosestHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr) {\n"; + wgsl += " switch hg {\n"; + bool anyClosestHit = false; + for (std::uint32_t i = 0; i < hitGroups.size(); ++i) { + const auto& g = hitGroups[i]; + if (g.closestHitShader == kRTShaderUnused) continue; + if (g.closestHitShader >= sbt.shaders.size()) continue; + const auto& fn = sbt.shaders[g.closestHitShader].entryFn; + AppendCase(wgsl, i, fn, "ray, hit, payload"); + anyClosestHit = true; + } + if (!anyClosestHit) wgsl += " // (no closest-hit shaders registered)\n"; + wgsl += " default: { }\n"; + wgsl += " }\n"; + wgsl += "}\n\n"; + + wgsl += "fn runAnyHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr) -> u32 {\n"; + wgsl += " switch hg {\n"; + bool anyAnyhit = false; + for (std::uint32_t i = 0; i < hitGroups.size(); ++i) { + const auto& g = hitGroups[i]; + if (g.anyHitShader == kRTShaderUnused) continue; + if (g.anyHitShader >= sbt.shaders.size()) continue; + const auto& fn = sbt.shaders[g.anyHitShader].entryFn; + AppendAnyHitCase(wgsl, i, fn); + anyAnyhit = true; + } + if (!anyAnyhit) wgsl += " // (no any-hit shaders registered)\n"; + wgsl += " default: { return RT_ANYHIT_ACCEPT; }\n"; + wgsl += " }\n"; + wgsl += "}\n\n"; + + wgsl += "fn runMiss(idx: u32, ray: RayDesc, payload: ptr) {\n"; + wgsl += " switch idx {\n"; + bool anyMiss = false; + for (std::uint32_t i = 0; i < missGroups.size(); ++i) { + const auto& g = missGroups[i]; + if (g.generalShader == kRTShaderUnused) continue; + if (g.generalShader >= sbt.shaders.size()) continue; + const auto& fn = sbt.shaders[g.generalShader].entryFn; + AppendCase(wgsl, i, fn, "ray, payload"); + anyMiss = true; + } + if (!anyMiss) wgsl += " // (no miss shaders registered)\n"; + wgsl += " default: { }\n"; + wgsl += " }\n"; + wgsl += "}\n"; + + // Marker — JS-side prelude/post-amble searches for this token to know + // where the library helpers (traverseBlas/traverseTlas/traceRay) get + // injected, followed by raygen sources and the @compute entry point. + wgsl += "\n// @CRAFTER_RT_LIBRARY_HELPERS_HERE\n"; + + // ── Section 3: user raygen source files ──────────────────────────── + // + // Comes after the library injects traceRay, so raygens can call it. + + wgsl += "\n// ── user raygen sources ───────────────────────────────────\n"; + std::uint32_t raygenEntryIndex = kRTShaderUnused; + std::string raygenEntryFn; + for (const auto& shader : sbt.shaders) { + if (shader.stage != WebGPURTStage::Raygen) continue; + wgsl += shader.source; + wgsl += "\n"; + // Pick the first raygen group's general shader as the entry. Mirrors + // Vulkan's pRayGenShaderBindingTable[0] → first invoked raygen. + if (raygenEntryFn.empty()) raygenEntryFn = shader.entryFn; + } + if (!raygenGroups.empty() + && raygenGroups[0].generalShader != kRTShaderUnused + && raygenGroups[0].generalShader < sbt.shaders.size()) { + raygenEntryIndex = raygenGroups[0].generalShader; + raygenEntryFn = sbt.shaders[raygenEntryIndex].entryFn; + } + if (raygenEntryFn.empty()) { + std::println("PipelineRTWebGPU::Init: no raygen shader registered"); + pipelineHandle = 0; + return; + } + + // ── Section 4: @compute entry point ──────────────────────────────── + // + // 8x8 tile workgroup matching the rest of the WebGPU backend. + + wgsl += "\n@compute @workgroup_size(8, 8, 1)\n"; + wgsl += "fn main(@builtin(global_invocation_id) gid: vec3) {\n"; + wgsl += " "; + wgsl += raygenEntryFn; + wgsl += "(gid);\n"; + wgsl += "}\n"; + + pipelineHandle = WebGPU::wgpuLoadRTPipeline( + wgsl.data(), + static_cast(wgsl.size())); +} diff --git a/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp b/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp new file mode 100644 index 0000000..2195f85 --- /dev/null +++ b/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp @@ -0,0 +1,91 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +// DOM-mode TLAS upkeep. BuildTLAS copies the per-element RTInstance into +// the host-visible instance buffer (skipping the transform for elements +// whose transform is GPU-owned), uploads it, then dispatches the JS-side +// TLAS-build compute pass — which consults the per-BLAS records published +// at Mesh::Build() time to produce world-space AABBs and inverse +// transforms in the format `traceRay` / `rayQuery` consume. + +module; +module Crafter.Graphics:RenderingElement3D_implWebGPU; + +import :RenderingElement3D; +import :Mesh; +import :WebGPU; +import :WebGPUBuffer; +import std; + +using namespace Crafter; + +std::vector RenderingElement3D::elements; + +void RenderingElement3D::Add(RenderingElement3D* e) { + e->indexInElements = static_cast(elements.size()); + elements.push_back(e); +} + +void RenderingElement3D::Remove(RenderingElement3D* e) { + std::uint32_t idx = e->indexInElements; + if (idx == std::numeric_limits::max()) return; + std::uint32_t last = static_cast(elements.size() - 1); + if (idx != last) { + elements[idx] = elements[last]; + elements[idx]->indexInElements = idx; + } + elements.pop_back(); + e->indexInElements = std::numeric_limits::max(); +} + +void RenderingElement3D::BuildTLAS(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t index) { + auto& tlas = tlases[index]; + const std::uint32_t primitiveCount = static_cast(elements.size()); + if (primitiveCount == 0) { + tlas.builtInstanceCount = 0; + return; + } + + // (Re)allocate instance + metadata + output TLAS buffers if the count + // changed. WebGPUBuffer::Resize destroys and recreates the GPU buffer; + // bind-group caches keyed on the buffer handle are invalidated in the + // JS bridge automatically. + if (primitiveCount != tlas.builtInstanceCount) { + tlas.instanceBuffer.Resize(primitiveCount); + tlas.metadataBuffer.Resize(primitiveCount); + // TLASEntry layout in WGSL is 144 bytes due to vec3 align/pad + // rules. Must match the struct declared in the rtWgslTypes + // block in additional/dom-webgpu.js. + tlas.buffer.Resize(primitiveCount * 144); + } + + for (std::uint32_t i = 0; i < primitiveCount; ++i) { + auto& dst = tlas.instanceBuffer.value[i]; + const auto& src = elements[i]->instance; + if (elements[i]->transformOwnedByGpu) { + // Preserve whatever the GPU compute shader most recently + // wrote into dst.transform. Update only the non-transform + // fields. + dst.instanceCustomIndex = src.instanceCustomIndex; + dst.mask = src.mask; + dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset; + dst.flags = src.flags; + dst.accelerationStructureReference = src.accelerationStructureReference; + } else { + dst = src; + } + tlas.metadataBuffer.value[i] = elements[i]->userMetadata; + } + + tlas.instanceBuffer.FlushDevice(); + tlas.metadataBuffer.FlushDevice(); + + WebGPU::wgpuBuildTLAS(tlas.instanceBuffer.handle, + static_cast(primitiveCount), + tlas.buffer.handle); + + tlas.builtInstanceCount = primitiveCount; +} diff --git a/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp b/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp new file mode 100644 index 0000000..745468b --- /dev/null +++ b/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp @@ -0,0 +1,32 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +module; +module Crafter.Graphics:ShaderBindingTableWebGPU_impl; + +import :ShaderBindingTableWebGPU; +import std; + +using namespace Crafter; + +WebGPUShader::WebGPUShader(const std::filesystem::path& wgslPath, + std::string fn, + WebGPURTStage s) + : entryFn(std::move(fn)), stage(s) { + std::ifstream f(wgslPath, std::ios::binary | std::ios::ate); + if (!f.is_open()) { + std::println("WebGPUShader: cannot open {}", wgslPath.string()); + return; + } + auto size = f.tellg(); + if (size <= 0) { + std::println("WebGPUShader: empty file {}", wgslPath.string()); + return; + } + f.seekg(0, std::ios::beg); + source.resize(static_cast(size)); + f.read(source.data(), size); +} diff --git a/implementations/Crafter.Graphics-WebGPUComputeShader.cpp b/implementations/Crafter.Graphics-WebGPUComputeShader.cpp index 5a8e554..8546eb1 100644 --- a/implementations/Crafter.Graphics-WebGPUComputeShader.cpp +++ b/implementations/Crafter.Graphics-WebGPUComputeShader.cpp @@ -12,18 +12,22 @@ import std; using namespace Crafter; void WebGPUComputeShader::Load(std::string_view wgsl, - std::span bindings) { + std::span bindings, + bool rayQuery) { customBindings.assign(bindings.begin(), bindings.end()); + rayQueryCapable = rayQuery; pipelineHandle = WebGPU::wgpuLoadCustomShader( wgsl.data(), static_cast(wgsl.size()), customBindings.data(), - static_cast(customBindings.size()) + static_cast(customBindings.size()), + rayQuery ? 1 : 0 ); } void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath, - std::span bindings) { + std::span bindings, + bool rayQuery) { std::ifstream f(wgslPath, std::ios::binary | std::ios::ate); if (!f.is_open()) { std::println("WebGPUComputeShader::Load: cannot open {}", wgslPath.string()); @@ -37,5 +41,5 @@ void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath, f.seekg(0, std::ios::beg); std::string src(static_cast(size), '\0'); f.read(src.data(), size); - Load(std::string_view{src}, bindings); + Load(std::string_view{src}, bindings, rayQuery); } diff --git a/interfaces/Crafter.Graphics-Mesh.cppm b/interfaces/Crafter.Graphics-Mesh.cppm index a2ef23c..d8385ad 100644 --- a/interfaces/Crafter.Graphics-Mesh.cppm +++ b/interfaces/Crafter.Graphics-Mesh.cppm @@ -60,3 +60,54 @@ export namespace Crafter { }; } #endif // !CRAFTER_GRAPHICS_WINDOW_DOM + +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; +import Crafter.Math; +import :WebGPU; + +export namespace Crafter { + // Software-RT BLAS node, packed to 32 bytes. Matches the WGSL + // `BVHNode` struct in the RT WGSL prelude (additional/dom-webgpu.js, + // rtWgslPrelude) byte-for-byte. + // + // primCount == 0 → inner node, children at indices + // firstChildOrPrim and firstChildOrPrim+1. + // primCount > 0 → leaf, `primCount` primitives starting at + // primIndex `firstChildOrPrim` in the + // global primRemap heap. + // + // SAH-built BVH2; constructed CPU-side at Build() time, never refit. + struct BVHNode { + float aabbMin[3]; + std::uint32_t firstChildOrPrim; + float aabbMax[3]; + std::uint32_t primCount; + }; + static_assert(sizeof(BVHNode) == 32); + + class Mesh { + public: + // BLAS "handle": opaque identity that goes into + // RTInstance::accelerationStructureReference. Set by Build() to a + // stable u32 (widened to u64 for Vulkan-struct layout parity), used + // by the WebGPU TLAS-build compute shader to look up the BLAS root + // AABB and per-mesh heap offsets. Handle 0 is the unassigned + // sentinel; never returned by Build(). + std::uint64_t blasAddr = 0; + std::uint32_t triangleCount = 0; + + bool opaque = true; + + // Build BLAS from raw triangle data. Runs the CPU SAH BVH2 builder + // and forwards vertex/index/BVH/remap arrays to the JS-side mesh + // heap (additional/dom-webgpu.js), which queue.writeBuffers them + // into the global heaps and records the per-mesh offsets keyed by + // the returned handle. The `cmd` parameter is unused on WebGPU — + // kept for API symmetry with the Vulkan signature. + void Build(std::span> vertices, + std::span indices, + WebGPUCommandEncoderRef cmd = 0); + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm new file mode 100644 index 0000000..b3df012 --- /dev/null +++ b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm @@ -0,0 +1,51 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +// DOM-mode RT pipeline. Mirrors PipelineRTVulkan's surface — Init takes +// the same kind of (raygen, miss, hit) shader-group spans plus an SBT. +// The big difference is implementation: there's no native RT pipeline on +// WebGPU, so Init assembles a single megakernel WGSL by concatenating +// 1. library prelude (types, bindings, ray-flag constants) +// 2. user closesthit / anyhit / miss source files +// 3. library mega-switches dispatched on per-hit hit-group index +// 4. library helpers (rayAabb / rayTriangle / traverseBlas / traverseTlas) +// 5. library traceRay function +// 6. user raygen source files +// 7. @compute entry calling the registered raygen +// and hands the result to wgpuLoadRTPipeline. +// +// The library WGSL itself lives in additional/dom-webgpu.js (rtWgslPrelude +// + rtWgslDispatchTemplate). C++ side only knows the substitution markers. + +export module Crafter.Graphics:PipelineRTWebGPU; +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; +import :RT; +import :WebGPU; +import :ShaderBindingTableWebGPU; + +export namespace Crafter { + class PipelineRTWebGPU { + public: + std::uint32_t pipelineHandle = 0; + + // Build the megakernel pipeline. Groups carry indices into + // `sbt.shaders`. The library generates one `case` per registered + // group: closest-hit groups dispatch to their closestHitShader's + // entryFn, miss groups to their generalShader's entryFn, etc. + // The `cmd` parameter is unused on WebGPU; kept for API symmetry. + void Init(WebGPUCommandEncoderRef cmd, + std::span raygenGroups, + std::span missGroups, + std::span hitGroups, + const ShaderBindingTableWebGPU& sbt); + + PipelineRTWebGPU() = default; + PipelineRTWebGPU(const PipelineRTWebGPU&) = delete; + PipelineRTWebGPU& operator=(const PipelineRTWebGPU&) = delete; + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-RT.cppm b/interfaces/Crafter.Graphics-RT.cppm new file mode 100644 index 0000000..77be96d --- /dev/null +++ b/interfaces/Crafter.Graphics-RT.cppm @@ -0,0 +1,83 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License version 3.0 as published by the Free Software Foundation; +*/ + +// Portable RT types & constants. +// +// Native: aliases the Vulkan struct so existing code that passes +// `RenderingElement3D::instance` directly into vkCmdBuildAccelerationStructuresKHR +// is a no-op layout-wise. +// DOM: provides a POD with the same byte layout + the same field names, so +// user code touching `instance.mask`, `instance.flags`, `instance.transform` +// etc. compiles unchanged. +// +// Flag constants are spelled out as Crafter::kRT* so portable user code can +// avoid referencing VK_* on the DOM target. The values match +// VkGeometryInstanceFlagBitsKHR / VkRayTracingShaderGroupTypeKHR so the +// constants compare-equal on Vulkan if the user wants to mix surfaces. + +module; +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM +#include "vulkan/vulkan.h" +#endif +export module Crafter.Graphics:RT; +import std; + +export namespace Crafter { +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM + using RTTransformMatrix = VkTransformMatrixKHR; + using RTInstance = VkAccelerationStructureInstanceKHR; +#else + // Mirrors VkTransformMatrixKHR: row-major affine 3x4. + struct RTTransformMatrix { + float matrix[3][4]; + }; + static_assert(sizeof(RTTransformMatrix) == 48); + + // Mirrors VkAccelerationStructureInstanceKHR byte-for-byte. + // On WebGPU the `accelerationStructureReference` slot holds the BLAS + // handle returned by MeshWebGPU::blasHandle (a u32 widened to u64). + struct RTInstance { + RTTransformMatrix transform; + std::uint32_t instanceCustomIndex : 24; + std::uint32_t mask : 8; + std::uint32_t instanceShaderBindingTableRecordOffset : 24; + std::uint32_t flags : 8; + std::uint64_t accelerationStructureReference; + }; + static_assert(sizeof(RTInstance) == 64); +#endif + + // VkGeometryInstanceFlagBitsKHR mirror. Values verbatim so equal on both. + inline constexpr std::uint8_t kRTGeometryInstanceTriangleFacingCullDisable = 0x1; + inline constexpr std::uint8_t kRTGeometryInstanceTriangleFlipFacing = 0x2; + inline constexpr std::uint8_t kRTGeometryInstanceForceOpaque = 0x4; + inline constexpr std::uint8_t kRTGeometryInstanceForceNoOpaque = 0x8; + + // Hit-group identification. Matches VkRayTracingShaderGroupTypeKHR for + // the two types we actually support (general + triangles-hit). + enum class RTShaderGroupType : std::uint8_t { + General = 0, // raygen / miss / callable + TrianglesHitGroup = 1, + }; + + // Cross-backend description of one entry in the shader-group array + // passed to PipelineRT::Init. Mirrors the meaningful subset of + // VkRayTracingShaderGroupCreateInfoKHR: per group, the type and the + // indices (into the SBT's shader array) for general / closestHit / + // anyHit, with kRTShaderUnused == VK_SHADER_UNUSED_KHR for "none". + inline constexpr std::uint32_t kRTShaderUnused = 0xFFFFFFFFu; + + struct RTShaderGroup { + RTShaderGroupType type = RTShaderGroupType::General; + std::uint32_t generalShader = kRTShaderUnused; + std::uint32_t closestHitShader = kRTShaderUnused; + std::uint32_t anyHitShader = kRTShaderUnused; + }; +} diff --git a/interfaces/Crafter.Graphics-RTPass.cppm b/interfaces/Crafter.Graphics-RTPass.cppm index ada74cf..78064c0 100644 --- a/interfaces/Crafter.Graphics-RTPass.cppm +++ b/interfaces/Crafter.Graphics-RTPass.cppm @@ -46,3 +46,42 @@ export namespace Crafter { }; } #endif // !CRAFTER_GRAPHICS_WINDOW_DOM + +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; +import :RenderPass; +import :Window; +import :WebGPU; +import :PipelineRTWebGPU; +import :RenderingElement3D; + +export namespace Crafter { + // DOM-mode RT pass — dispatches the megakernel pipeline at frame Record + // time. Picks up the current TLAS for the frame and the application's + // raygen-side push data (typically empty in v1; pass via window.passes + // wiring if needed later). + struct RTPass : RenderPass { + PipelineRTWebGPU* pipeline; + // Optional per-dispatch push data forwarded after the standard + // RTDispatchHeader. Null means "no extra data". + const void* pushPtr = nullptr; + std::uint32_t pushBytes = 0; + + RTPass(PipelineRTWebGPU* p) : pipeline(p) {} + + void Record(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t frameIdx, Window& window) override { + const std::uint32_t gx = (window.width + 7u) / 8u; + const std::uint32_t gy = (window.height + 7u) / 8u; + auto& tlas = RenderingElement3D::tlases[frameIdx]; + WebGPU::wgpuDispatchRT( + pipeline->pipelineHandle, + pushPtr, + static_cast(pushBytes), + tlas.buffer.handle, + static_cast(tlas.builtInstanceCount), + static_cast(gx), + static_cast(gy)); + } + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-RenderingElement3D.cppm b/interfaces/Crafter.Graphics-RenderingElement3D.cppm index 6cbb19b..9756745 100644 --- a/interfaces/Crafter.Graphics-RenderingElement3D.cppm +++ b/interfaces/Crafter.Graphics-RenderingElement3D.cppm @@ -22,6 +22,7 @@ module; #include "vulkan/vulkan.h" #endif // !CRAFTER_GRAPHICS_WINDOW_DOM export module Crafter.Graphics:RenderingElement3D; +import :RT; #ifndef CRAFTER_GRAPHICS_WINDOW_DOM import std; import :Mesh; @@ -55,7 +56,7 @@ export namespace Crafter { class RenderingElement3D { public: - VkAccelerationStructureInstanceKHR instance; + RTInstance instance; // Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop. // Sentinel value = not currently registered. std::uint32_t indexInElements = std::numeric_limits::max(); @@ -87,3 +88,63 @@ export namespace Crafter { }; } #endif // !CRAFTER_GRAPHICS_WINDOW_DOM + +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; +import :Mesh; +import :WebGPU; +import :WebGPUBuffer; +import :Window; + +export namespace Crafter { + // Per-frame TLAS storage. WebGPU has no real swapchain frame count + // (Window::numFrames = 1 on DOM), so this is effectively a singleton — + // the array form is kept for API symmetry with the Vulkan side so user + // code that indexes `tlases[frameIdx]` ports unchanged. + struct TlasWithBuffer { + // Host-visible instance buffer holding RTInstance entries — same + // layout as Vulkan's VkAccelerationStructureInstanceKHR, so user + // code touching .instance.mask / .flags / .transform.matrix is + // identical across backends. Also bound as a storage SSBO so + // application compute shaders (e.g. physics-tlas-transform.comp.wgsl) + // can write the .transform field directly when + // RenderingElement3D::transformOwnedByGpu is set. + WebGPUBuffer instanceBuffer; + // Per-instance application metadata; parallel to instanceBuffer, + // identical semantics to the Vulkan-side counterpart. + WebGPUBuffer metadataBuffer; + // GPU-built TLAS data: one TLASEntry per instance, written each + // BuildTLAS by a compute pass on the JS bridge. Read by traceRay / + // rayQuery as `@group(1) @binding(0) tlas: array`. + // TLASEntry layout: 96 bytes — aabbMin (12) + maskHGoffset (4) + + // aabbMax (12) + blasHandle (4) + invTransform 3x4 mat (48) + + // customIndex (4) + _pad (12). Defined in the WGSL traversal + // library; never directly read by C++. + WebGPUBuffer buffer; + + std::uint32_t builtInstanceCount = 0; + }; + + class RenderingElement3D { + public: + RTInstance instance{}; + std::uint32_t indexInElements = std::numeric_limits::max(); + std::uint32_t userMetadata = 0; + // Application compute shader writes the transform field of this + // element's instanceBuffer slot directly — BuildTLAS preserves it. + bool transformOwnedByGpu = false; + + static std::vector elements; + inline static TlasWithBuffer tlases[Window::numFrames]; + + // Repopulate the TLAS for frame `index`. WebGPU path always does + // a fresh build (no refit) — the GPU build pass is cheap at the + // ~10–100 instance counts the design targets; LBVH-for-TLAS is a + // future optimization for larger scenes. + static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index); + + static void Add(RenderingElement3D* e); + static void Remove(RenderingElement3D* e); + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm b/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm new file mode 100644 index 0000000..c175a52 --- /dev/null +++ b/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm @@ -0,0 +1,64 @@ +/* +Crafter®.Graphics +Copyright (C) 2026 Catcrafts® +catcrafts.net +*/ + +// DOM-mode shader-binding-table analog. Stores raw WGSL source strings +// plus an explicit entry-function name per shader. PipelineRTWebGPU::Init +// concatenates these into the megakernel WGSL at pipeline-build time. + +export module Crafter.Graphics:ShaderBindingTableWebGPU; +#ifdef CRAFTER_GRAPHICS_WINDOW_DOM +import std; + +export namespace Crafter { + enum class WebGPURTStage : std::uint8_t { + Raygen = 0, + Miss = 1, + ClosestHit = 2, + AnyHit = 3, + }; + + // One WGSL shader source + the function name PipelineRTWebGPU should + // call from the megakernel switch. The source may declare any helper + // functions and (in exactly one raygen file) the `Payload` struct. + // + // Required signatures inside `source` for `entryFn`: + // Raygen: fn (gid: vec3) + // Miss: fn (ray: RayDesc, payload: ptr) + // ClosestHit: fn (ray: RayDesc, hit: HitInfo, payload: ptr) + // AnyHit: fn (ray: RayDesc, hit: HitInfo, payload: ptr) -> u32 + // returns RT_ANYHIT_ACCEPT / RT_ANYHIT_IGNORE / RT_ANYHIT_END_SEARCH. + // + // `RayDesc`, `HitInfo`, the `RT_*` flag/return constants, the `tlas` / + // BLAS / mesh-record bindings, and the `traceRay` function are all + // injected by the library prelude — see the rtWgslPrelude block in + // additional/dom-webgpu.js. + struct WebGPUShader { + std::string source; + std::string entryFn; + WebGPURTStage stage = WebGPURTStage::Raygen; + + WebGPUShader() = default; + WebGPUShader(std::string src, std::string fn, WebGPURTStage s) + : source(std::move(src)), entryFn(std::move(fn)), stage(s) {} + + // Construct from a WGSL source file path. Reads via the WASI VFS + // so apps shipping their shaders as static files (see the + // `cfg.files.emplace_back("raygen.wgsl")` pattern in + // examples/VulkanTriangle/project.cpp) get them at runtime. + WebGPUShader(const std::filesystem::path& wgslPath, + std::string fn, + WebGPURTStage s); + }; + + class ShaderBindingTableWebGPU { + public: + std::vector shaders; + void Init(std::span shaders_) { + shaders.assign(shaders_.begin(), shaders_.end()); + } + }; +} +#endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-WebGPU.cppm b/interfaces/Crafter.Graphics-WebGPU.cppm index 6831322..616c8fe 100644 --- a/interfaces/Crafter.Graphics-WebGPU.cppm +++ b/interfaces/Crafter.Graphics-WebGPU.cppm @@ -73,13 +73,62 @@ namespace Crafter::WebGPU { std::uint32_t atlasHandle, std::uint32_t sampHandle); // ─── custom user-authored compute shaders ─────────────────────────── + // rayQueryFlag = 1 swaps group(1) from the UI ping-pong pair to the RT + // data heaps (TLAS, BVH, meshRecs, verts, idx, primRemap, outImage) and + // prepends a WGSL prelude exposing the rayQuery* API. Shaders that set + // this MUST NOT declare their own @group(1) bindings. __attribute__((import_module("env"), import_name("wgpuLoadCustomShader"))) extern "C" std::uint32_t wgpuLoadCustomShader(const void* wgslPtr, std::int32_t wgslLen, - const void* bindingsPtr, std::int32_t bindingsCount); + const void* bindingsPtr, std::int32_t bindingsCount, + std::int32_t rayQueryFlag); __attribute__((import_module("env"), import_name("wgpuDispatchCustom"))) extern "C" void wgpuDispatchCustom(std::uint32_t pipelineHandle, const void* pushPtr, std::int32_t pushBytes, const void* handlesPtr, std::int32_t handlesCount, std::int32_t gx, std::int32_t gy, std::int32_t gz); + + // ─── software raytracing ─────────────────────────────────────────── + // + // Mesh::Build forwards vertex / index / BVH-node / primRemap arrays + // to the JS bridge, which queue.writeBuffers them into the global + // RT mesh heaps (growing if needed) and records the per-mesh offsets + // under a freshly-allocated u32 handle. The handle is what user code + // stores in RTInstance::accelerationStructureReference; the WebGPU + // TLAS-build compute shader resolves it back to root AABB + heap + // offsets at dispatch time. Returns 0 on failure. + __attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS"))) + extern "C" std::uint32_t wgpuRegisterMeshBLAS( + float minX, float minY, float minZ, + float maxX, float maxY, float maxZ, + const void* verticesPtr, std::int32_t vertexCount, + const void* indicesPtr, std::int32_t indexCount, + const void* bvhNodesPtr, std::int32_t bvhNodeCount, + const void* primRemapPtr, std::int32_t primRemapCount); + + // RT pipeline build. The library composes WGSL by concatenating the + // traversal library, generated hit-group switches, and the user- + // supplied raygen / miss / closesthit / anyhit bodies. Returns an + // opaque pipeline handle. + __attribute__((import_module("env"), import_name("wgpuLoadRTPipeline"))) + extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen); + + // Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched + // over a (gx, gy) tile grid; the library writes the push data (camera, + // payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS + // + global mesh heap, and runs one workgroup per 8x8 screen tile. + __attribute__((import_module("env"), import_name("wgpuDispatchRT"))) + extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle, + const void* pushPtr, std::int32_t pushBytes, + std::uint32_t tlasBufHandle, + std::int32_t instanceCount, + std::int32_t gx, std::int32_t gy); + + // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or + // GPU-written), produces per-instance world-space AABBs + per-instance + // transform matrices in a flat tlasBuf SSBO consumed by traceRay / rayQuery. + __attribute__((import_module("env"), import_name("wgpuBuildTLAS"))) + extern "C" void wgpuBuildTLAS(std::uint32_t instanceBufHandle, + std::int32_t instanceCount, + std::uint32_t tlasOutBufHandle); } #endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm index fac065f..867bb26 100644 --- a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm +++ b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm @@ -49,6 +49,7 @@ export namespace Crafter { class WebGPUComputeShader { public: std::uint32_t pipelineHandle = 0; + bool rayQueryCapable = false; std::vector customBindings; WebGPUComputeShader() = default; @@ -56,22 +57,28 @@ export namespace Crafter { WebGPUComputeShader& operator=(const WebGPUComputeShader&) = delete; WebGPUComputeShader(WebGPUComputeShader&& o) noexcept : pipelineHandle(o.pipelineHandle), + rayQueryCapable(o.rayQueryCapable), customBindings(std::move(o.customBindings)) { o.pipelineHandle = 0; } // Compile + link a custom compute shader. `wgsl` is the source - // string; the library does NOT add anything to it — the user's - // shader must declare @group(0)/@group(1) bindings matching the - // contract above. `bindings` lists every additional resource - // (groups 2+) that the renderer should bind at dispatch time. + // string; the library does NOT add anything to it (except when + // `rayQuery` is true — then a RT prelude exposing the rayQuery* + // API is prepended). The user's shader must declare + // @group(0)/@group(1) bindings matching the contract above + // (rayQuery shaders MUST NOT redeclare group(1)). + // `bindings` lists every additional resource (groups 2+) that the + // renderer should bind at dispatch time. void Load(std::string_view wgsl, - std::span bindings = {}); + std::span bindings = {}, + bool rayQuery = false); // Path-based overload for symmetry with the Vulkan ComputeShader. // Reads the file from disk (browser VFS) and forwards to Load(wgsl). void Load(const std::filesystem::path& wgslPath, - std::span bindings = {}); + std::span bindings = {}, + bool rayQuery = false); }; } #endif // CRAFTER_GRAPHICS_WINDOW_DOM diff --git a/interfaces/Crafter.Graphics.cppm b/interfaces/Crafter.Graphics.cppm index 8b9f6fd..b48f029 100644 --- a/interfaces/Crafter.Graphics.cppm +++ b/interfaces/Crafter.Graphics.cppm @@ -58,6 +58,10 @@ export import :UIComponents; export import :InputField; export import :Decompress; +// Portable RT type aliases (provided on both targets — uses Vulkan +// structs natively, plain PODs of the same layout on DOM). +export import :RT; + // DOM-only partitions — empty under native. export import :Dom; export import :DomEvents; @@ -66,3 +70,5 @@ export import :WebGPU; export import :WebGPUBuffer; export import :DescriptorHeapWebGPU; export import :WebGPUComputeShader; +export import :ShaderBindingTableWebGPU; +export import :PipelineRTWebGPU; diff --git a/project.cpp b/project.cpp index 06f64ac..c1a162c 100644 --- a/project.cpp +++ b/project.cpp @@ -131,7 +131,7 @@ extern "C" Configuration CrafterBuildProject(std::span a // when its body is gated out. Vulkan-typed partitions stub to empty // modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router // partitions stub to empty modules in the opposite direction. - std::array ifaces = { + std::array ifaces = { "interfaces/Crafter.Graphics", "interfaces/Crafter.Graphics-Animation", "interfaces/Crafter.Graphics-Clipboard", @@ -153,12 +153,15 @@ extern "C" Configuration CrafterBuildProject(std::span a "interfaces/Crafter.Graphics-Keys", "interfaces/Crafter.Graphics-Mesh", "interfaces/Crafter.Graphics-PipelineRTVulkan", + "interfaces/Crafter.Graphics-PipelineRTWebGPU", "interfaces/Crafter.Graphics-RenderingElement3D", "interfaces/Crafter.Graphics-RenderPass", "interfaces/Crafter.Graphics-Router", + "interfaces/Crafter.Graphics-RT", "interfaces/Crafter.Graphics-RTPass", "interfaces/Crafter.Graphics-SamplerVulkan", "interfaces/Crafter.Graphics-ShaderBindingTableVulkan", + "interfaces/Crafter.Graphics-ShaderBindingTableWebGPU", "interfaces/Crafter.Graphics-ShaderVulkan", "interfaces/Crafter.Graphics-Types", "interfaces/Crafter.Graphics-UI", @@ -175,14 +178,18 @@ extern "C" Configuration CrafterBuildProject(std::span a // DOM impl set. UI-Shared.cpp is backend-agnostic; UI-WebGPU.cpp // is the DOM-only implementation of UIRenderer's GPU-touching // methods. Font / FontAtlas / UIComponents are now portable. - std::array domImpls = { + std::array domImpls = { "implementations/Crafter.Graphics-Clipboard", "implementations/Crafter.Graphics-Dom", "implementations/Crafter.Graphics-Font", "implementations/Crafter.Graphics-FontAtlas", "implementations/Crafter.Graphics-Gamepad", "implementations/Crafter.Graphics-Input", + "implementations/Crafter.Graphics-Mesh-WebGPU", + "implementations/Crafter.Graphics-PipelineRTWebGPU", + "implementations/Crafter.Graphics-RenderingElement3D-WebGPU", "implementations/Crafter.Graphics-Router", + "implementations/Crafter.Graphics-ShaderBindingTableWebGPU", "implementations/Crafter.Graphics-UI-Shared", "implementations/Crafter.Graphics-UI-WebGPU", "implementations/Crafter.Graphics-UIComponents",