From a91603c70b20ff2d5705070d439ff839de72b43c Mon Sep 17 00:00:00 2001 From: catbot Date: Tue, 2 Jun 2026 22:09:20 +0000 Subject: [PATCH] feat(webgpu-rt): emit intersection/any-hit dispatch + build AABB BVH PipelineRTWebGPU emits a runIntersection mega-switch and the RT_HAS_ANYHIT / RT_HAS_INTERSECTION consts (+ the @CRAFTER_RT_TRACE_USER marker) that gate the library's new TRACE-stage user callbacks, so an opaque triangle-only scene still const-folds them away. Mesh-WebGPU builds a SAH BVH2 over AABB primitives and uploads them in primitive order for the intersection shader to fetch. Co-Authored-By: Claude Opus 4.8 --- .../Crafter.Graphics-Mesh-WebGPU.cpp | 64 ++++++++++++++++++- .../Crafter.Graphics-PipelineRTWebGPU.cpp | 53 +++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp index 2ebd128..69f3c9a 100644 --- a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp +++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp @@ -213,6 +213,25 @@ namespace { nodes.emplace_back(); BuildRecursive(0, 0, triCount); } + + // AABB (procedural) geometry: one PrimRef per box, the box itself + // is the primitive bound. The same SAH BVH2 then partitions them. + void BuildFromAabbs(std::span aabbs) { + std::uint32_t count = static_cast(aabbs.size()); + prims.resize(count); + for (std::uint32_t i = 0; i < count; ++i) { + auto& pr = prims[i]; + pr.box.Extend(aabbs[i].min); + pr.box.Extend(aabbs[i].max); + pr.centroid[0] = (pr.box.lo[0] + pr.box.hi[0]) * 0.5f; + pr.centroid[1] = (pr.box.lo[1] + pr.box.hi[1]) * 0.5f; + pr.centroid[2] = (pr.box.lo[2] + pr.box.hi[2]) * 0.5f; + pr.triIndex = i; + } + nodes.reserve(count * 2); + nodes.emplace_back(); + BuildRecursive(0, 0, count); + } }; } @@ -243,7 +262,10 @@ namespace { indices.data(), static_cast(indices.size()), builder.nodes.data(), static_cast(builder.nodes.size()), primRemap.data(), static_cast(primRemap.size()), - attribsBytes.data(), static_cast(attribsBytes.size())); + attribsBytes.data(), static_cast(attribsBytes.size()), + /*geomType*/ 0, + /*opaqueFlag*/ mesh.opaque ? 1 : 0, + /*primCount*/ static_cast(mesh.triangleCount)); } } @@ -273,3 +295,43 @@ void Mesh::Build(const CompressedMeshAsset& asset, BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes)); } + +void Mesh::BuildProcedural(std::span aabbs, + bool opaque_, + WebGPUCommandEncoderRef /*cmd*/) { + const std::uint32_t count = static_cast(aabbs.size()); + opaque = opaque_; + triangleCount = 0; // not a triangle mesh + vertexCount = count * 2; // 2 "vertices" (min,max) per box + + Builder builder; + builder.BuildFromAabbs(aabbs); + + // The AABB stream is uploaded in *original* primitive order (2 vec3 per + // box). primRemap maps each BVH leaf slot back to its original index, so + // the intersection shader's _rtFetchAabb(meshRec, primId) reads the + // right box — exactly mirroring how the triangle path indexes vertices. + std::vector> boxVerts(count * 2); + for (std::uint32_t i = 0; i < count; ++i) { + boxVerts[i*2 + 0] = Vector{ aabbs[i].min[0], aabbs[i].min[1], aabbs[i].min[2] }; + boxVerts[i*2 + 1] = Vector{ aabbs[i].max[0], aabbs[i].max[1], aabbs[i].max[2] }; + } + + std::vector primRemap(count); + for (std::uint32_t i = 0; i < count; ++i) { + primRemap[i] = builder.prims[i].triIndex; + } + + const BVHNode& root = builder.nodes[0]; + blasAddr = WebGPU::wgpuRegisterMeshBLAS( + root.aabbMin[0], root.aabbMin[1], root.aabbMin[2], + root.aabbMax[0], root.aabbMax[1], root.aabbMax[2], + boxVerts.data(), static_cast(boxVerts.size()), + nullptr, 0, + builder.nodes.data(), static_cast(builder.nodes.size()), + primRemap.data(), static_cast(primRemap.size()), + nullptr, 0, + /*geomType*/ 1, + /*opaqueFlag*/ opaque ? 1 : 0, + /*primCount*/ static_cast(count)); +} diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp index 373249a..27642a5 100644 --- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp +++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp @@ -36,6 +36,8 @@ namespace { "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr) -> u32 { return RT_ANYHIT_ACCEPT; }"; constexpr std::string_view kPlaceholderMiss = "fn _crafter_default_miss(ray: RayDesc, payload: ptr) {}"; + constexpr std::string_view kPlaceholderIntersection = + "fn _crafter_default_intersection(ray: RayDesc, aabbMin: vec3, aabbMax: vec3, primitiveId: u32) -> IntersectionResult { var r: IntersectionResult; r.hit = false; return r; }"; void AppendCase(std::string& out, std::uint32_t hitGroupIndex, @@ -60,6 +62,17 @@ namespace { out += entryFn; out += "(ray, hit, payload); }\n"; } + + // intersection has a return type — forwards the AABB args + the result. + void AppendIntersectionCase(std::string& out, + std::uint32_t hitGroupIndex, + std::string_view entryFn) { + out += " case "; + out += std::to_string(hitGroupIndex); + out += "u: { return "; + out += entryFn; + out += "(ray, aabbMin, aabbMax, primitiveId); }\n"; + } } void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, @@ -150,6 +163,46 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, wgsl += " }\n"; wgsl += "}\n"; + // runIntersection — per-AABB procedural intersection dispatch. For a + // ProceduralHitGroup the intersection shader determines the hit; for + // triangle groups (or groups with no intersection shader) the default + // reports no hit, so the BLAS leaf falls back to the triangle path. + wgsl += "\nfn runIntersection(hg: u32, ray: RayDesc, aabbMin: vec3, aabbMax: vec3, primitiveId: u32) -> IntersectionResult {\n"; + wgsl += " switch hg {\n"; + bool anyIntersection = false; + for (std::uint32_t i = 0; i < hitGroups.size(); ++i) { + const auto& g = hitGroups[i]; + if (g.intersectionShader == kRTShaderUnused) continue; + if (g.intersectionShader >= sbt.shaders.size()) continue; + const auto& fn = sbt.shaders[g.intersectionShader].entryFn; + AppendIntersectionCase(wgsl, i, fn); + anyIntersection = true; + } + if (!anyIntersection) wgsl += " // (no intersection shaders registered)\n"; + wgsl += " default: { }\n"; + wgsl += " }\n"; + wgsl += " var none: IntersectionResult;\n"; + wgsl += " none.hit = false;\n"; + wgsl += " return none;\n"; + wgsl += "}\n"; + + // Trace-time capability flags. The library traversal (injected at the + // marker below) gates its any-hit / intersection callbacks on these + // consts, so a triangle-only opaque scene dead-strips all user code out + // of TRACE and keeps its zero-user-code register footprint. When either + // is set the JS side also gives the TRACE pipeline the user bind-group + // layout (so any-hit / intersection shaders can sample @group(3+) + // resources) — it scans for the exact `@CRAFTER_RT_TRACE_USER` marker. + wgsl += "\nconst RT_HAS_ANYHIT: bool = "; + wgsl += (anyAnyhit ? "true" : "false"); + wgsl += ";\n"; + wgsl += "const RT_HAS_INTERSECTION: bool = "; + wgsl += (anyIntersection ? "true" : "false"); + wgsl += ";\n"; + if (anyAnyhit || anyIntersection) { + wgsl += "// @CRAFTER_RT_TRACE_USER = true\n"; + } + // runResolve — RESOLVE-stage tonemap hook. The first registered // Resolve shader wins; with none, identity passthrough (alpha forced // to 1) so the wavefront output matches a megakernel that wrote raw