From a91603c70b20ff2d5705070d439ff839de72b43c Mon Sep 17 00:00:00 2001
From: catbot <catbot@bot.local>
Date: Tue, 2 Jun 2026 22:09:20 +0000
Subject: [PATCH] feat(webgpu-rt): emit intersection/any-hit dispatch + build
 AABB BVH

PipelineRTWebGPU emits a runIntersection mega-switch and the
RT_HAS_ANYHIT / RT_HAS_INTERSECTION consts (+ the @CRAFTER_RT_TRACE_USER
marker) that gate the library's new TRACE-stage user callbacks, so an
opaque triangle-only scene still const-folds them away. Mesh-WebGPU
builds a SAH BVH2 over AABB primitives and uploads them in primitive
order for the intersection shader to fetch.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../Crafter.Graphics-Mesh-WebGPU.cpp          | 64 ++++++++++++++++++-
 .../Crafter.Graphics-PipelineRTWebGPU.cpp     | 53 +++++++++++++++
 2 files changed, 116 insertions(+), 1 deletion(-)
diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
index 2ebd128..69f3c9a 100644
--- a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
+++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
@@ -213,6 +213,25 @@ namespace {
             nodes.emplace_back();
             BuildRecursive(0, 0, triCount);
         }
+
+        // AABB (procedural) geometry path: each input box yields one PrimRef
+        // bounded by that box; the same SAH BVH2 build then partitions them.
+        void BuildFromAabbs(std::span<const RTAabb> aabbs) {
+            auto boxCount = static_cast<std::uint32_t>(aabbs.size());
+            prims.resize(boxCount);
+            for (std::uint32_t boxIdx = 0; boxIdx < boxCount; ++boxIdx) {
+                auto& ref = prims[boxIdx];
+                ref.triIndex = boxIdx;          // box's position in the input span
+                ref.box.Extend(aabbs[boxIdx].min);
+                ref.box.Extend(aabbs[boxIdx].max);
+                for (int axis = 0; axis < 3; ++axis) {   // centroid = box midpoint
+                    ref.centroid[axis] = (ref.box.lo[axis] + ref.box.hi[axis]) * 0.5f;
+                }
+            }
+            nodes.reserve(boxCount * 2);
+            nodes.emplace_back();
+            BuildRecursive(0, 0, boxCount);
+        }
     };
 }
 
@@ -243,7 +262,10 @@ namespace {
             indices.data(),        static_cast<std::int32_t>(indices.size()),
             builder.nodes.data(),  static_cast<std::int32_t>(builder.nodes.size()),
             primRemap.data(),      static_cast<std::int32_t>(primRemap.size()),
-            attribsBytes.data(),   static_cast<std::int32_t>(attribsBytes.size()));
+            attribsBytes.data(),   static_cast<std::int32_t>(attribsBytes.size()),
+            /*geomType*/ 0,
+            /*opaqueFlag*/ mesh.opaque ? 1 : 0,
+            /*primCount*/ static_cast<std::int32_t>(mesh.triangleCount));
     }
 }
 
@@ -273,3 +295,43 @@ void Mesh::Build(const CompressedMeshAsset& asset,
 
     BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes));
 }
+
+void Mesh::BuildProcedural(std::span<const RTAabb> aabbs,
+                           bool                    opaque_,
+                           WebGPUCommandEncoderRef /*cmd*/) {
+    const auto boxCount = static_cast<std::uint32_t>(aabbs.size());
+    vertexCount   = boxCount * 2;      // one (min, max) corner pair per box
+    triangleCount = 0;                 // procedural geometry carries no triangles
+    opaque        = opaque_;
+
+    Builder bvh;
+    bvh.BuildFromAabbs(aabbs);
+
+    // The box corners are uploaded in the caller's primitive order, two vec3
+    // per box (min, then max). primRemap[slot] is the triIndex the builder
+    // holds at that slot, i.e. the box's original index, which is what lets
+    // the intersection shader's _rtFetchAabb(meshRec, primId) find its box.
+    std::vector<Vector<float, 3, 3>> boxVerts(boxCount * 2);
+    std::vector<std::uint32_t>       primRemap(boxCount);
+    for (std::uint32_t b = 0; b < boxCount; ++b) {
+        boxVerts[b*2 + 0] = Vector<float, 3, 3>{ aabbs[b].min[0], aabbs[b].min[1], aabbs[b].min[2] };
+        boxVerts[b*2 + 1] = Vector<float, 3, 3>{ aabbs[b].max[0], aabbs[b].max[1], aabbs[b].max[2] };
+        primRemap[b]      = bvh.prims[b].triIndex;
+    }
+
+    const auto vertCount  = static_cast<std::int32_t>(boxVerts.size());
+    const auto nodeCount  = static_cast<std::int32_t>(bvh.nodes.size());
+    const auto remapCount = static_cast<std::int32_t>(primRemap.size());
+    const BVHNode& root = bvh.nodes[0];
+    blasAddr = WebGPU::wgpuRegisterMeshBLAS(
+        root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
+        root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
+        boxVerts.data(),  vertCount,
+        nullptr,          0,             // no index buffer for boxes
+        bvh.nodes.data(), nodeCount,
+        primRemap.data(), remapCount,
+        nullptr,          0,             // no per-vertex attribute bytes
+        /*geomType*/ 1,
+        /*opaqueFlag*/ opaque ? 1 : 0,
+        /*primCount*/ static_cast<std::int32_t>(boxCount));
+}
diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
index 373249a..27642a5 100644
--- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
+++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
@@ -36,6 +36,8 @@ namespace {
         "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }";
     constexpr std::string_view kPlaceholderMiss =
         "fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}";
+    [[maybe_unused]] constexpr std::string_view kPlaceholderIntersection =  // WGSL no-hit intersection stub. NOTE(review): appears unreferenced (runIntersection emits its own fallback) — wire in or drop.
+        "fn _crafter_default_intersection(ray: RayDesc, aabbMin: vec3<f32>, aabbMax: vec3<f32>, primitiveId: u32) -> IntersectionResult { var r: IntersectionResult; r.hit = false; return r; }";
 
     void AppendCase(std::string& out,
                     std::uint32_t hitGroupIndex,
@@ -60,6 +62,17 @@ namespace {
         out += entryFn;
         out += "(ray, hit, payload); }\n";
     }
+
+    // Emits one switch arm, `case <hitGroupIndex>u`, that returns entryFn's
+    // result when called with the ray, the AABB corners and the primitive id.
+    void AppendIntersectionCase(std::string& out,
+                                std::uint32_t hitGroupIndex,
+                                std::string_view entryFn) {
+        const std::string label = std::to_string(hitGroupIndex);
+        out.append("        case ").append(label).append("u: { return ");
+        out.append(entryFn);
+        out.append("(ray, aabbMin, aabbMax, primitiveId); }\n");
+    }
 }
 
 void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
@@ -150,6 +163,46 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
     wgsl += "    }\n";
     wgsl += "}\n";
 
+    // runIntersection — per-AABB procedural intersection dispatch. For a
+    // ProceduralHitGroup the intersection shader determines the hit; for
+    // triangle groups (or groups with no intersection shader) the default
+    // reports no hit, so the BLAS leaf falls back to the triangle path.
+    wgsl += "\nfn runIntersection(hg: u32, ray: RayDesc, aabbMin: vec3<f32>, aabbMax: vec3<f32>, primitiveId: u32) -> IntersectionResult {\n";
+    wgsl += "    switch hg {\n";
+    bool anyIntersection = false;
+    for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
+        const auto& g = hitGroups[i];
+        if (g.intersectionShader == kRTShaderUnused) continue;
+        if (g.intersectionShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.intersectionShader].entryFn;
+        AppendIntersectionCase(wgsl, i, fn);
+        anyIntersection = true;
+    }
+    if (!anyIntersection) wgsl += "        // (no intersection shaders registered)\n";
+    wgsl += "        default: { }\n";
+    wgsl += "    }\n";
+    wgsl += "    var none: IntersectionResult;\n";
+    wgsl += "    none.hit = false;\n";
+    wgsl += "    return none;\n";
+    wgsl += "}\n";
+
+    // Trace-time capability flags. The library traversal (injected at the
+    // marker below) gates its any-hit / intersection callbacks on these
+    // consts, so a triangle-only opaque scene dead-strips all user code out
+    // of TRACE and keeps its zero-user-code register footprint. When either
+    // is set the JS side also gives the TRACE pipeline the user bind-group
+    // layout (so any-hit / intersection shaders can sample @group(3+)
+    // resources) — it scans for the exact `@CRAFTER_RT_TRACE_USER` marker.
+    wgsl += "\nconst RT_HAS_ANYHIT: bool = ";
+    wgsl += (anyAnyhit ? "true" : "false");
+    wgsl += ";\n";
+    wgsl += "const RT_HAS_INTERSECTION: bool = ";
+    wgsl += (anyIntersection ? "true" : "false");
+    wgsl += ";\n";
+    if (anyAnyhit || anyIntersection) {
+        wgsl += "// @CRAFTER_RT_TRACE_USER = true\n";
+    }
+
     // runResolve — RESOLVE-stage tonemap hook. The first registered
     // Resolve shader wins; with none, identity passthrough (alpha forced
     // to 1) so the wavefront output matches a megakernel that wrote raw