feat(webgpu-rt): emit intersection/any-hit dispatch + build AABB BVH

PipelineRTWebGPU emits a runIntersection mega-switch and the
RT_HAS_ANYHIT / RT_HAS_INTERSECTION consts (+ the @CRAFTER_RT_TRACE_USER
marker) that gate the library's new TRACE-stage user callbacks, so an
opaque triangle-only scene still const-folds them away. Mesh-WebGPU
builds a SAH BVH2 over AABB primitives and uploads them in primitive
order for the intersection shader to fetch.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
catbot 2026-06-02 22:09:20 +00:00
commit a91603c70b
2 changed files with 116 additions and 1 deletions

View file

@ -213,6 +213,25 @@ namespace {
nodes.emplace_back(); nodes.emplace_back();
BuildRecursive(0, 0, triCount); BuildRecursive(0, 0, triCount);
} }
// Procedural (AABB) geometry entry point. Every input box becomes one
// PrimRef whose bound is the box itself; the primitives are then handed to
// the same recursive builder call the triangle path issues.
//
// aabbs: boxes to build over. A box's position in this span is recorded in
//        PrimRef::triIndex.
void BuildFromAabbs(std::span<const RTAabb> aabbs) {
    const auto boxCount = static_cast<std::uint32_t>(aabbs.size());
    prims.resize(boxCount);

    for (std::uint32_t boxIdx = 0; boxIdx < boxCount; ++boxIdx) {
        const RTAabb& src = aabbs[boxIdx];
        auto& ref = prims[boxIdx];

        // Extending by both corners makes the PrimRef bound cover the box.
        ref.box.Extend(src.min);
        ref.box.Extend(src.max);

        // Centroid = midpoint of the bound, computed per axis.
        for (int axis = 0; axis < 3; ++axis) {
            ref.centroid[axis] = (ref.box.lo[axis] + ref.box.hi[axis]) * 0.5f;
        }

        // Remember which input box this PrimRef came from.
        ref.triIndex = boxIdx;
    }

    // Reserve node storage up front, create the root (node 0), then build.
    nodes.reserve(boxCount * 2);
    nodes.emplace_back();
    BuildRecursive(0, 0, boxCount);
}
}; };
} }
@ -243,7 +262,10 @@ namespace {
indices.data(), static_cast<std::int32_t>(indices.size()), indices.data(), static_cast<std::int32_t>(indices.size()),
builder.nodes.data(), static_cast<std::int32_t>(builder.nodes.size()), builder.nodes.data(), static_cast<std::int32_t>(builder.nodes.size()),
primRemap.data(), static_cast<std::int32_t>(primRemap.size()), primRemap.data(), static_cast<std::int32_t>(primRemap.size()),
attribsBytes.data(), static_cast<std::int32_t>(attribsBytes.size())); attribsBytes.data(), static_cast<std::int32_t>(attribsBytes.size()),
/*geomType*/ 0,
/*opaqueFlag*/ mesh.opaque ? 1 : 0,
/*primCount*/ static_cast<std::int32_t>(mesh.triangleCount));
} }
} }
@ -273,3 +295,43 @@ void Mesh::Build(const CompressedMeshAsset& asset,
BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes)); BuildBVHAndRegister(*this, vertices, indices, std::span(dataBytes));
} }
// Builds a BVH over procedural AABB primitives and registers it as a BLAS,
// storing the returned handle in blasAddr.
//
// aabbs:   boxes making up the procedural geometry.
// opaque_: stored in `opaque` and forwarded as the BLAS opaque flag.
// cmd:     unused by this implementation.
void Mesh::BuildProcedural(std::span<const RTAabb> aabbs,
                           bool opaque_,
                           WebGPUCommandEncoderRef /*cmd*/) {
    const auto boxCount = static_cast<std::uint32_t>(aabbs.size());

    opaque        = opaque_;
    triangleCount = 0;             // procedural geometry carries no triangles
    vertexCount   = boxCount * 2;  // each box contributes a (min, max) pair

    Builder bvh;
    bvh.BuildFromAabbs(aabbs);

    // Boxes are uploaded in their *original* order, two vec3 per box
    // (min then max). The remap table below translates a BVH leaf slot back
    // to the original box index, so the intersection shader's
    // _rtFetchAabb(meshRec, primId) reads the right box, in the same way the
    // triangle path indexes vertices.
    std::vector<Vector<float, 3, 3>> corners;
    corners.reserve(boxCount * 2);
    for (std::uint32_t boxIdx = 0; boxIdx < boxCount; ++boxIdx) {
        const RTAabb& src = aabbs[boxIdx];
        corners.push_back(Vector<float, 3, 3>{ src.min[0], src.min[1], src.min[2] });
        corners.push_back(Vector<float, 3, 3>{ src.max[0], src.max[1], src.max[2] });
    }

    // Leaf slot -> original box index, read from the builder's primitive
    // list after the build.
    std::vector<std::uint32_t> leafToOriginal;
    leafToOriginal.reserve(boxCount);
    for (std::uint32_t slot = 0; slot < boxCount; ++slot) {
        leafToOriginal.push_back(bvh.prims[slot].triIndex);
    }

    const auto cornerCount = static_cast<std::int32_t>(corners.size());
    const auto nodeCount   = static_cast<std::int32_t>(bvh.nodes.size());
    const auto remapCount  = static_cast<std::int32_t>(leafToOriginal.size());

    // Node 0 is the root; its bounds are passed as the BLAS bounds.
    const BVHNode& rootNode = bvh.nodes[0];
    blasAddr = WebGPU::wgpuRegisterMeshBLAS(
        rootNode.aabbMin[0], rootNode.aabbMin[1], rootNode.aabbMin[2],
        rootNode.aabbMax[0], rootNode.aabbMax[1], rootNode.aabbMax[2],
        corners.data(), cornerCount,
        nullptr, 0,                      // no index buffer
        bvh.nodes.data(), nodeCount,
        leafToOriginal.data(), remapCount,
        nullptr, 0,                      // no attribute bytes
        /*geomType*/ 1,
        /*opaqueFlag*/ opaque ? 1 : 0,
        /*primCount*/ static_cast<std::int32_t>(boxCount));
}

View file

@ -36,6 +36,8 @@ namespace {
"fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }"; "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }";
constexpr std::string_view kPlaceholderMiss = constexpr std::string_view kPlaceholderMiss =
"fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}"; "fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}";
// WGSL source for a default intersection function that always reports
// "no hit". The adjacent literals concatenate into a single line of WGSL.
constexpr std::string_view kPlaceholderIntersection =
    "fn _crafter_default_intersection("
    "ray: RayDesc, aabbMin: vec3<f32>, aabbMax: vec3<f32>, primitiveId: u32"
    ") -> IntersectionResult "
    "{ var r: IntersectionResult; r.hit = false; return r; }";
void AppendCase(std::string& out, void AppendCase(std::string& out,
std::uint32_t hitGroupIndex, std::uint32_t hitGroupIndex,
@ -60,6 +62,17 @@ namespace {
out += entryFn; out += entryFn;
out += "(ray, hit, payload); }\n"; out += "(ray, hit, payload); }\n";
} }
// Appends one `case <index>u:` clause of the runIntersection switch to `out`.
// Unlike the void-returning dispatchers, the clause *returns* the entry
// function's result and forwards the ray, the AABB bounds and the primitive
// id to it.
//
// out:           WGSL text being assembled; the clause is appended in place.
// hitGroupIndex: value used as the case selector.
// entryFn:       name of the WGSL intersection entry function to call.
void AppendIntersectionCase(std::string& out,
                            std::uint32_t hitGroupIndex,
                            std::string_view entryFn) {
    const std::string selector = std::to_string(hitGroupIndex);
    out.append(" case ")
       .append(selector)
       .append("u: { return ")
       .append(entryFn)
       .append("(ray, aabbMin, aabbMax, primitiveId); }\n");
}
} }
void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
@ -150,6 +163,46 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
wgsl += " }\n"; wgsl += " }\n";
wgsl += "}\n"; wgsl += "}\n";
// runIntersection — per-AABB procedural intersection dispatch. For a
// ProceduralHitGroup the intersection shader determines the hit; for
// triangle groups (or groups with no intersection shader) the default
// reports no hit, so the BLAS leaf falls back to the triangle path.
//
// Emitted shape: a `switch hg` with one returning case per hit group that
// has a usable intersection shader, an empty `default`, and a trailing
// "no hit" return after the switch.
wgsl += "\nfn runIntersection(hg: u32, ray: RayDesc, aabbMin: vec3<f32>, aabbMax: vec3<f32>, primitiveId: u32) -> IntersectionResult {\n";
wgsl += " switch hg {\n";
// Becomes true once at least one case has been emitted; reused further down
// for RT_HAS_INTERSECTION and the trace-user marker.
bool anyIntersection = false;
for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
const auto& g = hitGroups[i];
// Groups without an intersection shader get no case and so land on `default`.
if (g.intersectionShader == kRTShaderUnused) continue;
// An index outside sbt.shaders is skipped silently as well: no case is
// emitted and no error is reported here.
if (g.intersectionShader >= sbt.shaders.size()) continue;
const auto& fn = sbt.shaders[g.intersectionShader].entryFn;
// The case selector is the hit-group index, not the shader index.
AppendIntersectionCase(wgsl, i, fn);
anyIntersection = true;
}
if (!anyIntersection) wgsl += " // (no intersection shaders registered)\n";
// Empty default: control leaves the switch and reaches the no-hit return below.
wgsl += " default: { }\n";
wgsl += " }\n";
wgsl += " var none: IntersectionResult;\n";
wgsl += " none.hit = false;\n";
wgsl += " return none;\n";
wgsl += "}\n";
// Trace-time capability flags. The library traversal (injected at the
// marker below) gates its any-hit / intersection callbacks on these
// consts, so a triangle-only opaque scene dead-strips all user code out
// of TRACE and keeps its zero-user-code register footprint. When either
// is set the JS side also gives the TRACE pipeline the user bind-group
// layout (so any-hit / intersection shaders can sample @group(3+)
// resources) — it scans for the exact `@CRAFTER_RT_TRACE_USER` marker.
//
// NOTE(review): `anyAnyhit` is defined outside the lines visible here —
// presumably by the any-hit dispatch emission earlier in Init; confirm.
wgsl += "\nconst RT_HAS_ANYHIT: bool = ";
wgsl += (anyAnyhit ? "true" : "false");
wgsl += ";\n";
wgsl += "const RT_HAS_INTERSECTION: bool = ";
wgsl += (anyIntersection ? "true" : "false");
wgsl += ";\n";
// The marker is emitted as a WGSL line comment, and only when at least one
// of the two flags is true.
if (anyAnyhit || anyIntersection) {
wgsl += "// @CRAFTER_RT_TRACE_USER = true\n";
}
// runResolve — RESOLVE-stage tonemap hook. The first registered // runResolve — RESOLVE-stage tonemap hook. The first registered
// Resolve shader wins; with none, identity passthrough (alpha forced // Resolve shader wins; with none, identity passthrough (alpha forced
// to 1) so the wavefront output matches a megakernel that wrote raw // to 1) so the wavefront output matches a megakernel that wrote raw