WebGPU RT: remove dead megakernel WGSL (no dual path)

The RT pipeline now only builds the wavefront kernels, so the old
single-megakernel traversal/traceRay block (rtWgslMegakernelHelpers) and
the unused rtWgslPrelude alias are dead. Remove them. The rayQuery compute
path keeps rtWgslMegakernelBindings (its own _rq* traversal uses it).
RTStress still renders correctly with the trimmed prelude.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
catbot 2026-05-31 20:24:04 +00:00
commit 82e5e867d4

View file

@ -1439,7 +1439,6 @@ const TLAS_BVH_N_PADDED: u32 = 16384u;
const TLAS_BVH_LEAVES_START: u32 = TLAS_BVH_N_PADDED - 1u;
`;
// WGSL prelude: rtWgslTypes followed by rtWgslMegakernelBindings.
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
// Injected after the user-supplied closesthit/anyhit/miss sources +
@ -1526,204 +1525,6 @@ fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f
}
`;
// Megakernel-only helpers: traversal routines that invoke runAnyHit /
// runClosestHit / runMiss (emitted by the megakernel SBT switch) and
// `traceRay` that closes over them. Only the raygen-pipeline path
// prepends this.
const rtWgslMegakernelHelpers = String.raw`
// Walks one mesh's BVH with an explicit stack (no recursion), testing the
// object-space ray against leaf triangles and keeping the nearest accepted
// hit in *bestHit / *bestT.
//
//   rayObj       - ray expressed in the mesh's object space
//   flags        - RT_FLAG_* bits (opacity, face culling, first-hit exit)
//   meshRec      - supplies the BVH node offset and primitive-remap offset
//   instanceId   - copied into every candidate hit
//   hitGroupBase - copied into every candidate hit; also passed to runAnyHit
//   payload      - forwarded to runAnyHit for non-opaque candidates
//
// Returns true when the caller must stop traversing entirely: either anyhit
// answered RT_ANYHIT_END_SEARCH, or a hit was accepted while
// RT_FLAG_TERMINATE_ON_FIRST_HIT is set. Returns false once the stack runs
// dry.
fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
                   instanceId: u32, hitGroupBase: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
    let invDir = vec3<f32>(1.0) / rayObj.direction;

    // Flag tests depend only on `flags`, so evaluate them once up front.
    let stopOnFirstHit = (flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u;
    let cullBack = (flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u;
    let cullFront = (flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u;
    // Opaque unless NO_OPAQUE is set without OPAQUE (opaque is the default).
    let treatOpaque = (flags & RT_FLAG_OPAQUE) != 0u
                   || (flags & RT_FLAG_NO_OPAQUE) == 0u;

    var pending: array<u32, 32>;   // deferred node indices, relative to the mesh
    var depth: u32 = 0u;
    var current: u32 = 0u;         // mesh-relative index of the node being visited

    loop {
        let node = bvhNodes[meshRec.bvhOffset + current];
        var descended = false;

        if (_rtAabb(rayObj.origin, invDir, node.aabbMin, node.aabbMax, *bestT)) {
            if (node.primCount > 0u) {
                // Leaf: test each referenced triangle against the ray.
                for (var k: u32 = 0u; k < node.primCount; k = k + 1u) {
                    let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + k];
                    let tri = _rtFetchTri(meshRec, triIndex);
                    let isect = _rtTri(rayObj.origin, rayObj.direction,
                                       tri[0], tri[1], tri[2],
                                       rayObj.tMin, *bestT);
                    if (!isect.hit) { continue; }

                    // Sign of dot(normal, direction) decides which face was hit.
                    let facing = dot(cross(tri[1] - tri[0], tri[2] - tri[0]), rayObj.direction);
                    if (cullBack && facing > 0.0) { continue; }
                    if (cullFront && facing < 0.0) { continue; }

                    var candidate: HitInfo;
                    candidate.t = isect.t;
                    candidate.instanceId = instanceId;
                    candidate.primitiveId = triIndex;
                    candidate.hitGroupIndex = hitGroupBase;
                    candidate.attribs = vec2<f32>(isect.u, isect.v);
                    candidate.objectRayOrigin = rayObj.origin;
                    candidate.objectRayDirection = rayObj.direction;

                    // Opaque hits are accepted outright; otherwise anyhit decides.
                    var accepted = treatOpaque;
                    var endSearch = false;
                    if (!treatOpaque) {
                        let verdict = runAnyHit(hitGroupBase, rayObj, candidate, payload);
                        endSearch = verdict == RT_ANYHIT_END_SEARCH;
                        accepted = endSearch || verdict == RT_ANYHIT_ACCEPT;
                    }
                    if (accepted) {
                        *bestHit = candidate;
                        *bestT = isect.t;
                        if (endSearch || stopOnFirstHit) { return true; }
                    }
                }
            } else {
                // Inner node: the right child sits directly after the left one.
                // Defer the right child and descend into the left. If the stack
                // is full the right child is dropped.
                let leftChild = node.firstChildOrPrim;
                if (depth < 32u) {
                    pending[depth] = leftChild + 1u;
                    depth = depth + 1u;
                }
                current = leftChild;
                descended = true;
            }
        }

        // AABB miss or finished leaf: resume from the stack, or finish.
        if (!descended) {
            if (depth == 0u) { break; }
            depth = depth - 1u;
            current = pending[depth];
        }
    }
    return false;
}
// Traverses the top-level BVH with the world-space ray. For every instance
// leaf that passes the cull mask and AABB tests, the ray is transformed into
// the instance's object space and handed to _rtTraverseBlas. The nearest hit
// is accumulated in *bestHit / *bestT.
//
//   rayWorld        - ray in world space
//   flags           - RT_FLAG_* bits; adjusted per instance by instanceFlags
//   cullMask        - instance is skipped when (instanceMask & cullMask) == 0
//   sbtRecordOffset - added to the instance's hit-group offset
//   sbtRecordStride - accepted for signature compatibility; not read here
//   payload         - forwarded to _rtTraverseBlas
//
// Returns true when traversal was terminated early (the BLAS walk asked to
// end the search, or a first-hit termination applied); false when the whole
// tree was walked.
fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
                   sbtRecordOffset: u32, sbtRecordStride: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
    let invD = vec3<f32>(1.0) / rayWorld.direction;
    // Stack-based descent of the sweep-tree BVH. Internal nodes
    // [0, TLAS_BVH_LEAVES_START); leaves [LEAVES_START, 2*N_PADDED-1).
    // Node i's children are 2i+1 / 2i+2 (implicit perfect binary tree).
    // Stack depth = tree depth = log2(N_PADDED) = 14 for N_PADDED=16384;
    // 24 gives generous headroom.
    var stack: array<u32, 24>;
    var sp: u32 = 0u;
    stack[sp] = 0u; sp = sp + 1u;
    loop {
        if (sp == 0u) { break; }
        sp = sp - 1u;
        let nodeIdx = stack[sp];
        let node = tlasBvhNodes[nodeIdx];
        if (!_rtAabb(rayWorld.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
            continue;
        }
        if (nodeIdx >= TLAS_BVH_LEAVES_START) {
            // Leaf: resolve the entry, then run the per-instance tests.
            let leafIdx = nodeIdx - TLAS_BVH_LEAVES_START;
            let i = tlasEntryOrder[leafIdx];
            // A leaf whose entry index is the 0xFFFFFFFF sentinel references
            // no instance; skip it before indexing tlasEntries.
            if (i == 0xFFFFFFFFu) { continue; }
            let inst = tlasEntries[i];
            // Low 8 bits of maskHGOffset: instance mask. Upper bits: hit-group offset.
            let instanceMask = inst.maskHGOffset & 0xFFu;
            if ((instanceMask & cullMask) == 0u) { continue; }
            if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }
            // Transform ray to object space (rows of a 3x4 world-to-object matrix).
            let r0 = inst.worldToObjectR0;
            let r1 = inst.worldToObjectR1;
            let r2 = inst.worldToObjectR2;
            var rayObj: RayDesc;
            rayObj.origin = vec3<f32>(
                dot(r0.xyz, rayWorld.origin) + r0.w,
                dot(r1.xyz, rayWorld.origin) + r1.w,
                dot(r2.xyz, rayWorld.origin) + r2.w,
            );
            // Direction ignores the translation column.
            rayObj.direction = vec3<f32>(
                dot(r0.xyz, rayWorld.direction),
                dot(r1.xyz, rayWorld.direction),
                dot(r2.xyz, rayWorld.direction),
            );
            rayObj.tMin = rayWorld.tMin;
            rayObj.tMax = *bestT;
            // Per-instance overrides of the ray flags.
            var effective = flags;
            let iflags = inst.instanceFlags;
            if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
                effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
            }
            if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
                effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
            }
            if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
                effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
            }
            let hitGroupOffset = inst.maskHGOffset >> 8u;
            let hitGroupBase = sbtRecordOffset + hitGroupOffset;
            let meshRec = meshRecords[inst.blasMeshIdx];
            let pre = *bestT;
            let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
                                            bestHit, bestT, payload);
            // Stamp the instance data BEFORE honouring endSearch. The BLAS walk
            // only returns true right after writing *bestHit for this instance,
            // and it builds that HitInfo without the object-to-world rows or
            // customIndex. Returning first would hand the closest-hit shader a
            // hit record with those fields unset.
            let hitCloser = (*bestT) < pre;
            if (endSearch || hitCloser) {
                (*bestHit).objectToWorldR0 = inst.objectToWorldR0;
                (*bestHit).objectToWorldR1 = inst.objectToWorldR1;
                (*bestHit).objectToWorldR2 = inst.objectToWorldR2;
                (*bestHit).customIndex = inst.customIndex;
            }
            if (endSearch) { return true; }
            if (hitCloser && (effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
        } else {
            // Internal node: push both children (skip on overflow). Left is
            // pushed last so it is popped first.
            let left = 2u * nodeIdx + 1u;
            let right = 2u * nodeIdx + 2u;
            if (sp + 1u < 24u) {
                stack[sp] = right; sp = sp + 1u;
                stack[sp] = left; sp = sp + 1u;
            }
        }
    }
    return false;
}
// Megakernel trace entry point. Builds a RayDesc from the arguments, runs
// the TLAS traversal, then invokes exactly one of:
//   - runMiss(missIndex, ...)   when nothing closer than rayTMax was found;
//   - runClosestHit(...)        when a hit was found and
//                               RT_FLAG_SKIP_CLOSEST_HIT is not set;
//   - nothing                   when a hit was found but the flag is set.
//
// Only the low 8 bits of cullMask are forwarded to the traversal.
// tlasIdx is accepted but not read by this implementation.
fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
            sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
            rayOrigin: vec3<f32>, rayTMin: f32,
            rayDir: vec3<f32>, rayTMax: f32,
            payload: ptr<function, Payload>) {
    var ray: RayDesc;
    ray.origin = rayOrigin;
    ray.tMin = rayTMin;
    ray.direction = rayDir;
    ray.tMax = rayTMax;

    // Nearest-hit state starts at the far end of the ray interval.
    var nearestT = rayTMax;
    var nearest: HitInfo;
    nearest.t = rayTMax;

    // The early-termination result is not needed here: nearestT alone
    // distinguishes hit from miss.
    let stoppedEarly = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
                                       sbtRecordOffset, sbtRecordStride,
                                       &nearest, &nearestT, payload);

    // Miss: traversal never pulled nearestT below the ray's tMax.
    if (!(nearestT < rayTMax)) {
        runMiss(missIndex, ray, payload);
        return;
    }
    if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) != 0u) {
        return;
    }
    runClosestHit(nearest.hitGroupIndex, ray, nearest, payload);
}
`;
// ════════════════════════════════════════════════════════════════════════
// WAVEFRONT RT — streaming tracer (GENERATE → PREP → TRACE → SHADE →