WebGPU RT: ordered (nearest-child-first) traversal

Add _rtAabbT (AABB test returning entry-t); in both _rtwTraverseBlas and
_rtwTraverseTlas descend the nearer child first and push the farther only
when it hits, re-culling it against the (tightened) bestT when popped.
Render is identical (same closest hit) on VulkanTriangle, RTStress
(512/4096), and Sponza; cuts node visits on dense scenes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
catbot 2026-05-31 20:21:44 +00:00
commit dd4122f2ba

View file

@ -1481,6 +1481,27 @@ fn _rtAabb(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax:
return tExit >= max(tEnter, 0.0) && tEnter <= tMax;
}
// Ray/AABB slab test that reports the entry distance alongside the hit flag,
// so callers can order children nearest-first during traversal.
//   ro    - ray origin
//   invRd - multiplied per component against (plane - ro); presumably the
//           component-wise reciprocal of the ray direction (confirm at caller)
//   mn/mx - box minimum / maximum corners
//   tMax  - boxes whose entry distance exceeds this are rejected
// On a miss the result is (hit = false, t = 0.0) and t must not be used.
// On a hit, t is the entry distance clamped to be non-negative.
struct _RtAabbHit {
    hit: bool,
    t: f32
};
fn _rtAabbT(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax: f32) -> _RtAabbHit {
    let miss = _RtAabbHit(false, 0.0);

    // A box with min > max on any axis is inverted and can never be hit.
    if (any(mn > mx)) {
        return miss;
    }

    // Distances to the two bounding planes of each axis slab.
    let planeA = (mn - ro) * invRd;
    let planeB = (mx - ro) * invRd;
    let nearT = min(planeA, planeB);
    let farT = max(planeA, planeB);

    // Latest entry and earliest exit across the three slabs.
    let tEnter = max(max(nearT.x, nearT.y), nearT.z);
    let tExit = min(min(farT.x, farT.y), farT.z);

    // Entry clamped to zero, so a ray starting inside the box reports t = 0.
    let tClamped = max(tEnter, 0.0);

    let overlaps = tExit >= tClamped && tEnter <= tMax;
    if (!overlaps) {
        return miss;
    }
    return _RtAabbHit(true, tClamped);
}
struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 };
fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>,
tMin: f32, tMax: f32) -> _RtTriHit {
@ -1896,10 +1917,26 @@ fn _rtwTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp]; continue;
}
// Internal node: descend the nearer child first; push the farther
// only when it hits (and re-cull it against bestT when popped).
let left = node.firstChildOrPrim;
let right = left + 1u;
let ln = bvhNodes[meshRec.bvhOffset + left];
let rn = bvhNodes[meshRec.bvhOffset + right];
let lr = _rtAabbT(rayObj.origin, invD, ln.aabbMin, ln.aabbMax, *bestT);
let rr = _rtAabbT(rayObj.origin, invD, rn.aabbMin, rn.aabbMax, *bestT);
if (lr.hit && rr.hit) {
if (sp + 1u < 32u) {
if (lr.t <= rr.t) { stack[sp] = right; sp = sp + 1u; stack[sp] = left; sp = sp + 1u; }
else { stack[sp] = left; sp = sp + 1u; stack[sp] = right; sp = sp + 1u; }
}
} else if (lr.hit) {
if (sp < 32u) { stack[sp] = left; sp = sp + 1u; }
} else if (rr.hit) {
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
nodeRel = left;
}
if (sp == 0u) { break; }
sp = sp - 1u; nodeRel = stack[sp];
}
return false;
}
@ -1960,11 +1997,23 @@ fn _rtwTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
if (endSearch) { return true; }
if ((*bestT) < pre && (effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
} else {
// Internal node: nearest-child-first. Sentinel-padded leaves
// carry an inverted AABB so _rtAabbT rejects them for free.
let left = 2u * nodeIdx + 1u;
let right = 2u * nodeIdx + 2u;
let ln = tlasBvhNodes[left];
let rn = tlasBvhNodes[right];
let lr = _rtAabbT(rayWorld.origin, invD, ln.aabbMin, ln.aabbMax, *bestT);
let rr = _rtAabbT(rayWorld.origin, invD, rn.aabbMin, rn.aabbMax, *bestT);
if (lr.hit && rr.hit) {
if (sp + 1u < 32u) {
stack[sp] = right; sp = sp + 1u;
stack[sp] = left; sp = sp + 1u;
if (lr.t <= rr.t) { stack[sp] = right; sp = sp + 1u; stack[sp] = left; sp = sp + 1u; }
else { stack[sp] = left; sp = sp + 1u; stack[sp] = right; sp = sp + 1u; }
}
} else if (lr.hit) {
if (sp < 32u) { stack[sp] = left; sp = sp + 1u; }
} else if (rr.hit) {
if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
}
}
}