From dd4122f2baf634ea9957343f99974376931c90be Mon Sep 17 00:00:00 2001
From: catbot
Date: Sun, 31 May 2026 20:21:44 +0000
Subject: [PATCH] WebGPU RT: ordered (nearest-child-first) traversal

Add _rtAabbT (AABB test returning entry-t); in both _rtwTraverseBlas and
_rtwTraverseTlas descend the nearer child first and push the farther only
when it hits, re-culling it against the (tightened) bestT when popped.
Render is identical (same closest hit) on VulkanTriangle, RTStress
(512/4096), and Sponza; cuts node visits on dense scenes.

Co-Authored-By: Claude Opus 4.8
---
 additional/dom-webgpu.js | 72 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js
index fcadaba..00bf317 100644
--- a/additional/dom-webgpu.js
+++ b/additional/dom-webgpu.js
@@ -1481,6 +1481,32 @@ fn _rtAabb(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax:
   return tExit >= max(tEnter, 0.0) && tEnter <= tMax;
 }
 
+// AABB test that also returns the (clamped) entry distance, for ordered
+// nearest-child-first traversal. t is meaningless when hit == false.
+//   ro, invRd : ray origin and the factor multiplied into each slab offset
+//               (presumably the reciprocal ray direction; confirm at callers).
+//   mn, mx    : box corners; a box with any mn > mx is always a miss.
+//   tMax      : a box first entered beyond this distance is a miss.
+struct _RtAabbHit { hit: bool, t: f32 };
+fn _rtAabbT(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax: f32) -> _RtAabbHit {
+  var r: _RtAabbHit;
+  r.hit = false;
+  r.t = 0.0;
+  if (any(mn > mx)) { return r; }
+  let t0 = (mn - ro) * invRd;
+  let t1 = (mx - ro) * invRd;
+  let tmin = min(t0, t1);
+  let tmax = max(t0, t1);
+  let tEnter = max(max(tmin.x, tmin.y), tmin.z);
+  let tExit = min(min(tmax.x, tmax.y), tmax.z);
+  if (tExit >= max(tEnter, 0.0) && tEnter <= tMax) {
+    r.hit = true;
+    // Clamp so a ray that starts inside the box reports distance 0.
+    r.t = max(tEnter, 0.0);
+  }
+  return r;
+}
+
 struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 };
 fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>,
           tMin: f32, tMax: f32) -> _RtTriHit {
@@ -1896,10 +1922,31 @@ fn _rtwTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
       if (sp == 0u) { break; }
       sp = sp - 1u; nodeRel = stack[sp]; continue;
     }
+    // Internal node: descend the nearer child first; push the farther
+    // only when it hits (and re-cull it against bestT when popped). The
+    // nearer child is entered directly, so a full stack can only cost the
+    // farther subtree, never both.
     let left = node.firstChildOrPrim;
     let right = left + 1u;
-    if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
-    nodeRel = left;
+    let ln = bvhNodes[meshRec.bvhOffset + left];
+    let rn = bvhNodes[meshRec.bvhOffset + right];
+    let lr = _rtAabbT(rayObj.origin, invD, ln.aabbMin, ln.aabbMax, *bestT);
+    let rr = _rtAabbT(rayObj.origin, invD, rn.aabbMin, rn.aabbMax, *bestT);
+    if (lr.hit && rr.hit) {
+      var nearChild = left;
+      var farChild = right;
+      if (rr.t < lr.t) { nearChild = right; farChild = left; }
+      if (sp < 32u) { stack[sp] = farChild; sp = sp + 1u; }
+      nodeRel = nearChild;
+    } else if (lr.hit) {
+      nodeRel = left;
+    } else if (rr.hit) {
+      nodeRel = right;
+    } else {
+      // Neither child is hit: resume from the stack.
+      if (sp == 0u) { break; }
+      sp = sp - 1u; nodeRel = stack[sp];
+    }
   }
   return false;
 }
@@ -1960,11 +2007,26 @@ fn _rtwTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
         if (endSearch) { return true; }
         if ((*bestT) < pre && (effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
     } else {
+      // Internal node: nearest-child-first. Sentinel-padded leaves
+      // carry an inverted AABB so _rtAabbT rejects them for free.
       let left = 2u * nodeIdx + 1u;
       let right = 2u * nodeIdx + 2u;
-      if (sp + 1u < 32u) {
-        stack[sp] = right; sp = sp + 1u;
-        stack[sp] = left; sp = sp + 1u;
+      let ln = tlasBvhNodes[left];
+      let rn = tlasBvhNodes[right];
+      let lr = _rtAabbT(rayWorld.origin, invD, ln.aabbMin, ln.aabbMax, *bestT);
+      let rr = _rtAabbT(rayWorld.origin, invD, rn.aabbMin, rn.aabbMax, *bestT);
+      if (lr.hit && rr.hit) {
+        var nearChild = left;
+        var farChild = right;
+        if (rr.t < lr.t) { nearChild = right; farChild = left; }
+        // Push the farther child first so the nearer one pops next. With a
+        // single free slot keep just the nearer child instead of dropping both.
+        if (sp + 1u < 32u) { stack[sp] = farChild; sp = sp + 1u; }
+        if (sp < 32u) { stack[sp] = nearChild; sp = sp + 1u; }
+      } else if (lr.hit) {
+        if (sp < 32u) { stack[sp] = left; sp = sp + 1u; }
+      } else if (rr.hit) {
+        if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
       }
     }
   }