WebGPU RT: wavefront/streaming tracer (replaces megakernel) #4
1 changed file with 0 additions and 199 deletions
WebGPU RT: remove dead megakernel WGSL (no dual path)
The RT pipeline now only builds the wavefront kernels, so the old single-megakernel traversal/traceRay block (rtWgslMegakernelHelpers) and the unused rtWgslPrelude alias are dead. Remove them. The rayQuery compute path keeps rtWgslMegakernelBindings (its own _rq* traversal uses it). RTStress still renders correctly with the trimmed prelude. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
commit
82e5e867d4
|
|
@ -1439,7 +1439,6 @@ const TLAS_BVH_N_PADDED: u32 = 16384u;
|
|||
const TLAS_BVH_LEAVES_START: u32 = TLAS_BVH_N_PADDED - 1u;
|
||||
`;
|
||||
|
||||
// Prelude string: rtWgslTypes followed by rtWgslMegakernelBindings.
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
|
||||
|
||||
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
|
||||
// Injected after the user-supplied closesthit/anyhit/miss sources +
|
||||
|
|
@ -1526,204 +1525,6 @@ fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f
|
|||
}
|
||||
`;
|
||||
|
||||
// Megakernel-only helpers: traversal routines that invoke runAnyHit /
|
||||
// runClosestHit / runMiss (emitted by the megakernel SBT switch) and
|
||||
// `traceRay` that closes over them. Only the raygen-pipeline path
|
||||
// prepends this.
|
||||
const rtWgslMegakernelHelpers = String.raw`
|
||||
// Iterative stack-based BLAS traversal. Returns true if traversal was
|
||||
// terminated by an END_SEARCH from anyhit (caller should stop entirely).
|
||||
// Stack-based traversal of one mesh's BVH with an object-space ray.
//
//   rayObj        ray already transformed into the instance's object space
//   flags         effective RT_FLAG_* bits for this instance
//   meshRec       supplies bvhOffset / primRemapOffset for this mesh
//   instanceId    stored in the committed hit's instanceId
//   hitGroupBase  stored in the committed hit's hitGroupIndex; also selects the anyhit
//   bestHit/bestT closest committed hit so far; updated in place
//   payload       forwarded to runAnyHit
//
// Returns true when the search must stop entirely: anyhit answered
// RT_ANYHIT_END_SEARCH, or a hit was committed while
// RT_FLAG_TERMINATE_ON_FIRST_HIT is set. Returns false once the stack drains.
fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
                   instanceId: u32, hitGroupBase: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
  let invDir = vec3<f32>(1.0) / rayObj.direction;

  // Ray-flag tests do not change during traversal; evaluate them once.
  let cullBack = (flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u;
  let cullFront = (flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u;
  let stopOnFirstHit = (flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u;
  // Geometry is opaque unless NO_OPAQUE is set without OPAQUE.
  let treatAsOpaque = (flags & RT_FLAG_OPAQUE) != 0u
                   || (flags & RT_FLAG_NO_OPAQUE) == 0u;

  var pending: array<u32, 32>;
  var depth: u32 = 0u;
  var current: u32 = 0u; // node index relative to meshRec.bvhOffset

  loop {
    let node = bvhNodes[meshRec.bvhOffset + current];
    let boxHit = _rtAabb(rayObj.origin, invDir, node.aabbMin, node.aabbMax, *bestT);

    if (boxHit && node.primCount == 0u) {
      // Inner node: remember the right child, descend into the left one.
      // If the stack is full the right child is not recorded.
      let leftChild = node.firstChildOrPrim;
      if (depth < 32u) {
        pending[depth] = leftChild + 1u;
        depth += 1u;
      }
      current = leftChild;
      continue;
    }

    if (boxHit) {
      // Leaf node: test each referenced triangle.
      for (var k: u32 = 0u; k < node.primCount; k += 1u) {
        let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + k];
        let tri = _rtFetchTri(meshRec, triIndex);
        let isect = _rtTri(rayObj.origin, rayObj.direction,
                           tri[0], tri[1], tri[2],
                           rayObj.tMin, *bestT);
        if (!isect.hit) { continue; }

        // Sign of dot(geometric normal, ray direction) drives face culling.
        let geomNormal = cross(tri[1] - tri[0], tri[2] - tri[0]);
        let facing = dot(geomNormal, rayObj.direction);
        if ((cullBack && facing > 0.0) || (cullFront && facing < 0.0)) { continue; }

        var candidate: HitInfo;
        candidate.t = isect.t;
        candidate.attribs = vec2<f32>(isect.u, isect.v);
        candidate.primitiveId = triIndex;
        candidate.instanceId = instanceId;
        candidate.hitGroupIndex = hitGroupBase;
        candidate.objectRayOrigin = rayObj.origin;
        candidate.objectRayDirection = rayObj.direction;

        // Opaque hits commit directly; otherwise the anyhit shader decides.
        var commit = treatAsOpaque;
        var endSearch = false;
        if (!treatAsOpaque) {
          let verdict = runAnyHit(hitGroupBase, rayObj, candidate, payload);
          endSearch = verdict == RT_ANYHIT_END_SEARCH;
          commit = endSearch || verdict == RT_ANYHIT_ACCEPT;
        }
        if (!commit) { continue; }

        *bestHit = candidate;
        *bestT = isect.t;
        if (endSearch || stopOnFirstHit) { return true; }
      }
    }

    // Box missed or leaf finished: resume from the stack, or stop when empty.
    if (depth == 0u) { break; }
    depth -= 1u;
    current = pending[depth];
  }
  return false;
}
|
||||
|
||||
// Walks the implicit TLAS BVH with a world-space ray, runs _rtTraverseBlas on
// every instance whose mask and bounds pass, and keeps the closest hit in
// *bestHit / *bestT.
//
//   rayWorld         world-space ray
//   flags            RT_FLAG_* ray flags; RT_INSTANCE_* flags may override them per instance
//   cullMask         an instance is skipped when (instanceMask & cullMask) == 0
//   sbtRecordOffset  added to the instance's hit-group offset to form the hit group index
//   sbtRecordStride  part of the signature; not read by this function
//   bestHit/bestT    closest committed hit so far; updated in place
//   payload          forwarded to _rtTraverseBlas
//
// Returns true when traversal was cut short (the BLAS traversal asked to stop,
// or a hit was committed under RT_FLAG_TERMINATE_ON_FIRST_HIT); false once the
// stack drains.
fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
                   sbtRecordOffset: u32, sbtRecordStride: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
  let invD = vec3<f32>(1.0) / rayWorld.direction;
  // Stack-based descent of the sweep-tree BVH. Internal nodes
  // [0, TLAS_BVH_LEAVES_START); leaves [LEAVES_START, 2*N_PADDED-1).
  // Node i's children are 2i+1 / 2i+2 (implicit perfect binary tree).
  // Stack depth = tree depth = log2(N_PADDED) = 14 for N_PADDED=16384;
  // 24 gives generous headroom.
  var stack: array<u32, 24>;
  var sp: u32 = 0u;
  stack[sp] = 0u; sp = sp + 1u;
  loop {
    if (sp == 0u) { break; }
    sp = sp - 1u;
    let nodeIdx = stack[sp];
    let node = tlasBvhNodes[nodeIdx];
    if (!_rtAabb(rayWorld.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
      continue;
    }
    if (nodeIdx >= TLAS_BVH_LEAVES_START) {
      // Leaf: resolve the entry, then do the per-instance test.
      let leafIdx = nodeIdx - TLAS_BVH_LEAVES_START;
      let i = tlasEntryOrder[leafIdx];
      // Padding leaves carry the 0xFFFFFFFF sentinel instead of an entry
      // index; skip them before indexing tlasEntries.
      if (i == 0xFFFFFFFFu) { continue; }
      let inst = tlasEntries[i];
      // Low 8 bits of maskHGOffset: instance mask. Upper bits: hit-group offset.
      let instanceMask = inst.maskHGOffset & 0xFFu;
      if ((instanceMask & cullMask) == 0u) { continue; }
      if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }

      // Transform ray to object space (rows of a 3x4 world-to-object matrix;
      // the direction ignores the translation column).
      let r0 = inst.worldToObjectR0;
      let r1 = inst.worldToObjectR1;
      let r2 = inst.worldToObjectR2;
      var rayObj: RayDesc;
      rayObj.origin = vec3<f32>(
        dot(r0.xyz, rayWorld.origin) + r0.w,
        dot(r1.xyz, rayWorld.origin) + r1.w,
        dot(r2.xyz, rayWorld.origin) + r2.w,
      );
      rayObj.direction = vec3<f32>(
        dot(r0.xyz, rayWorld.direction),
        dot(r1.xyz, rayWorld.direction),
        dot(r2.xyz, rayWorld.direction),
      );
      rayObj.tMin = rayWorld.tMin;
      rayObj.tMax = *bestT;

      // Per-instance flags override the ray flags for this instance only.
      var effective = flags;
      let iflags = inst.instanceFlags;
      if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
        effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
      }
      if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
        effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
      }
      if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
        effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
      }

      let hitGroupOffset = inst.maskHGOffset >> 8u;
      let hitGroupBase = sbtRecordOffset + hitGroupOffset;
      let meshRec = meshRecords[inst.blasMeshIdx];

      let pre = *bestT;
      let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
                                      bestHit, bestT, payload);

      // Record the instance data BEFORE honoring an early exit. Every
      // true-return of _rtTraverseBlas follows a commit into *bestHit, so
      // returning first would hand the closest-hit shader a hit without its
      // object-to-world rows and customIndex.
      let committedHere = endSearch || ((*bestT) < pre);
      if (committedHere) {
        (*bestHit).objectToWorldR0 = inst.objectToWorldR0;
        (*bestHit).objectToWorldR1 = inst.objectToWorldR1;
        (*bestHit).objectToWorldR2 = inst.objectToWorldR2;
        (*bestHit).customIndex = inst.customIndex;
      }
      if (endSearch) { return true; }
      if (committedHere && (effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
    } else {
      // Internal node: push both children; if two free slots are not
      // available, neither child is pushed.
      let left = 2u * nodeIdx + 1u;
      let right = 2u * nodeIdx + 2u;
      if (sp + 1u < 24u) {
        stack[sp] = right; sp = sp + 1u;
        stack[sp] = left; sp = sp + 1u;
      }
    }
  }
  return false;
}
|
||||
|
||||
// Traces one ray through the TLAS and dispatches the matching shader stage:
// runClosestHit when a hit closer than rayTMax was committed (unless
// RT_FLAG_SKIP_CLOSEST_HIT is set), runMiss otherwise.
//
//   tlasIdx          part of the signature; not read by this function
//   flags            RT_FLAG_* ray flags
//   cullMask         only the low 8 bits are forwarded to traversal
//   sbtRecordOffset  forwarded to _rtTraverseTlas
//   sbtRecordStride  forwarded to _rtTraverseTlas
//   missIndex        selects the miss shader passed to runMiss
//   rayOrigin/rayDir world-space ray
//   rayTMin/rayTMax  parametric interval of the ray
//   payload          handed to whichever shader stage runs
fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
            sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
            rayOrigin: vec3<f32>, rayTMin: f32,
            rayDir: vec3<f32>, rayTMax: f32,
            payload: ptr<function, Payload>) {
  var ray: RayDesc;
  ray.origin = rayOrigin;
  ray.tMin = rayTMin;
  ray.direction = rayDir;
  ray.tMax = rayTMax;

  // Closest-hit state starts at rayTMax.
  var closest: HitInfo;
  closest.t = rayTMax;
  var closestT = rayTMax;

  // The early-termination result is not needed here; the hit/miss decision
  // depends only on whether closestT moved below rayTMax.
  _ = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
                      sbtRecordOffset, sbtRecordStride,
                      &closest, &closestT, payload);

  if (!(closestT < rayTMax)) {
    runMiss(missIndex, ray, payload);
    return;
  }
  if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) != 0u) {
    return;
  }
  runClosestHit(closest.hitGroupIndex, ray, closest, payload);
}
|
||||
`;
|
||||
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
// WAVEFRONT RT — streaming tracer (GENERATE → PREP → TRACE → SHADE →
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue