WebGPU RT: wavefront/streaming tracer (replaces megakernel) #4
1 changed file with 0 additions and 199 deletions
WebGPU RT: remove dead megakernel WGSL (no dual path)
The RT pipeline now only builds the wavefront kernels, so the old single-megakernel traversal/traceRay block (rtWgslMegakernelHelpers) and the unused rtWgslPrelude alias are dead. Remove them. The rayQuery compute path keeps rtWgslMegakernelBindings (its own _rq* traversal uses it). RTStress still renders correctly with the trimmed prelude. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
commit
82e5e867d4
|
|
@ -1439,7 +1439,6 @@ const TLAS_BVH_N_PADDED: u32 = 16384u;
|
||||||
const TLAS_BVH_LEAVES_START: u32 = TLAS_BVH_N_PADDED - 1u;
|
const TLAS_BVH_LEAVES_START: u32 = TLAS_BVH_N_PADDED - 1u;
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
|
|
||||||
|
|
||||||
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
|
// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
|
||||||
// Injected after the user-supplied closesthit/anyhit/miss sources +
|
// Injected after the user-supplied closesthit/anyhit/miss sources +
|
||||||
|
|
@ -1526,204 +1525,6 @@ fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
// Megakernel-only helpers: traversal routines that invoke runAnyHit /
|
|
||||||
// runClosestHit / runMiss (emitted by the megakernel SBT switch) and
|
|
||||||
// `traceRay` that closes over them. Only the raygen-pipeline path
|
|
||||||
// prepends this.
|
|
||||||
const rtWgslMegakernelHelpers = String.raw`
|
|
||||||
// Iterative, explicit-stack traversal of one mesh's BLAS in object space.
//
// Every triangle in a leaf is tested against the ray with the current
// *bestT as the upper bound, so accepted hits shrink the search interval.
// Opaque hits are committed directly; non-opaque hits are routed through
// runAnyHit first.
//
// Returns true when the caller should stop traversing entirely: either
// anyhit answered RT_ANYHIT_END_SEARCH, or a hit was committed while
// RT_FLAG_TERMINATE_ON_FIRST_HIT was set. Returns false once the tree has
// been exhausted. In every case the closest committed hit (if any) is left
// in *bestHit / *bestT.
fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
                   instanceId: u32, hitGroupBase: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
  let invDir = vec3<f32>(1.0) / rayObj.direction;

  // Ray-flag tests do not depend on the node or triangle, so evaluate once.
  let cullBack = (flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u;
  let cullFront = (flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u;
  let stopAtFirstHit = (flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u;
  // Geometry is treated as opaque unless NO_OPAQUE is set without OPAQUE.
  let treatOpaque = (flags & RT_FLAG_OPAQUE) != 0u
                 || (flags & RT_FLAG_NO_OPAQUE) == 0u;

  var pending: array<u32, 32>;   // deferred right children (mesh-relative)
  var depth: u32 = 0u;
  var current: u32 = 0u;         // mesh-relative node index, root first

  loop {
    let node = bvhNodes[meshRec.bvhOffset + current];
    let overlaps = _rtAabb(rayObj.origin, invDir, node.aabbMin, node.aabbMax, *bestT);

    if (overlaps && node.primCount == 0u) {
      // Inner node: defer the right child, keep walking down the left one.
      // When the stack is full the right subtree is silently dropped.
      let leftChild = node.firstChildOrPrim;
      if (depth < 32u) {
        pending[depth] = leftChild + 1u;
        depth += 1u;
      }
      current = leftChild;
      continue;
    }

    if (overlaps) {
      // Leaf node: test each referenced triangle.
      for (var k: u32 = 0u; k < node.primCount; k++) {
        let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + k];
        let verts = _rtFetchTri(meshRec, triIndex);
        let tr = _rtTri(rayObj.origin, rayObj.direction,
                        verts[0], verts[1], verts[2],
                        rayObj.tMin, *bestT);
        if (!tr.hit) { continue; }

        // Sign of (geometric normal . ray direction) drives face culling.
        let facing = dot(cross(verts[1] - verts[0], verts[2] - verts[0]),
                         rayObj.direction);
        if (cullBack && facing > 0.0) { continue; }
        if (cullFront && facing < 0.0) { continue; }

        var candidate: HitInfo;
        candidate.t = tr.t;
        candidate.instanceId = instanceId;
        candidate.primitiveId = triIndex;
        candidate.hitGroupIndex = hitGroupBase;
        candidate.attribs = vec2<f32>(tr.u, tr.v);
        candidate.objectRayOrigin = rayObj.origin;
        candidate.objectRayDirection = rayObj.direction;

        var commit = treatOpaque;
        if (!treatOpaque) {
          let verdict = runAnyHit(hitGroupBase, rayObj, candidate, payload);
          if (verdict == RT_ANYHIT_END_SEARCH) {
            // Commit this hit and abort the whole search.
            *bestHit = candidate;
            *bestT = tr.t;
            return true;
          }
          commit = verdict == RT_ANYHIT_ACCEPT;
        }

        if (commit) {
          *bestHit = candidate;
          *bestT = tr.t;
          if (stopAtFirstHit) { return true; }
        }
      }
    }

    // Box missed or leaf finished: resume from the stack, or stop if empty.
    if (depth == 0u) { break; }
    depth -= 1u;
    current = pending[depth];
  }
  return false;
}
|
|
||||||
|
|
||||||
// Stack-based descent of the TLAS BVH in world space.
//
// The tree is an implicit perfect binary tree: node i has children 2i+1 and
// 2i+2; indices [0, TLAS_BVH_LEAVES_START) are internal nodes and indices
// from TLAS_BVH_LEAVES_START upward are leaves. For each leaf that survives
// the box and cull-mask tests, the ray is transformed into the instance's
// object space and handed to _rtTraverseBlas.
//
// Parameters:
//   rayWorld         world-space ray.
//   flags            ray flags; per-instance flags may override opacity and
//                    facing-cull bits before BLAS traversal.
//   cullMask         instance is skipped when (instanceMask & cullMask) == 0.
//   sbtRecordOffset  added to the instance's hit-group offset.
//   sbtRecordStride  accepted for interface compatibility; not read here.
//   bestHit, bestT   in/out closest committed hit and its distance.
//   payload          forwarded to the BLAS traversal.
//
// Returns true when traversal stopped early (the BLAS traversal requested
// it, or a hit was committed under RT_FLAG_TERMINATE_ON_FIRST_HIT); false
// once the stack runs empty.
fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
                   sbtRecordOffset: u32, sbtRecordStride: u32,
                   bestHit: ptr<function, HitInfo>,
                   bestT: ptr<function, f32>,
                   payload: ptr<function, Payload>) -> bool {
  let invD = vec3<f32>(1.0) / rayWorld.direction;

  // Stack depth = tree depth = log2(N_PADDED) = 14 for N_PADDED=16384;
  // 24 gives generous headroom.
  var stack: array<u32, 24>;
  var sp: u32 = 0u;
  stack[sp] = 0u; sp = sp + 1u;

  loop {
    if (sp == 0u) { break; }
    sp = sp - 1u;
    let nodeIdx = stack[sp];
    let node = tlasBvhNodes[nodeIdx];
    if (!_rtAabb(rayWorld.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
      continue;
    }

    if (nodeIdx >= TLAS_BVH_LEAVES_START) {
      // Leaf: resolve the instance entry and run the per-instance test.
      let leafIdx = nodeIdx - TLAS_BVH_LEAVES_START;
      let i = tlasEntryOrder[leafIdx];
      // 0xFFFFFFFF marks a leaf with no instance behind it (presumably
      // padding up to TLAS_BVH_N_PADDED); skip it.
      if (i == 0xFFFFFFFFu) { continue; }
      let inst = tlasEntries[i];
      // Low 8 bits of maskHGOffset: instance mask. Upper bits: hit-group offset.
      let instanceMask = inst.maskHGOffset & 0xFFu;
      if ((instanceMask & cullMask) == 0u) { continue; }
      if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }

      // Transform the ray to object space (rows of a 3x4 world-to-object
      // matrix; .w carries the translation, which only applies to the origin).
      let r0 = inst.worldToObjectR0;
      let r1 = inst.worldToObjectR1;
      let r2 = inst.worldToObjectR2;
      var rayObj: RayDesc;
      rayObj.origin = vec3<f32>(
        dot(r0.xyz, rayWorld.origin) + r0.w,
        dot(r1.xyz, rayWorld.origin) + r1.w,
        dot(r2.xyz, rayWorld.origin) + r2.w
      );
      rayObj.direction = vec3<f32>(
        dot(r0.xyz, rayWorld.direction),
        dot(r1.xyz, rayWorld.direction),
        dot(r2.xyz, rayWorld.direction)
      );
      rayObj.tMin = rayWorld.tMin;
      rayObj.tMax = *bestT;

      // Apply per-instance overrides to the ray flags.
      var effective = flags;
      let iflags = inst.instanceFlags;
      if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
        effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
      }
      if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
        effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
      }
      if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
        effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
      }

      let hitGroupOffset = inst.maskHGOffset >> 8u;
      let hitGroupBase = sbtRecordOffset + hitGroupOffset;
      let meshRec = meshRecords[inst.blasMeshIdx];

      let pre = *bestT;
      let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
                                      bestHit, bestT, payload);

      // _rtTraverseBlas only returns true right after committing a hit from
      // this instance, so an early stop implies *bestHit belongs to `inst`.
      // The instance data must be recorded BEFORE returning: the BLAS-level
      // hit record does not carry the transform or custom index, and
      // returning first would hand closest-hit a zero-initialised transform.
      if (endSearch || (*bestT) < pre) {
        (*bestHit).objectToWorldR0 = inst.objectToWorldR0;
        (*bestHit).objectToWorldR1 = inst.objectToWorldR1;
        (*bestHit).objectToWorldR2 = inst.objectToWorldR2;
        (*bestHit).customIndex = inst.customIndex;
        if (endSearch || (effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) {
          return true;
        }
      }
    } else {
      // Internal node: push both children; left is pushed last so it is
      // visited first. If there is no room for both, neither is pushed.
      let left = 2u * nodeIdx + 1u;
      let right = 2u * nodeIdx + 2u;
      if (sp + 1u < 24u) {
        stack[sp] = right; sp = sp + 1u;
        stack[sp] = left; sp = sp + 1u;
      }
    }
  }
  return false;
}
|
|
||||||
|
|
||||||
// Traces one world-space ray through the TLAS and dispatches the result.
//
// A hit closer than rayTMax invokes runClosestHit for the hit's hit group,
// unless RT_FLAG_SKIP_CLOSEST_HIT is set; otherwise runMiss(missIndex) is
// invoked. Only the low 8 bits of cullMask are forwarded to the traversal.
// tlasIdx is accepted but not read by this function.
fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
            sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
            rayOrigin: vec3<f32>, rayTMin: f32,
            rayDir: vec3<f32>, rayTMax: f32,
            payload: ptr<function, Payload>) {
  var ray: RayDesc;
  ray.origin = rayOrigin;
  ray.tMin = rayTMin;
  ray.direction = rayDir;
  ray.tMax = rayTMax;

  var closest: HitInfo;
  closest.t = rayTMax;
  var closestT: f32 = rayTMax;

  // The early-termination result is not needed here; the hit distance
  // alone decides between closest-hit and miss.
  _ = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
                      sbtRecordOffset, sbtRecordStride,
                      &closest, &closestT, payload);

  // Written as a negated '<' so a NaN distance still takes the miss path.
  if (!(closestT < rayTMax)) {
    runMiss(missIndex, ray, payload);
    return;
  }

  if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) == 0u) {
    runClosestHit(closest.hitGroupIndex, ray, closest, payload);
  }
}
|
|
||||||
`;
|
|
||||||
|
|
||||||
// ════════════════════════════════════════════════════════════════════════
|
// ════════════════════════════════════════════════════════════════════════
|
||||||
// WAVEFRONT RT — streaming tracer (GENERATE → PREP → TRACE → SHADE →
|
// WAVEFRONT RT — streaming tracer (GENERATE → PREP → TRACE → SHADE →
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue