fix(webgpu): reshape wavefront TRACE/SHADE to 2-D to survive >4.19M rays #12

Merged
catbot merged 1 commit from claude/issue-11 into master 2026-06-01 13:10:05 +02:00
2 changed files with 14 additions and 5 deletions

View file

@ -1835,8 +1835,17 @@ fn _wfReadRay(i: u32) -> WfRay {
// buffer's emit counter.
fn _wfPrep() {
let n = _wfCurCount();
wfIndirect[0] = (n + 63u) / 64u;
wfIndirect[1] = 1u;
let wg = (n + 63u) / 64u;
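// maxComputeWorkgroupsPerDimension is 65535 on Dawn/Firefox, so a 1-D
// dispatch of ceil(W*H/64) workgroups exceeds it once there are more than
// 65535 * 64 = 4,194,240 (~4.19M) rays (~2560x1640). WebGPU then silently
// drops the indirect dispatch -> black screen. Spread the workgroups
// across a 2-D grid instead; wfTrace/wfShade rebuild the linear ray index
// from num_workgroups.
// NOTE(review): gx * gy can exceed wg (e.g. wg = 65536 -> 65535 x 2), so
// the surplus threads get indices >= n; confirm _wfTrace/_wfShade reject
// out-of-range indices.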
let MAXDIM = 65535u;
let gx = min(wg, MAXDIM);
let gy = (wg + MAXDIM - 1u) / MAXDIM; // = 1 when 1 <= wg <= MAXDIM; = 0 when wg == 0
wfIndirect[0] = gx;
wfIndirect[1] = gy;
wfIndirect[2] = 1u;
if (wfParams.curIsA == 1u) { atomicStore(&wfCounters[1], 0u); }
else { atomicStore(&wfCounters[0], 0u); }

View file

@ -231,13 +231,13 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
// TRACE — zero user code: pure traversal + intersection. One thread
// per live ray; writes a HitResult into wfHits[i].
wgsl += "\n@compute @workgroup_size(64)\n";
wgsl += "fn wfTrace(@builtin(global_invocation_id) gid: vec3<u32>) { _wfTrace(gid.x); }\n";
wgsl += "fn wfTrace(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgroups) nwg: vec3<u32>) { _wfTrace(gid.y * nwg.x * 64u + gid.x); }\n";
// SHADE — one thread per live ray; loads the ray + its hit + payload,
// dispatches to runMiss / runClosestHit, which may rtAccumulate and
// rtEmitRay continuation/shadow rays into the next buffer.
wgsl += "\n@compute @workgroup_size(64)\n";
wgsl += "fn wfShade(@builtin(global_invocation_id) gid: vec3<u32>) { _wfShade(gid.x); }\n";
wgsl += "fn wfShade(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgroups) nwg: vec3<u32>) { _wfShade(gid.y * nwg.x * 64u + gid.x); }\n";
// RESOLVE — one thread per pixel; runs the user resolve (or identity)
// over the linear accumulator and stores to the output image.