diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js index 625a552..a15142f 100644 --- a/additional/dom-webgpu.js +++ b/additional/dom-webgpu.js @@ -1834,9 +1834,18 @@ fn _wfReadRay(i: u32) -> WfRay { // PREP — publish indirect args for the upcoming TRACE/SHADE; zero the next // buffer's emit counter. fn _wfPrep() { - let n = _wfCurCount(); - wfIndirect[0] = (n + 63u) / 64u; - wfIndirect[1] = 1u; + let n = _wfCurCount(); + let wg = (n + 63u) / 64u; + // maxComputeWorkgroupsPerDimension is 65535 on Dawn/Firefox; a 1-D + // dispatch of ceil(W*H/64) overflows it past ~4.19M rays (~2560x1640) + // and WebGPU silently drops the indirect dispatch -> black screen. Spread + // across a 2-D grid; wfTrace/wfShade rebuild the linear index from + // num_workgroups. + let MAXDIM = 65535u; + let gx = min(wg, MAXDIM); + let gy = (wg + MAXDIM - 1u) / MAXDIM; // = 1 when 1 <= wg <= MAXDIM; 0 when wg == 0 + wfIndirect[0] = gx; + wfIndirect[1] = gy; wfIndirect[2] = 1u; if (wfParams.curIsA == 1u) { atomicStore(&wfCounters[1], 0u); } else { atomicStore(&wfCounters[0], 0u); } diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp index 8dd949d..373249a 100644 --- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp +++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp @@ -231,13 +231,13 @@ void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/, // TRACE — zero user code: pure traversal + intersection. One thread // per live ray; writes a HitResult into wfHits[i]. 
wgsl += "\n@compute @workgroup_size(64)\n"; - wgsl += "fn wfTrace(@builtin(global_invocation_id) gid: vec3<u32>) { _wfTrace(gid.x); }\n"; + wgsl += "fn wfTrace(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgroups) nwg: vec3<u32>) { _wfTrace(gid.y * nwg.x * 64u + gid.x); }\n"; // SHADE — one thread per live ray; loads the ray + its hit + payload, // dispatches to runMiss / runClosestHit, which may rtAccumulate and // rtEmitRay continuation/shadow rays into the next buffer. wgsl += "\n@compute @workgroup_size(64)\n"; - wgsl += "fn wfShade(@builtin(global_invocation_id) gid: vec3<u32>) { _wfShade(gid.x); }\n"; + wgsl += "fn wfShade(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgroups) nwg: vec3<u32>) { _wfShade(gid.y * nwg.x * 64u + gid.x); }\n"; // RESOLVE — one thread per pixel; runs the user resolve (or identity) // over the linear accumulator and stores to the output image.