Merge pull request 'fix(webgpu): request adapter's storage-buffer limit, not hardcoded 16' (#9) from claude/issue-8 into master
This commit is contained in:
commit
6470c12db5
2 changed files with 21 additions and 12 deletions
|
|
@ -66,7 +66,11 @@ maxDepth=1 (primary only). Sponza maxDepth=2 (primary + shadow).
|
|||
- [x] megakernel dead path removed (RT pipeline builds only wavefront)
|
||||
- [~] binding packing (Phase 7): SKIPPED — target device reports 64 storage
|
||||
buffers/stage (≥12), so the merge is unnecessary (the issue makes it
|
||||
conditional on <12).
|
||||
conditional on <12). NOTE: this only holds because dom-webgpu.js now
|
||||
requests the adapter's reported maxStorageBuffersPerShaderStage at
|
||||
device creation (was hardcoded to 16, which left room for ~1 user
|
||||
storage buffer and broke RT pipelines with ≥2). Devices that genuinely
|
||||
report <12 storage buffers/stage still need this packing.
|
||||
|
||||
### Measured (this container's GPU, via timestamp-query; NOT a 4090)
|
||||
Per-pass GPU time, 1920×995, primary+shadow (maxDepth=2):
|
||||
|
|
|
|||
|
|
@ -111,15 +111,20 @@ if (!adapter) {
|
|||
throw initError;
|
||||
}
|
||||
// Ask for everything the adapter is willing to give us, up to the values
|
||||
// the RT pipeline actually needs. The megakernel prelude declares 7
|
||||
// storage buffers at group(1) (tlasEntries / bvhNodes / meshRecords /
|
||||
// vertices / indices / primRemap / vertexAttribs); user pipelines like
|
||||
// 3DForts add more at group(2), and the WebGPU baseline of 8 isn't
|
||||
// enough. Adapters routinely report 10+ — clamp our request to whatever
|
||||
// the adapter actually supports so the call doesn't reject on baseline-
|
||||
// only devices. Same pattern for storage textures (we use 1 output image
|
||||
// per dispatch but headroom is cheap) and for the global storage-buffer
|
||||
// pool which is the per-pipeline count's parent budget.
|
||||
// the RT pipeline actually needs. The wavefront SHADE kernel alone binds
|
||||
// ~16 storage buffers (14 RT/work buffers + wfPayload at group(1),
|
||||
// wfIndirect at group(2)) BEFORE a single user binding — and user
|
||||
// pipelines like 3DForts add several more at group(3) (camera, light,
|
||||
// brace-stress SoA, per-instance TLAS metadata). A hardcoded request of
|
||||
// 16 leaves room for ~1 user storage buffer and overflows the moment a
|
||||
// pipeline declares 2+, failing the build with "Too many bindings of
|
||||
// type StorageBuffers". So request whatever the adapter actually
|
||||
// supports (the GPUs we target report 64) rather than a fixed 16;
|
||||
// `clamp` already takes the min of the request and the adapter cap, so baseline-only devices
|
||||
// (reporting just 8) still get a valid — if tight — request. Same
|
||||
// headroom-is-cheap pattern for storage textures (1 output image per
|
||||
// dispatch) and for the pipeline-layout pool that is the parent budget of the per-stage
|
||||
// count.
|
||||
const adapterLimits = adapter.limits || {};
|
||||
const requiredLimits = {};
|
||||
const clamp = (name, want) => {
|
||||
|
|
@ -128,8 +133,8 @@ const clamp = (name, want) => {
|
|||
requiredLimits[name] = Math.min(want, cap);
|
||||
}
|
||||
};
|
||||
clamp("maxStorageBuffersPerShaderStage", 16);
|
||||
clamp("maxStorageBuffersInPipelineLayout", 16);
|
||||
clamp("maxStorageBuffersPerShaderStage", adapterLimits.maxStorageBuffersPerShaderStage || 16);
|
||||
clamp("maxStorageBuffersInPipelineLayout", adapterLimits.maxStorageBuffersInPipelineLayout || 16);
|
||||
clamp("maxStorageTexturesPerShaderStage", 8);
|
||||
// The TLAS BVH build runs one workgroup of up to N threads in shared
|
||||
// memory (bitonic sort over morton codes + sweep-tree refit). Need the
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue