diff --git a/WAVEFRONT-DESIGN.md b/WAVEFRONT-DESIGN.md
index 47e42d0..3b3742a 100644
--- a/WAVEFRONT-DESIGN.md
+++ b/WAVEFRONT-DESIGN.md
@@ -66,7 +66,11 @@ maxDepth=1 (primary only). Sponza maxDepth=2 (primary + shadow).
 - [x] megakernel dead path removed (RT pipeline builds only wavefront)
 - [~] binding packing (Phase 7): SKIPPED — target device reports 64 storage
       buffers/stage (≥12), so the merge is unnecessary (issue makes it
-      conditional on <12).
+      conditional on <12). NOTE: this only holds because dom-webgpu.js now
+      requests the adapter's reported maxStorageBuffersPerShaderStage at
+      device creation (was hardcoded to 16, which left room for ~1 user
+      storage buffer and broke RT pipelines declaring ≥2). Devices that
+      genuinely report <12 storage buffers/stage still need this packing.
 
 ### Measured (this container's GPU, via timestamp-query; NOT a 4090)
 Per-pass GPU time, 1920×995, primary+shadow (maxDepth=2):
diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js
index c0e83d1..625a552 100644
--- a/additional/dom-webgpu.js
+++ b/additional/dom-webgpu.js
@@ -111,15 +111,20 @@ if (!adapter) {
     throw initError;
 }
 // Ask for everything the adapter is willing to give us, up to the values
-// the RT pipeline actually needs. The megakernel prelude declares 7
-// storage buffers at group(1) (tlasEntries / bvhNodes / meshRecords /
-// vertices / indices / primRemap / vertexAttribs); user pipelines like
-// 3DForts add more at group(2), and the WebGPU baseline of 8 isn't
-// enough. Adapters routinely report 10+ — clamp our request to whatever
-// the adapter actually supports so the call doesn't reject on baseline-
-// only devices. Same pattern for storage textures (we use 1 output image
-// per dispatch but headroom is cheap) and for the global storage-buffer
-// pool which is the per-pipeline count's parent budget.
+// the RT pipeline actually needs. The wavefront SHADE kernel alone binds
+// ~16 storage buffers (14 RT/work buffers + wfPayload at group(1),
+// wfIndirect at group(2)) BEFORE a single user binding — and user
+// pipelines like 3DForts add several more at group(3) (camera, light,
+// brace-stress SoA, per-instance TLAS metadata). A hardcoded request of
+// 16 leaves room for ~1 user storage buffer and overflows the moment a
+// pipeline declares 2+, failing the build with "Too many bindings of
+// type StorageBuffers". So request whatever the adapter actually
+// supports (the GPUs we target report 64) rather than a fixed 16;
+// `clamp` still caps each request at the adapter's limit, so baseline-only
+// devices (reporting just 8) get a valid — if tight — request. Same
+// headroom-is-cheap pattern for storage textures (1 output image per
+// dispatch) and for the pipeline-layout pool that parents the per-stage
+// count.
 const adapterLimits = adapter.limits || {};
 const requiredLimits = {};
 const clamp = (name, want) => {
@@ -128,8 +133,8 @@ const clamp = (name, want) => {
         requiredLimits[name] = Math.min(want, cap);
     }
 };
-clamp("maxStorageBuffersPerShaderStage", 16);
-clamp("maxStorageBuffersInPipelineLayout", 16);
+clamp("maxStorageBuffersPerShaderStage", adapterLimits.maxStorageBuffersPerShaderStage || 16);
+clamp("maxStorageBuffersInPipelineLayout", adapterLimits.maxStorageBuffersInPipelineLayout || 16);
 clamp("maxStorageTexturesPerShaderStage", 8);
 // The TLAS BVH build runs one workgroup of up to N threads in shared
 // memory (bitonic sort over morton codes + sweep-tree refit). Need the