From 5553ded476bf747f407cbfc61723a2fc7e59f4f4 Mon Sep 17 00:00:00 2001
From: Jorijn van der Graaf <jorijnvdgraaf@catcrafts.net>
Date: Mon, 18 May 2026 18:43:30 +0200
Subject: [PATCH] webgpu triangle

---
 additional/dom-webgpu.js                      | 976 +++++++++++++++++-
 examples/VulkanTriangle/closesthit.wgsl       |  12 +
 examples/VulkanTriangle/main.cpp              |  68 +-
 examples/VulkanTriangle/miss.wgsl             |   5 +
 examples/VulkanTriangle/project.cpp           |  27 +-
 examples/VulkanTriangle/raygen.glsl           |  22 +-
 examples/VulkanTriangle/raygen.wgsl           |  39 +
 .../Crafter.Graphics-Mesh-WebGPU.cpp          | 240 +++++
 .../Crafter.Graphics-PipelineRTWebGPU.cpp     | 187 ++++
 ...ter.Graphics-RenderingElement3D-WebGPU.cpp |  91 ++
 ...fter.Graphics-ShaderBindingTableWebGPU.cpp |  32 +
 .../Crafter.Graphics-WebGPUComputeShader.cpp  |  12 +-
 interfaces/Crafter.Graphics-Mesh.cppm         |  51 +
 .../Crafter.Graphics-PipelineRTWebGPU.cppm    |  51 +
 interfaces/Crafter.Graphics-RT.cppm           |  83 ++
 interfaces/Crafter.Graphics-RTPass.cppm       |  39 +
 .../Crafter.Graphics-RenderingElement3D.cppm  |  63 +-
 ...ter.Graphics-ShaderBindingTableWebGPU.cppm |  64 ++
 interfaces/Crafter.Graphics-WebGPU.cppm       |  51 +-
 .../Crafter.Graphics-WebGPUComputeShader.cppm |  19 +-
 interfaces/Crafter.Graphics.cppm              |   6 +
 project.cpp                                   |  11 +-
 22 files changed, 2107 insertions(+), 42 deletions(-)
 create mode 100644 examples/VulkanTriangle/closesthit.wgsl
 create mode 100644 examples/VulkanTriangle/miss.wgsl
 create mode 100644 examples/VulkanTriangle/raygen.wgsl
 create mode 100644 implementations/Crafter.Graphics-Mesh-WebGPU.cpp
 create mode 100644 implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
 create mode 100644 implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp
 create mode 100644 implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp
 create mode 100644 interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
 create mode 100644 interfaces/Crafter.Graphics-RT.cppm
 create mode 100644 interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm

diff --git a/additional/dom-webgpu.js b/additional/dom-webgpu.js
index 8706e90..834f320 100644
--- a/additional/dom-webgpu.js
+++ b/additional/dom-webgpu.js
@@ -45,9 +45,13 @@ function stub(name) {
         "wgpuCreateAtlasTexture", "wgpuWriteAtlasRegion", "wgpuDestroyTexture",
         "wgpuCreateLinearClampSampler", "wgpuFrameBegin", "wgpuFrameEnd",
         "wgpuDispatchQuads", "wgpuDispatchCircles", "wgpuDispatchImages", "wgpuDispatchText",
+        "wgpuLoadCustomShader", "wgpuDispatchCustom",
+        "wgpuRegisterMeshBLAS", "wgpuLoadRTPipeline", "wgpuDispatchRT", "wgpuBuildTLAS",
     ]) {
         // Read-write ints don't need a stub-throw; return 0 for the size queries.
-        e[n] = n.endsWith("Width") || n.endsWith("Height") ? () => 0 : stub(n);
+        e[n] = n.endsWith("Width") || n.endsWith("Height")
+            ? () => 0
+            : (n === "wgpuRegisterMeshBLAS" ? () => 0 : stub(n));
     }
 }
 
@@ -702,8 +706,16 @@ env.wgpuDispatchText = (itemsHandle, headerPtr, gx, gy, atlasHandle, sampHandle)
 
 const customPipelines = new Map();   // handle → { pipeline, bgls, hdrBG, byGroup }
 
-env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
-    const wgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
+env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount, rayQueryFlag) => {
+    if (!rtState.vertHeap && rayQueryFlag) rtInit();
+    const userWgsl = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
+    // For rayQuery-capable shaders, prepend the RT prelude + ray-query
+    // library. The user shader can declare its own group 0 / 2+ bindings
+    // but MUST NOT redeclare group(1) — that's reserved for RT data.
+    const wgsl = rayQueryFlag
+        ? (rtWgslTypes + rtWgslMegakernelBindings + rtWgslRayQueryLib + "\n" + userWgsl)
+        : userWgsl;
+
     const bindings = [];
     const dv = new DataView(memU8().buffer, bindingsPtr, bindingsCount * 8);
     for (let i = 0; i < bindingsCount; i++) {
@@ -726,18 +738,31 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
         byGroup.get(b.group).push(b);
     }
 
-    // Group 0 = header uniform, Group 1 = ping-pong out+prev — always injected.
+    // Group 0 = header uniform (same for both paths).
+    // Group 1 = ping-pong out+prev OR RT data (TLAS, BVH, meshRecs, verts,
+    // idx, primRemap, outImage) when rayQuery flag is on.
     const bgls = [
         device.createBindGroupLayout({ entries: [
             { binding: 0, visibility: GPUShaderStage.COMPUTE,
               buffer: { type: "uniform", hasDynamicOffset: true, minBindingSize: 48 } },
         ]}),
-        device.createBindGroupLayout({ entries: [
-            { binding: 0, visibility: GPUShaderStage.COMPUTE,
-              storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
-            { binding: 1, visibility: GPUShaderStage.COMPUTE,
-              texture: { sampleType: "float", viewDimension: "2d" } },
-        ]}),
+        rayQueryFlag
+            ? device.createBindGroupLayout({ entries: [
+                { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+                { binding: 6, visibility: GPUShaderStage.COMPUTE,
+                  storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+            ]})
+            : device.createBindGroupLayout({ entries: [
+                { binding: 0, visibility: GPUShaderStage.COMPUTE,
+                  storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+                { binding: 1, visibility: GPUShaderStage.COMPUTE,
+                  texture: { sampleType: "float", viewDimension: "2d" } },
+            ]}),
     ];
     // Sorted custom groups. Pad any gaps with empty bgls (WebGPU pipeline
     // layouts require a contiguous array of GPUBindGroupLayout per group
@@ -766,6 +791,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
         pipeline = device.createComputePipeline({ layout, compute: { module: mod, entryPoint: "main" } });
     } catch (e) {
         console.error("[crafter-wgpu] custom shader compile failed:", e);
+        if (rayQueryFlag) console.error("[crafter-wgpu] WGSL was:\n", wgsl);
         return 0;
     }
 
@@ -775,7 +801,7 @@ env.wgpuLoadCustomShader = (wgslPtr, wgslLen, bindingsPtr, bindingsCount) => {
     });
 
     const handle = newHandle();
-    customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups });
+    customPipelines.set(handle, { pipeline, bgls, hdrBG, byGroup, sortedGroups, rayQueryCapable: !!rayQueryFlag });
     return handle;
 };
 
@@ -794,7 +820,31 @@ env.wgpuDispatchCustom = (pipelineHandle, pushPtr, pushBytes, handlesPtr, handle
 
     state.pass.setPipeline(pipe.pipeline);
     state.pass.setBindGroup(0, pipe.hdrBG, [off]);
-    state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1]));
+    // Group 1: rayQuery-capable shaders get the RT data heaps + the most
+    // recently built TLAS; everyone else gets the standard ping-pong pair.
+    if (pipe.rayQueryCapable) {
+        const tlasBuf = buffers.get(rtState.currentTlas);
+        if (!tlasBuf) {
+            console.error("[crafter-wgpu] rayQuery dispatch but no TLAS built yet");
+            return;
+        }
+        const outView = state.outIsPing ? state.pingView : state.pongView;
+        const rtBG = device.createBindGroup({
+            layout: pipe.bgls[1],
+            entries: [
+                { binding: 0, resource: { buffer: tlasBuf } },
+                { binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
+                { binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
+                { binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
+                { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
+                { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
+                { binding: 6, resource: outView },
+            ],
+        });
+        state.pass.setBindGroup(1, rtBG);
+    } else {
+        state.pass.setBindGroup(1, getGroup1BG(pipe.bgls[1]));
+    }
 
     // Walk bindings in declaration order and assemble bind groups.
     // handles[] from wasm is in the SAME order as customBindings, so we
@@ -873,6 +923,908 @@ env.wgpuInit = () => {
 // listens to. We trigger sizing on next frame begin; no work here.
 window.addEventListener("resize", () => { /* ensureSized in wgpuFrameBegin */ });
 
+// ─────────────────────────────────────────────────────────────────────
+// ── Software raytracing subsystem ────────────────────────────────────
+// ─────────────────────────────────────────────────────────────────────
+//
+// WebGPU has no hardware RT. The library emulates DXR/VKRT semantics in
+// compute: a megakernel raygen pipeline traverses a CPU-built BLAS BVH +
+// GPU-built TLAS, dispatches user closesthit / anyhit / miss via a
+// generated `switch`. The same traversal kernel is also exposed as a
+// rayQuery* function set for regular compute shaders (see the
+// rayQueryCapable path under wgpuLoadCustomShader).
+//
+// The four mesh data streams live in *shared* GPU heaps; each Mesh::Build
+// appends to them and gets a u32 handle back. The handle is what the
+// application stores in RTInstance::accelerationStructureReference.
+
+// ── WGSL library: shared types + constants (no bindings) ─────────────
+// Used by both the megakernel pipeline (which adds group(0..1) bindings)
+// and the TLAS-build pipeline (which only uses group(2)). Keeping bindings
+// out of the shared block avoids inflating storage-buffer count past the
+// 8-per-stage baseline limit on pipelines that don't actually use them.
+const rtWgslTypes = String.raw`
+struct RTDispatchHeader {
+    surfaceW:      u32,
+    surfaceH:      u32,
+    instanceCount: u32,
+    flags:         u32,
+};
+
+struct RayDesc {
+    origin:    vec3<f32>,
+    tMin:      f32,
+    direction: vec3<f32>,
+    tMax:      f32,
+};
+
+struct HitInfo {
+    t:             f32,
+    instanceId:    u32,
+    primitiveId:   u32,
+    hitGroupIndex: u32,
+    attribs:       vec2<f32>,
+    objectRayOrigin:    vec3<f32>,
+    objectRayDirection: vec3<f32>,
+    objectToWorldR0:    vec4<f32>,
+    objectToWorldR1:    vec4<f32>,
+    objectToWorldR2:    vec4<f32>,
+    customIndex:        u32,
+};
+
+// Matches Crafter::BVHNode in interfaces/Crafter.Graphics-Mesh.cppm.
+struct BVHNode {
+    aabbMin:           vec3<f32>,
+    firstChildOrPrim:  u32,
+    aabbMax:           vec3<f32>,
+    primCount:         u32,
+};
+
+// Per-mesh record. Indexed by RTInstance::accelerationStructureReference.
+struct MeshRecord {
+    rootAabbMin:     vec3<f32>,
+    vertexOffset:    u32,
+    rootAabbMax:     vec3<f32>,
+    indexOffset:     u32,
+    bvhOffset:       u32,
+    primRemapOffset: u32,
+    triangleCount:   u32,
+    _pad:            u32,
+};
+
+// Per-instance TLAS record built by the TLAS-build compute pass.
+struct TLASEntry {
+    aabbMin:          vec3<f32>,
+    maskHGOffset:     u32,
+    aabbMax:          vec3<f32>,
+    blasMeshIdx:      u32,
+    objectToWorldR0:  vec4<f32>,
+    objectToWorldR1:  vec4<f32>,
+    objectToWorldR2:  vec4<f32>,
+    worldToObjectR0:  vec4<f32>,
+    worldToObjectR1:  vec4<f32>,
+    worldToObjectR2:  vec4<f32>,
+    customIndex:      u32,
+    instanceFlags:    u32,
+    _pad0:            u32,
+    _pad1:            u32,
+};
+
+// ── Ray flag mirror of VkGeometryInstanceFlagBitsKHR + DXR ray flags ──
+const RT_FLAG_OPAQUE:                          u32 = 0x1u;
+const RT_FLAG_NO_OPAQUE:                       u32 = 0x2u;
+const RT_FLAG_TERMINATE_ON_FIRST_HIT:          u32 = 0x4u;
+const RT_FLAG_SKIP_CLOSEST_HIT:                u32 = 0x8u;
+const RT_FLAG_CULL_BACK_FACING_TRIANGLES:      u32 = 0x10u;
+const RT_FLAG_CULL_FRONT_FACING_TRIANGLES:     u32 = 0x20u;
+const RT_FLAG_CULL_OPAQUE:                     u32 = 0x40u;
+const RT_FLAG_CULL_NO_OPAQUE:                  u32 = 0x80u;
+const RT_FLAG_SKIP_TRIANGLES:                  u32 = 0x100u;
+const RT_FLAG_SKIP_AABBS:                      u32 = 0x200u;
+
+const RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE: u32 = 0x1u;
+const RT_INSTANCE_TRIANGLE_FLIP_FACING:         u32 = 0x2u;
+const RT_INSTANCE_FORCE_OPAQUE:                 u32 = 0x4u;
+const RT_INSTANCE_FORCE_NO_OPAQUE:              u32 = 0x8u;
+
+const RT_ANYHIT_ACCEPT:      u32 = 0u;
+const RT_ANYHIT_IGNORE:      u32 = 1u;
+const RT_ANYHIT_END_SEARCH:  u32 = 2u;
+
+const RT_INTERSECTION_NONE:     u32 = 0u;
+const RT_INTERSECTION_TRIANGLE: u32 = 1u;
+`;
+
+// Megakernel-only bindings. Concatenated after rtWgslTypes for the
+// raygen pipeline; the TLAS-build pipeline omits these because it doesn't
+// touch them — declaring them would push it past 8 storage buffers per
+// stage on the WebGPU baseline.
+const rtWgslMegakernelBindings = String.raw`
+@group(0) @binding(0) var<uniform>       hdr         : RTDispatchHeader;
+@group(1) @binding(0) var<storage,read>  tlasEntries : array<TLASEntry>;
+@group(1) @binding(1) var<storage,read>  bvhNodes    : array<BVHNode>;
+@group(1) @binding(2) var<storage,read>  meshRecords : array<MeshRecord>;
+@group(1) @binding(3) var<storage,read>  vertices    : array<f32>;
+@group(1) @binding(4) var<storage,read>  indices     : array<u32>;
+@group(1) @binding(5) var<storage,read>  primRemap   : array<u32>;
+@group(1) @binding(6) var outImage : texture_storage_2d<rgba8unorm, write>;
+`;
+
+const rtWgslPrelude = rtWgslTypes + rtWgslMegakernelBindings;
+
+// ── WGSL library: helpers + traverseBlas + traverseTlas + traceRay ───
+// Injected after the user-supplied closesthit/anyhit/miss sources +
+// mega-switch dispatchers (which PipelineRTWebGPU emits). User raygen
+// sources sit after this block so they can call traceRay.
+const rtWgslHelpers = String.raw`
+fn _rtFetchTri(meshRec: MeshRecord, triIndex: u32) -> array<vec3<f32>, 3> {
+    let baseIdx = meshRec.indexOffset + triIndex * 3u;
+    let i0 = indices[baseIdx + 0u];
+    let i1 = indices[baseIdx + 1u];
+    let i2 = indices[baseIdx + 2u];
+    let baseV = meshRec.vertexOffset;
+    let v0i = (baseV + i0) * 3u;
+    let v1i = (baseV + i1) * 3u;
+    let v2i = (baseV + i2) * 3u;
+    return array<vec3<f32>, 3>(
+        vec3<f32>(vertices[v0i + 0u], vertices[v0i + 1u], vertices[v0i + 2u]),
+        vec3<f32>(vertices[v1i + 0u], vertices[v1i + 1u], vertices[v1i + 2u]),
+        vec3<f32>(vertices[v2i + 0u], vertices[v2i + 1u], vertices[v2i + 2u]),
+    );
+}
+
+fn _rtAabb(ro: vec3<f32>, invRd: vec3<f32>, mn: vec3<f32>, mx: vec3<f32>, tMax: f32) -> bool {
+    let t0 = (mn - ro) * invRd;
+    let t1 = (mx - ro) * invRd;
+    let tmin = min(t0, t1);
+    let tmax = max(t0, t1);
+    let tEnter = max(max(tmin.x, tmin.y), tmin.z);
+    let tExit  = min(min(tmax.x, tmax.y), tmax.z);
+    return tExit >= max(tEnter, 0.0) && tEnter <= tMax;
+}
+
+struct _RtTriHit { hit: bool, t: f32, u: f32, v: f32 };
+fn _rtTri(ro: vec3<f32>, rd: vec3<f32>, p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>,
+          tMin: f32, tMax: f32) -> _RtTriHit {
+    var r: _RtTriHit;
+    r.hit = false;
+    let e1 = p1 - p0;
+    let e2 = p2 - p0;
+    let h = cross(rd, e2);
+    let a = dot(e1, h);
+    if (abs(a) < 1e-8) { return r; }
+    let f = 1.0 / a;
+    let s = ro - p0;
+    let u = f * dot(s, h);
+    if (u < 0.0 || u > 1.0) { return r; }
+    let q = cross(s, e1);
+    let v = f * dot(rd, q);
+    if (v < 0.0 || u + v > 1.0) { return r; }
+    let t = f * dot(e2, q);
+    if (t < tMin || t > tMax) { return r; }
+    r.hit = true; r.t = t; r.u = u; r.v = v;
+    return r;
+}
+
+// Iterative stack-based BLAS traversal. Returns true if traversal was
+// terminated by an END_SEARCH from anyhit (caller should stop entirely).
+fn _rtTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
+                   instanceId: u32, hitGroupBase: u32,
+                   bestHit: ptr<function, HitInfo>,
+                   bestT:   ptr<function, f32>,
+                   payload: ptr<function, Payload>) -> bool {
+    let invD = vec3<f32>(1.0) / rayObj.direction;
+    var stack: array<u32, 32>;
+    var sp: u32 = 0u;
+    var nodeRel: u32 = 0u;
+
+    loop {
+        let abs = meshRec.bvhOffset + nodeRel;
+        let node = bvhNodes[abs];
+        if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, *bestT)) {
+            if (sp == 0u) { break; }
+            sp = sp - 1u; nodeRel = stack[sp]; continue;
+        }
+        if (node.primCount > 0u) {
+            for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
+                let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
+                let verts = _rtFetchTri(meshRec, triIndex);
+                let tr = _rtTri(rayObj.origin, rayObj.direction,
+                                verts[0], verts[1], verts[2],
+                                rayObj.tMin, *bestT);
+                if (!tr.hit) { continue; }
+
+                let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
+                let facing = dot(geomNormal, rayObj.direction);
+                if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES) != 0u && facing > 0.0) { continue; }
+                if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
+
+                var candidate: HitInfo;
+                candidate.t             = tr.t;
+                candidate.instanceId    = instanceId;
+                candidate.primitiveId   = triIndex;
+                candidate.hitGroupIndex = hitGroupBase;
+                candidate.attribs       = vec2<f32>(tr.u, tr.v);
+                candidate.objectRayOrigin    = rayObj.origin;
+                candidate.objectRayDirection = rayObj.direction;
+
+                let opaque = (flags & RT_FLAG_OPAQUE) != 0u
+                          || (flags & RT_FLAG_NO_OPAQUE) == 0u;  // default opaque
+
+                if (opaque) {
+                    *bestHit = candidate;
+                    *bestT   = tr.t;
+                    if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
+                } else {
+                    let r = runAnyHit(hitGroupBase, rayObj, candidate, payload);
+                    if (r == RT_ANYHIT_END_SEARCH) {
+                        *bestHit = candidate;
+                        *bestT   = tr.t;
+                        return true;
+                    }
+                    if (r == RT_ANYHIT_ACCEPT) {
+                        *bestHit = candidate;
+                        *bestT   = tr.t;
+                        if ((flags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
+                    }
+                }
+            }
+            if (sp == 0u) { break; }
+            sp = sp - 1u; nodeRel = stack[sp]; continue;
+        }
+        // inner node — push right, descend left
+        let left  = node.firstChildOrPrim;
+        let right = left + 1u;
+        if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
+        nodeRel = left;
+    }
+    return false;
+}
+
+fn _rtTraverseTlas(rayWorld: RayDesc, flags: u32, cullMask: u32,
+                   sbtRecordOffset: u32, sbtRecordStride: u32,
+                   bestHit: ptr<function, HitInfo>,
+                   bestT:   ptr<function, f32>,
+                   payload: ptr<function, Payload>) -> bool {
+    let invD = vec3<f32>(1.0) / rayWorld.direction;
+    let n = hdr.instanceCount;
+    for (var i: u32 = 0u; i < n; i = i + 1u) {
+        let inst = tlasEntries[i];
+        let instanceMask = inst.maskHGOffset & 0xFFu;
+        if ((instanceMask & cullMask) == 0u) { continue; }
+        if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, *bestT)) { continue; }
+
+        // Transform ray to object space.
+        let r0 = inst.worldToObjectR0;
+        let r1 = inst.worldToObjectR1;
+        let r2 = inst.worldToObjectR2;
+        var rayObj: RayDesc;
+        rayObj.origin = vec3<f32>(
+            dot(r0.xyz, rayWorld.origin) + r0.w,
+            dot(r1.xyz, rayWorld.origin) + r1.w,
+            dot(r2.xyz, rayWorld.origin) + r2.w,
+        );
+        rayObj.direction = vec3<f32>(
+            dot(r0.xyz, rayWorld.direction),
+            dot(r1.xyz, rayWorld.direction),
+            dot(r2.xyz, rayWorld.direction),
+        );
+        rayObj.tMin = rayWorld.tMin;
+        rayObj.tMax = *bestT;
+
+        var effective = flags;
+        let iflags = inst.instanceFlags;
+        if ((iflags & RT_INSTANCE_FORCE_OPAQUE) != 0u) {
+            effective = (effective | RT_FLAG_OPAQUE) & ~RT_FLAG_NO_OPAQUE;
+        }
+        if ((iflags & RT_INSTANCE_FORCE_NO_OPAQUE) != 0u) {
+            effective = (effective | RT_FLAG_NO_OPAQUE) & ~RT_FLAG_OPAQUE;
+        }
+        if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
+            effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
+        }
+
+        let hitGroupOffset = inst.maskHGOffset >> 8u;
+        let hitGroupBase   = sbtRecordOffset + hitGroupOffset;
+        let meshRec        = meshRecords[inst.blasMeshIdx];
+
+        let pre = *bestT;
+        let endSearch = _rtTraverseBlas(rayObj, effective, meshRec, i, hitGroupBase,
+                                        bestHit, bestT, payload);
+        if (endSearch) { return true; }
+        if ((*bestT) < pre) {
+            // record world-space object-to-world for the closest-hit shader
+            (*bestHit).objectToWorldR0 = inst.objectToWorldR0;
+            (*bestHit).objectToWorldR1 = inst.objectToWorldR1;
+            (*bestHit).objectToWorldR2 = inst.objectToWorldR2;
+            (*bestHit).customIndex     = inst.customIndex;
+            if ((effective & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u) { return true; }
+        }
+    }
+    return false;
+}
+
+fn traceRay(tlasIdx: u32, flags: u32, cullMask: u32,
+            sbtRecordOffset: u32, sbtRecordStride: u32, missIndex: u32,
+            rayOrigin: vec3<f32>, rayTMin: f32,
+            rayDir:    vec3<f32>, rayTMax: f32,
+            payload: ptr<function, Payload>) {
+    var ray: RayDesc;
+    ray.origin    = rayOrigin;
+    ray.direction = rayDir;
+    ray.tMin      = rayTMin;
+    ray.tMax      = rayTMax;
+    var bestHit: HitInfo;
+    bestHit.t = rayTMax;
+    var bestT = rayTMax;
+    let ended = _rtTraverseTlas(ray, flags, cullMask & 0xFFu,
+                                sbtRecordOffset, sbtRecordStride,
+                                &bestHit, &bestT, payload);
+    if (bestT < rayTMax) {
+        if ((flags & RT_FLAG_SKIP_CLOSEST_HIT) == 0u) {
+            runClosestHit(bestHit.hitGroupIndex, ray, bestHit, payload);
+        }
+    } else {
+        runMiss(missIndex, ray, payload);
+    }
+}
+`;
+
+// ── WGSL library: rayQuery API for non-megakernel compute shaders ────
+//
+// Mirrors GL_EXT_ray_query semantics that 3DForts's physics shaders use
+// (projectile-collide, splash, builder-pick). User WGSL:
+//   var rq: RayQuery;
+//   rayQueryInitialize(&rq, 0u, flags, mask, origin, tMin, dir, tMax);
+//   while (rayQueryProceed(&rq)) {}    // run traversal to completion
+//   if (rayQueryGetCommittedIntersectionType(&rq) != RT_INTERSECTION_NONE) {
+//       let t = rayQueryGetCommittedT(&rq);
+//       ...
+//   }
+//
+// v1 simplification: traversal force-opaques every hit (no anyhit). The
+// user can still test for triangle vs miss and read t/instance/bary.
+// Anyhit-style candidate inspection is a future extension.
+const rtWgslRayQueryLib = String.raw`
+struct RayQuery {
+    ray: RayDesc,
+    flags: u32,
+    cullMask: u32,
+    committedType:                u32,
+    committedT:                   f32,
+    committedInstanceId:          u32,
+    committedInstanceCustomIndex: u32,
+    committedPrimitiveIndex:      u32,
+    committedBarycentrics:        vec2<f32>,
+    committedObjectRayOrigin:     vec3<f32>,
+    committedObjectRayDirection:  vec3<f32>,
+    committedObjectToWorldR0:     vec4<f32>,
+    committedObjectToWorldR1:     vec4<f32>,
+    committedObjectToWorldR2:     vec4<f32>,
+    committedWorldToObjectR0:     vec4<f32>,
+    committedWorldToObjectR1:     vec4<f32>,
+    committedWorldToObjectR2:     vec4<f32>,
+    done: u32,
+};
+
+fn _rqTraverseBlas(rayObj: RayDesc, flags: u32, meshRec: MeshRecord,
+                   instanceId: u32, customIndex: u32,
+                   inst: TLASEntry,
+                   rq: ptr<function, RayQuery>) {
+    let invD = vec3<f32>(1.0) / rayObj.direction;
+    var stack: array<u32, 32>;
+    var sp: u32 = 0u;
+    var nodeRel: u32 = 0u;
+
+    loop {
+        let abs = meshRec.bvhOffset + nodeRel;
+        let node = bvhNodes[abs];
+        if (!_rtAabb(rayObj.origin, invD, node.aabbMin, node.aabbMax, (*rq).committedT)) {
+            if (sp == 0u) { break; }
+            sp = sp - 1u; nodeRel = stack[sp]; continue;
+        }
+        if (node.primCount > 0u) {
+            for (var i: u32 = 0u; i < node.primCount; i = i + 1u) {
+                let triIndex = primRemap[meshRec.primRemapOffset + node.firstChildOrPrim + i];
+                let verts = _rtFetchTri(meshRec, triIndex);
+                let tr = _rtTri(rayObj.origin, rayObj.direction,
+                                verts[0], verts[1], verts[2],
+                                rayObj.tMin, (*rq).committedT);
+                if (!tr.hit) { continue; }
+
+                let geomNormal = cross(verts[1] - verts[0], verts[2] - verts[0]);
+                let facing = dot(geomNormal, rayObj.direction);
+                if ((flags & RT_FLAG_CULL_BACK_FACING_TRIANGLES)  != 0u && facing > 0.0) { continue; }
+                if ((flags & RT_FLAG_CULL_FRONT_FACING_TRIANGLES) != 0u && facing < 0.0) { continue; }
+
+                (*rq).committedType                = RT_INTERSECTION_TRIANGLE;
+                (*rq).committedT                   = tr.t;
+                (*rq).committedInstanceId          = instanceId;
+                (*rq).committedInstanceCustomIndex = customIndex;
+                (*rq).committedPrimitiveIndex      = triIndex;
+                (*rq).committedBarycentrics        = vec2<f32>(tr.u, tr.v);
+                (*rq).committedObjectRayOrigin     = rayObj.origin;
+                (*rq).committedObjectRayDirection  = rayObj.direction;
+                (*rq).committedObjectToWorldR0     = inst.objectToWorldR0;
+                (*rq).committedObjectToWorldR1     = inst.objectToWorldR1;
+                (*rq).committedObjectToWorldR2     = inst.objectToWorldR2;
+                (*rq).committedWorldToObjectR0     = inst.worldToObjectR0;
+                (*rq).committedWorldToObjectR1     = inst.worldToObjectR1;
+                (*rq).committedWorldToObjectR2     = inst.worldToObjectR2;
+            }
+            if (sp == 0u) { break; }
+            sp = sp - 1u; nodeRel = stack[sp]; continue;
+        }
+        let left  = node.firstChildOrPrim;
+        let right = left + 1u;
+        if (sp < 32u) { stack[sp] = right; sp = sp + 1u; }
+        nodeRel = left;
+    }
+}
+
+fn _rqTraverseTlas(rq: ptr<function, RayQuery>) {
+    let rayWorld = (*rq).ray;
+    let invD = vec3<f32>(1.0) / rayWorld.direction;
+    let n = hdr.instanceCount;
+    let cullMask = (*rq).cullMask;
+    let rayFlags = (*rq).flags;
+    for (var i: u32 = 0u; i < n; i = i + 1u) {
+        let inst = tlasEntries[i];
+        let instanceMask = inst.maskHGOffset & 0xFFu;
+        if ((instanceMask & cullMask) == 0u) { continue; }
+        if (!_rtAabb(rayWorld.origin, invD, inst.aabbMin, inst.aabbMax, (*rq).committedT)) { continue; }
+
+        let r0 = inst.worldToObjectR0;
+        let r1 = inst.worldToObjectR1;
+        let r2 = inst.worldToObjectR2;
+        var rayObj: RayDesc;
+        rayObj.origin = vec3<f32>(
+            dot(r0.xyz, rayWorld.origin) + r0.w,
+            dot(r1.xyz, rayWorld.origin) + r1.w,
+            dot(r2.xyz, rayWorld.origin) + r2.w,
+        );
+        rayObj.direction = vec3<f32>(
+            dot(r0.xyz, rayWorld.direction),
+            dot(r1.xyz, rayWorld.direction),
+            dot(r2.xyz, rayWorld.direction),
+        );
+        rayObj.tMin = rayWorld.tMin;
+        rayObj.tMax = (*rq).committedT;
+
+        var effective = rayFlags;
+        let iflags = inst.instanceFlags;
+        if ((iflags & RT_INSTANCE_TRIANGLE_FACING_CULL_DISABLE) != 0u) {
+            effective = effective & ~(RT_FLAG_CULL_BACK_FACING_TRIANGLES | RT_FLAG_CULL_FRONT_FACING_TRIANGLES);
+        }
+
+        let meshRec = meshRecords[inst.blasMeshIdx];
+        _rqTraverseBlas(rayObj, effective, meshRec, i, inst.customIndex, inst, rq);
+
+        if ((rayFlags & RT_FLAG_TERMINATE_ON_FIRST_HIT) != 0u
+            && (*rq).committedType != RT_INTERSECTION_NONE) {
+            return;
+        }
+    }
+}
+
+fn rayQueryInitialize(rq: ptr<function, RayQuery>, tlasIdx: u32, flags: u32, cullMask: u32,
+                      origin: vec3<f32>, tMin: f32, direction: vec3<f32>, tMax: f32) {
+    (*rq).ray.origin    = origin;
+    (*rq).ray.tMin      = tMin;
+    (*rq).ray.direction = direction;
+    (*rq).ray.tMax      = tMax;
+    (*rq).flags         = flags;
+    (*rq).cullMask      = cullMask & 0xFFu;
+    (*rq).committedType = RT_INTERSECTION_NONE;
+    (*rq).committedT    = tMax;
+    (*rq).done          = 0u;
+}
+
+fn rayQueryProceed(rq: ptr<function, RayQuery>) -> bool {
+    if ((*rq).done != 0u) { return false; }
+    _rqTraverseTlas(rq);
+    (*rq).done = 1u;
+    return false;
+}
+
+fn rayQueryTerminate(rq: ptr<function, RayQuery>) {
+    (*rq).done = 1u;
+}
+
+fn rayQueryGetCommittedIntersectionType(rq: ptr<function, RayQuery>)         -> u32        { return (*rq).committedType; }
+fn rayQueryGetCommittedT(rq: ptr<function, RayQuery>)                        -> f32        { return (*rq).committedT; }
+fn rayQueryGetCommittedInstanceId(rq: ptr<function, RayQuery>)               -> u32        { return (*rq).committedInstanceId; }
+fn rayQueryGetCommittedInstanceCustomIndex(rq: ptr<function, RayQuery>)      -> u32        { return (*rq).committedInstanceCustomIndex; }
+fn rayQueryGetCommittedPrimitiveIndex(rq: ptr<function, RayQuery>)           -> u32        { return (*rq).committedPrimitiveIndex; }
+fn rayQueryGetCommittedBarycentrics(rq: ptr<function, RayQuery>)             -> vec2<f32>  { return (*rq).committedBarycentrics; }
+fn rayQueryGetCommittedObjectRayOrigin(rq: ptr<function, RayQuery>)          -> vec3<f32>  { return (*rq).committedObjectRayOrigin; }
+fn rayQueryGetCommittedObjectRayDirection(rq: ptr<function, RayQuery>)       -> vec3<f32>  { return (*rq).committedObjectRayDirection; }
+fn rayQueryGetCommittedWorldPosition(rq: ptr<function, RayQuery>)            -> vec3<f32>  {
+    return (*rq).ray.origin + (*rq).ray.direction * (*rq).committedT;
+}
+`;
+
+// ── Internal compute pipeline: builds TLASEntry[] from the RTInstance[]
+// + the meshRecords table. One thread per instance.
+// Uses only rtWgslTypes (no megakernel bindings) so it stays well under
+// the 8-storage-buffer-per-stage baseline limit.
+const tlasBuildWgsl = rtWgslTypes /* needs MeshRecord + TLASEntry */ + String.raw`
+struct RTInstance {
+    transformR0:        vec4<f32>,
+    transformR1:        vec4<f32>,
+    transformR2:        vec4<f32>,
+    customIndexMask:    u32,   // customIndex: low 24, mask: high 8
+    sbtFlags:           u32,   // sbtOffset: low 24, flags: high 8
+    accelStructureRef:  vec2<u32>,
+};
+
+@group(0) @binding(0) var<storage, read>       inInstances : array<RTInstance>;
+@group(0) @binding(1) var<storage, read>       inMeshes    : array<MeshRecord>;
+@group(0) @binding(2) var<storage, read_write> outEntries  : array<TLASEntry>;
+
+fn _invMat3(c0: vec3<f32>, c1: vec3<f32>, c2: vec3<f32>) -> array<vec3<f32>, 3> {
+    let m00 = c1.y*c2.z - c1.z*c2.y;
+    let m01 = c1.z*c2.x - c1.x*c2.z;
+    let m02 = c1.x*c2.y - c1.y*c2.x;
+    let det = c0.x*m00 + c0.y*m01 + c0.z*m02;
+    let inv = 1.0 / det;
+    return array<vec3<f32>, 3>(
+        vec3<f32>(m00,                       c0.z*c2.y - c0.y*c2.z, c0.y*c1.z - c0.z*c1.y) * inv,
+        vec3<f32>(m01,                       c0.x*c2.z - c0.z*c2.x, c0.z*c1.x - c0.x*c1.z) * inv,
+        vec3<f32>(m02,                       c0.y*c2.x - c0.x*c2.y, c0.x*c1.y - c0.y*c1.x) * inv,
+    );
+}
+
+@compute @workgroup_size(64, 1, 1)
+fn tlasBuildMain(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i = gid.x;
+    if (i >= arrayLength(&inInstances)) { return; }
+    let inst = inInstances[i];
+    let meshIdx    = inst.accelStructureRef.x;
+    let custom     = inst.customIndexMask & 0xFFFFFFu;
+    let mask       = (inst.customIndexMask >> 24u) & 0xFFu;
+    let sbtOffset  = inst.sbtFlags & 0xFFFFFFu;
+    let iflags     = (inst.sbtFlags >> 24u) & 0xFFu;
+
+    let rec = inMeshes[meshIdx];
+    // 8 corners → world AABB.
+    var worldMin = vec3<f32>(1e30);
+    var worldMax = vec3<f32>(-1e30);
+    for (var c: u32 = 0u; c < 8u; c = c + 1u) {
+        let corner = vec3<f32>(
+            select(rec.rootAabbMin.x, rec.rootAabbMax.x, (c & 1u) != 0u),
+            select(rec.rootAabbMin.y, rec.rootAabbMax.y, (c & 2u) != 0u),
+            select(rec.rootAabbMin.z, rec.rootAabbMax.z, (c & 4u) != 0u),
+        );
+        let wc = vec3<f32>(
+            dot(inst.transformR0.xyz, corner) + inst.transformR0.w,
+            dot(inst.transformR1.xyz, corner) + inst.transformR1.w,
+            dot(inst.transformR2.xyz, corner) + inst.transformR2.w,
+        );
+        worldMin = min(worldMin, wc);
+        worldMax = max(worldMax, wc);
+    }
+
+    // Inverse 3x4 affine.
+    let inv = _invMat3(inst.transformR0.xyz, inst.transformR1.xyz, inst.transformR2.xyz);
+    let T = vec3<f32>(inst.transformR0.w, inst.transformR1.w, inst.transformR2.w);
+    let invT = vec3<f32>(-dot(inv[0], T), -dot(inv[1], T), -dot(inv[2], T));
+
+    var e: TLASEntry;
+    e.aabbMin = worldMin;
+    e.aabbMax = worldMax;
+    e.maskHGOffset = mask | (sbtOffset << 8u);
+    e.blasMeshIdx  = meshIdx;
+    e.objectToWorldR0 = inst.transformR0;
+    e.objectToWorldR1 = inst.transformR1;
+    e.objectToWorldR2 = inst.transformR2;
+    e.worldToObjectR0 = vec4<f32>(inv[0], invT.x);
+    e.worldToObjectR1 = vec4<f32>(inv[1], invT.y);
+    e.worldToObjectR2 = vec4<f32>(inv[2], invT.z);
+    e.customIndex     = custom;
+    e.instanceFlags   = iflags;
+    outEntries[i]     = e;
+}
+`;
+
+// ── RT runtime state ──────────────────────────────────────────────────
+// Mesh heaps grow geometrically; each Mesh::Build appends + records its
+// offsets in meshRecordsCpu/Buffer. nextMeshHandle is what gets returned
+// to C++ as RTInstance::accelerationStructureReference.
+const RT_HEAP_INITIAL_BYTES = 64 * 1024;
+function makeRtHeap() {
+    return {
+        gpu: device.createBuffer({
+            size: RT_HEAP_INITIAL_BYTES,
+            usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
+        }),
+        capacity: RT_HEAP_INITIAL_BYTES,
+        cursor:   0,
+    };
+}
+function rtHeapEnsure(h, neededBytes) {
+    if (h.cursor + neededBytes <= h.capacity) return;
+    let cap = h.capacity;
+    while (h.cursor + neededBytes > cap) cap *= 2;
+    const ng = device.createBuffer({
+        size: cap,
+        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
+    });
+    const enc = device.createCommandEncoder();
+    if (h.cursor > 0) enc.copyBufferToBuffer(h.gpu, 0, ng, 0, h.cursor);
+    queue.submit([enc.finish()]);
+    h.gpu.destroy();
+    h.gpu = ng;
+    h.capacity = cap;
+    // Invalidate cached bind groups that referenced the heap.
+    rtState.bindGroupCache.clear();
+}
+
+const rtState = {
+    vertHeap: null,        // f32 stream (3 floats per vertex)
+    indexHeap: null,       // u32 stream
+    bvhHeap: null,         // BVHNode stream (32 bytes per node)
+    primRemapHeap: null,   // u32 stream
+
+    meshRecordsBuffer: null,    // GPUBuffer of MeshRecord[]
+    meshRecordsCapacity: 0,
+    nextMeshHandle: 1,
+
+    rtHeader: null,             // uniform buffer for RTDispatchHeader (256 B aligned)
+
+    bindGroupCache: new Map(),  // key → bind group
+
+    tlasBuildPipeline: null,
+    tlasBuildBgl: null,
+
+    // Latest TLAS buffer handle from wgpuBuildTLAS, used by rayQuery-capable
+    // compute shaders at dispatch time.
+    currentTlas: 0,
+    currentTlasInstanceCount: 0,
+};
+
+function rtInit() {
+    rtState.vertHeap      = makeRtHeap();
+    rtState.indexHeap     = makeRtHeap();
+    rtState.bvhHeap       = makeRtHeap();
+    rtState.primRemapHeap = makeRtHeap();
+    rtState.meshRecordsCapacity = 16;
+    rtState.meshRecordsBuffer = device.createBuffer({
+        size: rtState.meshRecordsCapacity * 48,
+        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    rtState.rtHeader = device.createBuffer({
+        size: 256,
+        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
+    });
+
+    // TLAS-build compute pipeline. Only group(0) is used (3 SSBOs).
+    const mod = device.createShaderModule({ code: tlasBuildWgsl, label: "rt-tlas-build" });
+    const tlasBuildBgl = device.createBindGroupLayout({ entries: [
+        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+        { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+        { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
+    ]});
+    rtState.tlasBuildBgl = tlasBuildBgl;
+    rtState.tlasBuildPipeline = device.createComputePipeline({
+        layout: device.createPipelineLayout({ bindGroupLayouts: [tlasBuildBgl] }),
+        compute: { module: mod, entryPoint: "tlasBuildMain" },
+    });
+}
+
+function rtMeshRecordsEnsure(meshCount) {
+    if (meshCount <= rtState.meshRecordsCapacity) return;
+    let cap = rtState.meshRecordsCapacity;
+    while (cap < meshCount) cap *= 2;
+    const ng = device.createBuffer({
+        size: cap * 48,
+        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    const enc = device.createCommandEncoder();
+    enc.copyBufferToBuffer(rtState.meshRecordsBuffer, 0, ng, 0,
+                           rtState.meshRecordsCapacity * 48);
+    queue.submit([enc.finish()]);
+    rtState.meshRecordsBuffer.destroy();
+    rtState.meshRecordsBuffer = ng;
+    rtState.meshRecordsCapacity = cap;
+    rtState.bindGroupCache.clear();
+}
+
+env.wgpuRegisterMeshBLAS = (minX, minY, minZ, maxX, maxY, maxZ,
+                            verticesPtr, vertexCount,
+                            indicesPtr,  indexCount,
+                            bvhNodesPtr, bvhNodeCount,
+                            primRemapPtr, primRemapCount) => {
+    if (!rtState.vertHeap) rtInit();
+
+    const vBytes   = vertexCount  * 12;
+    const iBytes   = indexCount   * 4;
+    const nBytes   = bvhNodeCount * 32;
+    const rBytes   = primRemapCount * 4;
+
+    rtHeapEnsure(rtState.vertHeap,      vBytes);
+    rtHeapEnsure(rtState.indexHeap,     iBytes);
+    rtHeapEnsure(rtState.bvhHeap,       nBytes);
+    rtHeapEnsure(rtState.primRemapHeap, rBytes);
+
+    const vOff = rtState.vertHeap.cursor      / 12;   // in vec3 units
+    const iOff = rtState.indexHeap.cursor     / 4;    // in u32 units
+    const nOff = rtState.bvhHeap.cursor       / 32;   // in BVHNode units
+    const rOff = rtState.primRemapHeap.cursor / 4;
+
+    // queue.writeBuffer requires multiple-of-4 sizes. Vertex byte count is
+    // already 12*n; index/bvh/remap are 4*n / 32*n / 4*n — all multiples of 4.
+    queue.writeBuffer(rtState.vertHeap.gpu,      rtState.vertHeap.cursor,
+                      memU8().buffer, verticesPtr, vBytes);
+    queue.writeBuffer(rtState.indexHeap.gpu,     rtState.indexHeap.cursor,
+                      memU8().buffer, indicesPtr,  iBytes);
+    queue.writeBuffer(rtState.bvhHeap.gpu,       rtState.bvhHeap.cursor,
+                      memU8().buffer, bvhNodesPtr, nBytes);
+    queue.writeBuffer(rtState.primRemapHeap.gpu, rtState.primRemapHeap.cursor,
+                      memU8().buffer, primRemapPtr, rBytes);
+
+    rtState.vertHeap.cursor      += vBytes;
+    rtState.indexHeap.cursor     += iBytes;
+    rtState.bvhHeap.cursor       += nBytes;
+    rtState.primRemapHeap.cursor += rBytes;
+
+    const handle = rtState.nextMeshHandle++;
+    rtMeshRecordsEnsure(handle + 1);
+
+    // Build the MeshRecord (48 bytes) and write it.
+    const rec = new ArrayBuffer(48);
+    const f32 = new Float32Array(rec);
+    const u32 = new Uint32Array(rec);
+    f32[0] = minX; f32[1] = minY; f32[2] = minZ;
+    u32[3] = vOff;
+    f32[4] = maxX; f32[5] = maxY; f32[6] = maxZ;
+    u32[7] = iOff;
+    u32[8] = nOff;
+    u32[9] = rOff;
+    u32[10] = (vertexCount > 0) ? (indexCount / 3) : 0;
+    u32[11] = 0;
+    queue.writeBuffer(rtState.meshRecordsBuffer, handle * 48, rec);
+
+    return handle;
+};
+
+env.wgpuBuildTLAS = (instanceBufHandle, instanceCount, tlasOutBufHandle) => {
+    if (!rtState.tlasBuildPipeline) return;
+    const inst = buffers.get(instanceBufHandle);
+    const out  = buffers.get(tlasOutBufHandle);
+    if (!inst || !out) {
+        console.error("[crafter-wgpu] wgpuBuildTLAS: unknown buffer handle");
+        return;
+    }
+
+    const bg = device.createBindGroup({
+        layout: rtState.tlasBuildBgl,
+        entries: [
+            { binding: 0, resource: { buffer: inst } },
+            { binding: 1, resource: { buffer: rtState.meshRecordsBuffer } },
+            { binding: 2, resource: { buffer: out } },
+        ],
+    });
+
+    if (state.pass) {
+        // Mid-frame — close the user's compute pass, run our build pass
+        // on the same encoder, then reopen.
+        state.pass.end();
+        state.pass = null;
+    }
+    const enc = state.encoder || device.createCommandEncoder();
+    const ownEncoder = !state.encoder;
+    const pass = enc.beginComputePass({ label: "tlas-build" });
+    pass.setPipeline(rtState.tlasBuildPipeline);
+    pass.setBindGroup(0, bg);
+    const groups = Math.ceil(instanceCount / 64);
+    pass.dispatchWorkgroups(groups, 1, 1);
+    pass.end();
+    if (ownEncoder) {
+        queue.submit([enc.finish()]);
+    } else {
+        state.pass = state.encoder.beginComputePass();
+    }
+
+    // Publish so rayQuery-capable compute pipelines pick up the latest TLAS
+    // without each dispatch having to thread the handle explicitly.
+    rtState.currentTlas = tlasOutBufHandle;
+    rtState.currentTlasInstanceCount = instanceCount;
+};
+
+// RT pipeline loader — wraps user-supplied WGSL (sources + generated mega
+// switches + raygen + @compute entry) with the library prelude/helpers.
+const rtPipelines = new Map(); // handle → { pipeline, bgls }
+
+env.wgpuLoadRTPipeline = (wgslPtr, wgslLen) => {
+    if (!rtState.vertHeap) rtInit();
+    const userPart = new TextDecoder().decode(memU8().subarray(wgslPtr, wgslPtr + wgslLen));
+
+    // Insert helpers at the marker; prepend prelude.
+    const marker = "// @CRAFTER_RT_LIBRARY_HELPERS_HERE";
+    let beforeHelpers = userPart;
+    let afterHelpers = "";
+    const mi = userPart.indexOf(marker);
+    if (mi >= 0) {
+        beforeHelpers = userPart.substring(0, mi);
+        afterHelpers  = userPart.substring(mi + marker.length);
+    }
+    const fullWgsl = rtWgslPrelude + "\n" + beforeHelpers + "\n" + rtWgslHelpers + "\n" + afterHelpers;
+
+    let pipeline;
+    try {
+        const mod = device.createShaderModule({ code: fullWgsl, label: "rt-megakernel" });
+        // RTDispatchHeader is 16 bytes; bind exactly that.
+        const headerBgl = device.createBindGroupLayout({ entries: [
+            { binding: 0, visibility: GPUShaderStage.COMPUTE,
+              buffer: { type: "uniform", minBindingSize: 16 } },
+        ]});
+        const dataBgl = device.createBindGroupLayout({ entries: [
+            { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
+            { binding: 6, visibility: GPUShaderStage.COMPUTE,
+              storageTexture: { format: "rgba8unorm", access: "write-only", viewDimension: "2d" } },
+        ]});
+        pipeline = device.createComputePipeline({
+            layout: device.createPipelineLayout({ bindGroupLayouts: [headerBgl, dataBgl] }),
+            compute: { module: mod, entryPoint: "main" },
+        });
+        const handle = newHandle();
+        rtPipelines.set(handle, { pipeline, headerBgl, dataBgl });
+        return handle;
+    } catch (e) {
+        console.error("[crafter-wgpu] RT pipeline compile failed:", e);
+        console.error("[crafter-wgpu] WGSL was:\n", fullWgsl);
+        return 0;
+    }
+};
+
+env.wgpuDispatchRT = (pipelineHandle, pushPtr, pushBytes,
+                      tlasBufHandle, instanceCount, gx, gy) => {
+    if (!state.pass) return;
+    const pipe = rtPipelines.get(pipelineHandle);
+    const tlas = buffers.get(tlasBufHandle);
+    if (!pipe || !tlas) {
+        console.error("[crafter-wgpu] wgpuDispatchRT: unknown pipeline or tlas");
+        return;
+    }
+    // Write RT header from push data (first 16 bytes). Surface dims + instance count + flags.
+    const hdr32 = new Uint32Array(4);
+    hdr32[0] = state.width;
+    hdr32[1] = state.height;
+    hdr32[2] = instanceCount;
+    hdr32[3] = 0;
+    queue.writeBuffer(rtState.rtHeader, 0, hdr32);
+
+    const headerBg = device.createBindGroup({
+        layout: pipe.headerBgl,
+        entries: [{ binding: 0, resource: { buffer: rtState.rtHeader, offset: 0, size: 16 } }],
+    });
+    const outView = state.outIsPing ? state.pingView : state.pongView;
+    const dataBg = device.createBindGroup({
+        layout: pipe.dataBgl,
+        entries: [
+            { binding: 0, resource: { buffer: tlas } },
+            { binding: 1, resource: { buffer: rtState.bvhHeap.gpu } },
+            { binding: 2, resource: { buffer: rtState.meshRecordsBuffer } },
+            { binding: 3, resource: { buffer: rtState.vertHeap.gpu } },
+            { binding: 4, resource: { buffer: rtState.indexHeap.gpu } },
+            { binding: 5, resource: { buffer: rtState.primRemapHeap.gpu } },
+            { binding: 6, resource: outView },
+        ],
+    });
+
+    state.pass.setPipeline(pipe.pipeline);
+    state.pass.setBindGroup(0, headerBg);
+    state.pass.setBindGroup(1, dataBg);
+    state.pass.dispatchWorkgroups(gx, gy, 1);
+    state.outIsPing = !state.outIsPing;
+};
+
 console.log("[crafter-wgpu] init complete; env handlers wired");
 } catch (e) {
     // Capture any throw so the stub error messages name the real cause
diff --git a/examples/VulkanTriangle/closesthit.wgsl b/examples/VulkanTriangle/closesthit.wgsl
new file mode 100644
index 0000000..e0d29ae
--- /dev/null
+++ b/examples/VulkanTriangle/closesthit.wgsl
@@ -0,0 +1,12 @@
+// WebGPU port of closesthit.glsl. Library concatenates this BEFORE the
+// library helpers, so `Payload` declared here is visible to traceRay,
+// runClosestHit, the mega-switch, and the user's raygen source.
+
+struct Payload {
+    color: vec3<f32>,
+};
+
+fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {
+    let bary = vec3<f32>(1.0 - hit.attribs.x - hit.attribs.y, hit.attribs.x, hit.attribs.y);
+    (*payload).color = bary;
+}
diff --git a/examples/VulkanTriangle/main.cpp b/examples/VulkanTriangle/main.cpp
index 1c5184c..3fcd287 100644
--- a/examples/VulkanTriangle/main.cpp
+++ b/examples/VulkanTriangle/main.cpp
@@ -1,4 +1,6 @@
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
 #include "vulkan/vulkan.h"
+#endif
 #include <cassert>
 
 import Crafter.Graphics;
@@ -7,7 +9,7 @@ import std;
 import Crafter.Event;
 import Crafter.Math;
 
-
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
 int main() {
     Device::Initialize();
     Window window(1280, 720, "HelloVulkan");
@@ -89,7 +91,7 @@ int main() {
     RenderingElement3D::elements.emplace_back(&renderer);
 
     MatrixRowMajor<float, 4, 3, 1> transform = MatrixRowMajor<float, 4, 3, 1>::Identity();
-    std::memcpy(renderer.instance.transform.matrix, transform.m, sizeof(transform.m));
+    transform.Store(reinterpret_cast<float*>(renderer.instance.transform.matrix));
     RenderingElement3D::BuildTLAS(cmd, 0);
     RenderingElement3D::BuildTLAS(cmd, 1);
     RenderingElement3D::BuildTLAS(cmd, 2);
@@ -202,3 +204,65 @@ int main() {
     window.Render();
     window.StartSync();
 }
+#else
+// DOM-mode port. Same scene (one triangle), software-emulated raytracing
+// via compute. Shaders are read from .wgsl files shipped as static
+// assets (see project.cpp). Renders barycentric colors via the
+// hit/miss/raygen mega-switch in PipelineRTWebGPU.
+int main() {
+    Device::Initialize();
+    static Window window(1280, 720, "HelloVulkan");
+    auto cmd = window.StartInit();
+
+    DescriptorHeapWebGPU heap;
+    heap.Initialize(/*images*/ 4, /*buffers*/ 4, /*samplers*/ 2);
+
+    std::array<WebGPUShader, 3> shaders {{
+        WebGPUShader(std::filesystem::path("raygen.wgsl"),     "raygen_main",     WebGPURTStage::Raygen),
+        WebGPUShader(std::filesystem::path("miss.wgsl"),       "miss_main",       WebGPURTStage::Miss),
+        WebGPUShader(std::filesystem::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit),
+    }};
+    ShaderBindingTableWebGPU sbt;
+    sbt.Init(shaders);
+
+    std::array<RTShaderGroup, 1> raygenGroups {{
+        { .type = RTShaderGroupType::General,           .generalShader = 0 },
+    }};
+    std::array<RTShaderGroup, 1> missGroups {{
+        { .type = RTShaderGroupType::General,           .generalShader = 1 },
+    }};
+    std::array<RTShaderGroup, 1> hitGroups {{
+        { .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 },
+    }};
+
+    PipelineRTWebGPU pipeline;
+    pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt);
+
+    Mesh triangleMesh;
+    std::array<Vector<float, 3, 3>, 3> verts {{{-150, -150, 100}, {0, 150, 100}, {150, -150, 100}}};
+    std::array<std::uint32_t, 3> index {{2, 1, 0}};
+    triangleMesh.Build(verts, index, cmd);
+
+    static RenderingElement3D renderer;
+    renderer.instance.transform.matrix[0][0] = 1; renderer.instance.transform.matrix[0][1] = 0; renderer.instance.transform.matrix[0][2] = 0; renderer.instance.transform.matrix[0][3] = 0;
+    renderer.instance.transform.matrix[1][0] = 0; renderer.instance.transform.matrix[1][1] = 1; renderer.instance.transform.matrix[1][2] = 0; renderer.instance.transform.matrix[1][3] = 0;
+    renderer.instance.transform.matrix[2][0] = 0; renderer.instance.transform.matrix[2][1] = 0; renderer.instance.transform.matrix[2][2] = 1; renderer.instance.transform.matrix[2][3] = 0;
+    renderer.instance.instanceCustomIndex                    = 0;
+    renderer.instance.mask                                   = 0xFF;
+    renderer.instance.instanceShaderBindingTableRecordOffset = 0;
+    renderer.instance.flags                                  = kRTGeometryInstanceForceOpaque;
+    renderer.instance.accelerationStructureReference         = triangleMesh.blasAddr;
+    RenderingElement3D::Add(&renderer);
+    RenderingElement3D::BuildTLAS(cmd, 0);
+
+    window.descriptorHeap = &heap;
+    window.FinishInit();
+
+    RTPass rtPass(&pipeline);
+    window.passes.push_back(&rtPass);
+
+    window.Render();
+    window.StartUpdate();
+    window.StartSync();
+}
+#endif
diff --git a/examples/VulkanTriangle/miss.wgsl b/examples/VulkanTriangle/miss.wgsl
new file mode 100644
index 0000000..a0ba944
--- /dev/null
+++ b/examples/VulkanTriangle/miss.wgsl
@@ -0,0 +1,5 @@
+// WebGPU port of miss.glsl.
+
+fn miss_main(ray: RayDesc, payload: ptr<function, Payload>) {
+    (*payload).color = vec3<f32>(1.0, 1.0, 1.0);
+}
diff --git a/examples/VulkanTriangle/project.cpp b/examples/VulkanTriangle/project.cpp
index fca4a1e..f5f7bb7 100644
--- a/examples/VulkanTriangle/project.cpp
+++ b/examples/VulkanTriangle/project.cpp
@@ -4,6 +4,14 @@ namespace fs = std::filesystem;
 using namespace Crafter;
 
 extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> args) {
+    bool isWasm = false;
+    for (std::string_view a : args) {
+        if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) {
+            isWasm = true;
+            break;
+        }
+    }
+
     std::vector<std::string> graphicsArgs(args.begin(), args.end());
     Configuration* graphics = LocalProject({
         .projectFile = "../../project.cpp",
@@ -14,6 +22,12 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
     cfg.path = "./";
     cfg.name = "VulkanTriangle";
     cfg.outputName = "VulkanTriangle";
+    cfg.type = ConfigurationType::Executable;
+    if (isWasm) {
+        cfg.target = "wasm32-wasip1";
+        cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""});
+        cfg.compileFlags.push_back("-msimd128");
+    }
     ApplyStandardArgs(cfg, args);
     cfg.dependencies = { graphics };
 
@@ -21,8 +35,15 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
     std::array<fs::path, 1> impls = { "main" };
     cfg.GetInterfacesAndImplementations(ifaces, impls);
 
-    cfg.shaders.emplace_back(fs::path("raygen.glsl"),  std::string("main"), ShaderType::RayGen);
-    cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit);
-    cfg.shaders.emplace_back(fs::path("miss.glsl"), std::string("main"), ShaderType::Miss);
+    if (isWasm) {
+        cfg.files.emplace_back(fs::path("raygen.wgsl"));
+        cfg.files.emplace_back(fs::path("closesthit.wgsl"));
+        cfg.files.emplace_back(fs::path("miss.wgsl"));
+        EnableWasiBrowserRuntime(cfg);
+    } else {
+        cfg.shaders.emplace_back(fs::path("raygen.glsl"),     std::string("main"), ShaderType::RayGen);
+        cfg.shaders.emplace_back(fs::path("closesthit.glsl"), std::string("main"), ShaderType::ClosestHit);
+        cfg.shaders.emplace_back(fs::path("miss.glsl"),       std::string("main"), ShaderType::Miss);
+    }
     return cfg;
 }
diff --git a/examples/VulkanTriangle/raygen.glsl b/examples/VulkanTriangle/raygen.glsl
index 1031ffd..eff2633 100644
--- a/examples/VulkanTriangle/raygen.glsl
+++ b/examples/VulkanTriangle/raygen.glsl
@@ -34,17 +34,17 @@ void main() {
         1.0
     ));
 
-    // traceRayEXT(
-    //     topLevelAS[bufferStart],
-    //     gl_RayFlagsNoneEXT,
-    //     0xff,
-    //     0, 0, 0,
-    //     origin,
-    //     0.001,
-    //     direction,
-    //     10000.0,
-    //     0
-    // );
+    traceRayEXT(
+        topLevelAS[bufferStart],
+        gl_RayFlagsNoneEXT,
+        0xff,
+        0, 0, 0,
+        origin,
+        0.001,
+        direction,
+        10000.0,
+        0
+    );
 
     imageStore(image[0], ivec2(pixel), vec4(hitValue, 1));
 }
diff --git a/examples/VulkanTriangle/raygen.wgsl b/examples/VulkanTriangle/raygen.wgsl
new file mode 100644
index 0000000..cdf66fd
--- /dev/null
+++ b/examples/VulkanTriangle/raygen.wgsl
@@ -0,0 +1,39 @@
+// WebGPU port of raygen.glsl. Mirrors the pinhole camera setup — the
+// Payload type is declared in closesthit.wgsl (concatenated earlier).
+
+fn raygen_main(gid: vec3<u32>) {
+    if (gid.x >= hdr.surfaceW || gid.y >= hdr.surfaceH) { return; }
+
+    let pixel = vec2<f32>(f32(gid.x), f32(gid.y));
+    let resolution = vec2<f32>(f32(hdr.surfaceW), f32(hdr.surfaceH));
+    let uv  = (pixel + vec2<f32>(0.5)) / resolution;
+    let ndc = uv * 2.0 - vec2<f32>(1.0);
+
+    let origin = vec3<f32>(0.0, 0.0, -300.0);
+    let aspect = resolution.x / resolution.y;
+    let fov = 60.0 * 3.14159265 / 180.0;
+    let tanHalfFov = tan(fov * 0.5);
+
+    let direction = normalize(vec3<f32>(
+        ndc.x * aspect * tanHalfFov,
+        -ndc.y * tanHalfFov,
+        1.0,
+    ));
+
+    var payload: Payload;
+    payload.color = vec3<f32>(0.0);
+
+    traceRay(
+        0u,            // tlasIdx (unused)
+        0u,            // ray flags
+        0xFFu,         // cull mask
+        0u, 0u, 0u,    // sbtRecordOffset, sbtRecordStride, missIndex
+        origin, 0.001,
+        direction, 10000.0,
+        &payload,
+    );
+
+    textureStore(outImage,
+                 vec2<i32>(i32(gid.x), i32(gid.y)),
+                 vec4<f32>(payload.color, 1.0));
+}
diff --git a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
new file mode 100644
index 0000000..4c86ad7
--- /dev/null
+++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
@@ -0,0 +1,240 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode Mesh implementation: SAH BVH2 built on the host, then
+// forwarded to the JS bridge which appends the four data streams
+// (vertices, indices, BVH nodes, primRemap) into the global RT mesh
+// heaps. The handle returned by wgpuRegisterMeshBLAS goes into
+// RTInstance::accelerationStructureReference and lets the TLAS-build
+// compute pass and the traversal kernel find the BLAS data later.
+//
+// BVH layout must stay binary-identical to the WGSL `BVHNode` struct
+// declared in additional/dom-webgpu.js (rtWgslPrelude).
+
+module;
+module Crafter.Graphics:Mesh_implWebGPU;
+
+import :Mesh;
+import :WebGPU;
+import Crafter.Math;
+import std;
+
+using namespace Crafter;
+
+namespace {
+    // ─── BVH builder (binned SAH, 8 bins, BVH2) ────────────────────────
+
+    constexpr std::uint32_t kBinCount      = 8;
+    constexpr std::uint32_t kMaxLeafSize   = 4;
+    constexpr float         kTraversalCost = 1.0f;
+    constexpr float         kIntersectCost = 1.0f;
+
+    struct AABB {
+        float lo[3] { std::numeric_limits<float>::infinity(),
+                      std::numeric_limits<float>::infinity(),
+                      std::numeric_limits<float>::infinity() };
+        float hi[3] {-std::numeric_limits<float>::infinity(),
+                     -std::numeric_limits<float>::infinity(),
+                     -std::numeric_limits<float>::infinity() };
+
+        void Extend(const float p[3]) noexcept {
+            for (int a = 0; a < 3; ++a) {
+                if (p[a] < lo[a]) lo[a] = p[a];
+                if (p[a] > hi[a]) hi[a] = p[a];
+            }
+        }
+        void Extend(const AABB& o) noexcept {
+            for (int a = 0; a < 3; ++a) {
+                if (o.lo[a] < lo[a]) lo[a] = o.lo[a];
+                if (o.hi[a] > hi[a]) hi[a] = o.hi[a];
+            }
+        }
+        float SurfaceArea() const noexcept {
+            float dx = hi[0] - lo[0];
+            float dy = hi[1] - lo[1];
+            float dz = hi[2] - lo[2];
+            if (dx < 0.0f || dy < 0.0f || dz < 0.0f) return 0.0f;
+            return 2.0f * (dx*dy + dx*dz + dy*dz);
+        }
+    };
+
+    struct PrimRef {
+        AABB box;
+        float centroid[3];
+        std::uint32_t triIndex;
+    };
+
+    struct Bin {
+        AABB box;
+        std::uint32_t count = 0;
+    };
+
+    struct Builder {
+        std::vector<PrimRef> prims;
+        std::vector<BVHNode> nodes;
+
+        std::pair<std::uint32_t, std::uint32_t> AllocateChildren() {
+            std::uint32_t l = static_cast<std::uint32_t>(nodes.size());
+            nodes.emplace_back();
+            nodes.emplace_back();
+            return { l, l + 1 };
+        }
+
+        void BuildRecursive(std::uint32_t nodeIdx,
+                            std::uint32_t first,
+                            std::uint32_t count) {
+            AABB bounds, centroidBounds;
+            for (std::uint32_t i = 0; i < count; ++i) {
+                const auto& p = prims[first + i];
+                bounds.Extend(p.box);
+                centroidBounds.Extend(p.centroid);
+            }
+
+            auto emitLeaf = [&] {
+                BVHNode& n = nodes[nodeIdx];
+                std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo));
+                std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi));
+                n.firstChildOrPrim = first;
+                n.primCount        = count;
+            };
+
+            if (count <= kMaxLeafSize) { emitLeaf(); return; }
+
+            int   bestAxis  = -1;
+            float bestCost  = std::numeric_limits<float>::infinity();
+            std::uint32_t bestBin = 0;
+
+            float parentArea = bounds.SurfaceArea();
+            if (parentArea <= 0.0f) { emitLeaf(); return; }
+
+            for (int axis = 0; axis < 3; ++axis) {
+                float extent = centroidBounds.hi[axis] - centroidBounds.lo[axis];
+                if (extent <= 0.0f) continue;
+                float invExtent = static_cast<float>(kBinCount) / extent;
+
+                std::array<Bin, kBinCount> bins{};
+                for (std::uint32_t i = 0; i < count; ++i) {
+                    const auto& p = prims[first + i];
+                    float t = (p.centroid[axis] - centroidBounds.lo[axis]) * invExtent;
+                    std::uint32_t b = static_cast<std::uint32_t>(t);
+                    if (b >= kBinCount) b = kBinCount - 1;
+                    bins[b].box.Extend(p.box);
+                    bins[b].count += 1;
+                }
+
+                std::array<AABB,         kBinCount - 1> leftBox;
+                std::array<std::uint32_t,kBinCount - 1> leftCount{};
+                {
+                    AABB acc; std::uint32_t cnt = 0;
+                    for (std::uint32_t i = 0; i < kBinCount - 1; ++i) {
+                        acc.Extend(bins[i].box);
+                        cnt += bins[i].count;
+                        leftBox[i]   = acc;
+                        leftCount[i] = cnt;
+                    }
+                }
+                {
+                    AABB acc; std::uint32_t cnt = 0;
+                    for (std::int32_t i = kBinCount - 1; i >= 1; --i) {
+                        acc.Extend(bins[i].box);
+                        cnt += bins[i].count;
+                        std::uint32_t split = static_cast<std::uint32_t>(i - 1);
+                        if (leftCount[split] == 0 || cnt == 0) continue;
+                        float cost = kTraversalCost
+                                   + (leftBox[split].SurfaceArea() * leftCount[split]
+                                      + acc.SurfaceArea() * cnt) * kIntersectCost / parentArea;
+                        if (cost < bestCost) {
+                            bestCost = cost;
+                            bestAxis = axis;
+                            bestBin  = split;
+                        }
+                    }
+                }
+            }
+
+            float leafCost = static_cast<float>(count) * kIntersectCost;
+            if (bestAxis < 0 || bestCost >= leafCost) { emitLeaf(); return; }
+
+            float invExtent = static_cast<float>(kBinCount)
+                            / (centroidBounds.hi[bestAxis] - centroidBounds.lo[bestAxis]);
+            float lo = centroidBounds.lo[bestAxis];
+            auto mid = std::partition(
+                prims.begin() + first, prims.begin() + first + count,
+                [&](const PrimRef& p) {
+                    float t = (p.centroid[bestAxis] - lo) * invExtent;
+                    std::uint32_t b = static_cast<std::uint32_t>(t);
+                    if (b >= kBinCount) b = kBinCount - 1;
+                    return b <= bestBin;
+                });
+            std::uint32_t leftCount =
+                static_cast<std::uint32_t>(mid - (prims.begin() + first));
+            if (leftCount == 0 || leftCount == count) { emitLeaf(); return; }
+
+            auto [leftIdx, rightIdx] = AllocateChildren();
+            {
+                BVHNode& n = nodes[nodeIdx];
+                std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo));
+                std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi));
+                n.firstChildOrPrim = leftIdx;
+                n.primCount        = 0;
+            }
+            BuildRecursive(leftIdx,  first,             leftCount);
+            BuildRecursive(rightIdx, first + leftCount, count - leftCount);
+        }
+
+        void Build(std::span<const Vector<float, 3, 3>> vertices,
+                   std::span<const std::uint32_t>       indices) {
+            std::uint32_t triCount = static_cast<std::uint32_t>(indices.size()) / 3;
+            prims.resize(triCount);
+            for (std::uint32_t i = 0; i < triCount; ++i) {
+                std::uint32_t i0 = indices[i*3 + 0];
+                std::uint32_t i1 = indices[i*3 + 1];
+                std::uint32_t i2 = indices[i*3 + 2];
+                const auto& v0 = vertices[i0];
+                const auto& v1 = vertices[i1];
+                const auto& v2 = vertices[i2];
+                float p0[3] { v0.v[0], v0.v[1], v0.v[2] };
+                float p1[3] { v1.v[0], v1.v[1], v1.v[2] };
+                float p2[3] { v2.v[0], v2.v[1], v2.v[2] };
+                auto& pr = prims[i];
+                pr.box.Extend(p0);
+                pr.box.Extend(p1);
+                pr.box.Extend(p2);
+                pr.centroid[0] = (pr.box.lo[0] + pr.box.hi[0]) * 0.5f;
+                pr.centroid[1] = (pr.box.lo[1] + pr.box.hi[1]) * 0.5f;
+                pr.centroid[2] = (pr.box.lo[2] + pr.box.hi[2]) * 0.5f;
+                pr.triIndex = i;
+            }
+            nodes.reserve(triCount * 2);
+            nodes.emplace_back();
+            BuildRecursive(0, 0, triCount);
+        }
+    };
+}
+
+void Mesh::Build(std::span<Vector<float, 3, 3>> vertices,
+                 std::span<std::uint32_t>       indices,
+                 WebGPUCommandEncoderRef        /*cmd*/) {
+    triangleCount = static_cast<std::uint32_t>(indices.size()) / 3;
+
+    Builder builder;
+    builder.Build(vertices, indices);
+
+    std::vector<std::uint32_t> primRemap(triangleCount);
+    for (std::uint32_t i = 0; i < triangleCount; ++i) {
+        primRemap[i] = builder.prims[i].triIndex;
+    }
+
+    const BVHNode& root = builder.nodes[0];
+    std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS(
+        root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
+        root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
+        vertices.data(),       static_cast<std::int32_t>(vertices.size()),
+        indices.data(),        static_cast<std::int32_t>(indices.size()),
+        builder.nodes.data(),  static_cast<std::int32_t>(builder.nodes.size()),
+        primRemap.data(),      static_cast<std::int32_t>(primRemap.size()));
+    blasAddr = h;
+}
diff --git a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
new file mode 100644
index 0000000..bf9e115
--- /dev/null
+++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
@@ -0,0 +1,187 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// Megakernel WGSL assembly. The library prelude lives JS-side
+// (additional/dom-webgpu.js, rtWgslPrelude) — we don't have access to
+// it from C++ — so this file emits only the *user-controlled* portions
+// (concatenated SBT sources + the generated switch statements) and the
+// stable entry-point glue. The JS side wraps these with the prelude
+// before handing to device.createShaderModule.
+//
+// Wire format passed across the JS boundary is a single WGSL string
+// containing the substitution markers `// @CRAFTER_RT_USER_SOURCES`,
+// `// @CRAFTER_RT_CLOSESTHIT_CASES`, `// @CRAFTER_RT_ANYHIT_CASES`,
+// `// @CRAFTER_RT_MISS_CASES`, `// @CRAFTER_RT_RAYGEN_BODY` already
+// expanded; the JS side just concatenates prelude + this string.
+
+module;
+module Crafter.Graphics:PipelineRTWebGPU_impl;
+
+import :PipelineRTWebGPU;
+import :ShaderBindingTableWebGPU;
+import :RT;
+import :WebGPU;
+import std;
+
+using namespace Crafter;
+
+namespace {
+    constexpr std::string_view kPlaceholderClosestHit =
+        "fn _crafter_default_closesthit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {}";
+    constexpr std::string_view kPlaceholderAnyHit =
+        "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }";
+    constexpr std::string_view kPlaceholderMiss =
+        "fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}";
+
+    void AppendCase(std::string& out,
+                    std::uint32_t hitGroupIndex,
+                    std::string_view entryFn,
+                    std::string_view args) {
+        out += "        case ";
+        out += std::to_string(hitGroupIndex);
+        out += "u: { ";
+        out += entryFn;
+        out += "(";
+        out += args;
+        out += "); }\n";
+    }
+
+    // anyhit has a return type — case body forwards the result.
+    void AppendAnyHitCase(std::string& out,
+                          std::uint32_t hitGroupIndex,
+                          std::string_view entryFn) {
+        out += "        case ";
+        out += std::to_string(hitGroupIndex);
+        out += "u: { return ";
+        out += entryFn;
+        out += "(ray, hit, payload); }\n";
+    }
+}
+
+void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
+                            std::span<const RTShaderGroup>          raygenGroups,
+                            std::span<const RTShaderGroup>          missGroups,
+                            std::span<const RTShaderGroup>          hitGroups,
+                            const ShaderBindingTableWebGPU&         sbt) {
+    std::string wgsl;
+    wgsl.reserve(8 * 1024);
+
+    // ── Section 1: user closesthit / anyhit / miss source files ────────
+    //
+    // Raygens come later (after `traceRay` is declared) so we partition
+    // shaders by stage. Concatenating *all* non-raygen sources here lets
+    // them declare shared helpers, `struct Payload`, etc., in any order.
+
+    wgsl += "// ── user closesthit / anyhit / miss sources ───────────────\n";
+    for (const auto& shader : sbt.shaders) {
+        if (shader.stage == WebGPURTStage::Raygen) continue;
+        wgsl += shader.source;
+        wgsl += "\n";
+    }
+
+    // ── Section 2: mega-switch dispatchers ─────────────────────────────
+    //
+    // runClosestHit, runAnyHit, runMiss each dispatch on the per-hit /
+    // per-ray index registered against the appropriate group span.
+    // Indices match the user's expectations from VkRayTracingShaderGroup
+    // ordering: closest-hit group N (N from 0..hitGroups.size()-1) is
+    // selected by hitGroupIndex == N.
+
+    wgsl += "\nfn runClosestHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {\n";
+    wgsl += "    switch hg {\n";
+    bool anyClosestHit = false;
+    for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
+        const auto& g = hitGroups[i];
+        if (g.closestHitShader == kRTShaderUnused) continue;
+        if (g.closestHitShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.closestHitShader].entryFn;
+        AppendCase(wgsl, i, fn, "ray, hit, payload");
+        anyClosestHit = true;
+    }
+    if (!anyClosestHit) wgsl += "        // (no closest-hit shaders registered)\n";
+    wgsl += "        default: { }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n\n";
+
+    wgsl += "fn runAnyHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 {\n";
+    wgsl += "    switch hg {\n";
+    bool anyAnyhit = false;
+    for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
+        const auto& g = hitGroups[i];
+        if (g.anyHitShader == kRTShaderUnused) continue;
+        if (g.anyHitShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.anyHitShader].entryFn;
+        AppendAnyHitCase(wgsl, i, fn);
+        anyAnyhit = true;
+    }
+    if (!anyAnyhit) wgsl += "        // (no any-hit shaders registered)\n";
+    wgsl += "        default: { return RT_ANYHIT_ACCEPT; }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n\n";
+
+    wgsl += "fn runMiss(idx: u32, ray: RayDesc, payload: ptr<function, Payload>) {\n";
+    wgsl += "    switch idx {\n";
+    bool anyMiss = false;
+    for (std::uint32_t i = 0; i < missGroups.size(); ++i) {
+        const auto& g = missGroups[i];
+        if (g.generalShader == kRTShaderUnused) continue;
+        if (g.generalShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.generalShader].entryFn;
+        AppendCase(wgsl, i, fn, "ray, payload");
+        anyMiss = true;
+    }
+    if (!anyMiss) wgsl += "        // (no miss shaders registered)\n";
+    wgsl += "        default: { }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n";
+
+    // Marker — JS-side prelude/post-amble searches for this token to know
+    // where the library helpers (traverseBlas/traverseTlas/traceRay) get
+    // injected, followed by raygen sources and the @compute entry point.
+    wgsl += "\n// @CRAFTER_RT_LIBRARY_HELPERS_HERE\n";
+
+    // ── Section 3: user raygen source files ────────────────────────────
+    //
+    // Comes after the library injects traceRay, so raygens can call it.
+
+    wgsl += "\n// ── user raygen sources ───────────────────────────────────\n";
+    std::uint32_t raygenEntryIndex = kRTShaderUnused;
+    std::string   raygenEntryFn;
+    for (const auto& shader : sbt.shaders) {
+        if (shader.stage != WebGPURTStage::Raygen) continue;
+        wgsl += shader.source;
+        wgsl += "\n";
+        // Pick the first raygen group's general shader as the entry. Mirrors
+        // Vulkan's pRayGenShaderBindingTable[0] → first invoked raygen.
+        if (raygenEntryFn.empty()) raygenEntryFn = shader.entryFn;
+    }
+    if (!raygenGroups.empty()
+        && raygenGroups[0].generalShader != kRTShaderUnused
+        && raygenGroups[0].generalShader < sbt.shaders.size()) {
+        raygenEntryIndex = raygenGroups[0].generalShader;
+        raygenEntryFn    = sbt.shaders[raygenEntryIndex].entryFn;
+    }
+    if (raygenEntryFn.empty()) {
+        std::println("PipelineRTWebGPU::Init: no raygen shader registered");
+        pipelineHandle = 0;
+        return;
+    }
+
+    // ── Section 4: @compute entry point ────────────────────────────────
+    //
+    // 8x8 tile workgroup matching the rest of the WebGPU backend.
+
+    wgsl += "\n@compute @workgroup_size(8, 8, 1)\n";
+    wgsl += "fn main(@builtin(global_invocation_id) gid: vec3<u32>) {\n";
+    wgsl += "    ";
+    wgsl += raygenEntryFn;
+    wgsl += "(gid);\n";
+    wgsl += "}\n";
+
+    pipelineHandle = WebGPU::wgpuLoadRTPipeline(
+        wgsl.data(),
+        static_cast<std::int32_t>(wgsl.size()));
+}
diff --git a/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp b/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp
new file mode 100644
index 0000000..2195f85
--- /dev/null
+++ b/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp
@@ -0,0 +1,91 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode TLAS upkeep. BuildTLAS copies the per-element RTInstance into
+// the host-visible instance buffer (skipping the transform for elements
+// whose transform is GPU-owned), uploads it, then dispatches the JS-side
+// TLAS-build compute pass — which consults the per-BLAS records published
+// at Mesh::Build() time to produce world-space AABBs and inverse
+// transforms in the format `traceRay` / `rayQuery` consume.
+
+module;
+module Crafter.Graphics:RenderingElement3D_implWebGPU;
+
+import :RenderingElement3D;
+import :Mesh;
+import :WebGPU;
+import :WebGPUBuffer;
+import std;
+
+using namespace Crafter;
+
+std::vector<RenderingElement3D*> RenderingElement3D::elements;
+
+void RenderingElement3D::Add(RenderingElement3D* e) {
+    e->indexInElements = static_cast<std::uint32_t>(elements.size());
+    elements.push_back(e);
+}
+
+void RenderingElement3D::Remove(RenderingElement3D* e) {
+    std::uint32_t idx = e->indexInElements;
+    if (idx == std::numeric_limits<std::uint32_t>::max()) return;
+    std::uint32_t last = static_cast<std::uint32_t>(elements.size() - 1);
+    if (idx != last) {
+        elements[idx] = elements[last];
+        elements[idx]->indexInElements = idx;
+    }
+    elements.pop_back();
+    e->indexInElements = std::numeric_limits<std::uint32_t>::max();
+}
+
+void RenderingElement3D::BuildTLAS(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t index) {
+    auto& tlas = tlases[index];
+    const std::uint32_t primitiveCount = static_cast<std::uint32_t>(elements.size());
+    if (primitiveCount == 0) {
+        tlas.builtInstanceCount = 0;
+        return;
+    }
+
+    // (Re)allocate instance + metadata + output TLAS buffers if the count
+    // changed. WebGPUBuffer::Resize destroys and recreates the GPU buffer;
+    // bind-group caches keyed on the buffer handle are invalidated in the
+    // JS bridge automatically.
+    if (primitiveCount != tlas.builtInstanceCount) {
+        tlas.instanceBuffer.Resize(primitiveCount);
+        tlas.metadataBuffer.Resize(primitiveCount);
+        // TLASEntry layout in WGSL is 144 bytes due to vec3 align/pad
+        // rules. Must match the struct declared in the rtWgslTypes
+        // block in additional/dom-webgpu.js.
+        tlas.buffer.Resize(primitiveCount * 144);
+    }
+
+    for (std::uint32_t i = 0; i < primitiveCount; ++i) {
+        auto& dst = tlas.instanceBuffer.value[i];
+        const auto& src = elements[i]->instance;
+        if (elements[i]->transformOwnedByGpu) {
+            // Preserve whatever the GPU compute shader most recently
+            // wrote into dst.transform. Update only the non-transform
+            // fields.
+            dst.instanceCustomIndex                    = src.instanceCustomIndex;
+            dst.mask                                   = src.mask;
+            dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset;
+            dst.flags                                  = src.flags;
+            dst.accelerationStructureReference         = src.accelerationStructureReference;
+        } else {
+            dst = src;
+        }
+        tlas.metadataBuffer.value[i] = elements[i]->userMetadata;
+    }
+
+    tlas.instanceBuffer.FlushDevice();
+    tlas.metadataBuffer.FlushDevice();
+
+    WebGPU::wgpuBuildTLAS(tlas.instanceBuffer.handle,
+                          static_cast<std::int32_t>(primitiveCount),
+                          tlas.buffer.handle);
+
+    tlas.builtInstanceCount = primitiveCount;
+}
diff --git a/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp b/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp
new file mode 100644
index 0000000..745468b
--- /dev/null
+++ b/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp
@@ -0,0 +1,32 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+module;
+module Crafter.Graphics:ShaderBindingTableWebGPU_impl;
+
+import :ShaderBindingTableWebGPU;
+import std;
+
+using namespace Crafter;
+
+WebGPUShader::WebGPUShader(const std::filesystem::path& wgslPath,
+                           std::string fn,
+                           WebGPURTStage s)
+    : entryFn(std::move(fn)), stage(s) {
+    std::ifstream f(wgslPath, std::ios::binary | std::ios::ate);
+    if (!f.is_open()) {
+        std::println("WebGPUShader: cannot open {}", wgslPath.string());
+        return;
+    }
+    auto size = f.tellg();
+    if (size <= 0) {
+        std::println("WebGPUShader: empty file {}", wgslPath.string());
+        return;
+    }
+    f.seekg(0, std::ios::beg);
+    source.resize(static_cast<std::size_t>(size));
+    f.read(source.data(), size);
+}
diff --git a/implementations/Crafter.Graphics-WebGPUComputeShader.cpp b/implementations/Crafter.Graphics-WebGPUComputeShader.cpp
index 5a8e554..8546eb1 100644
--- a/implementations/Crafter.Graphics-WebGPUComputeShader.cpp
+++ b/implementations/Crafter.Graphics-WebGPUComputeShader.cpp
@@ -12,18 +12,22 @@ import std;
 using namespace Crafter;
 
 void WebGPUComputeShader::Load(std::string_view wgsl,
-                               std::span<const UICustomBinding> bindings) {
+                               std::span<const UICustomBinding> bindings,
+                               bool rayQuery) {
     customBindings.assign(bindings.begin(), bindings.end());
+    rayQueryCapable = rayQuery;
     pipelineHandle = WebGPU::wgpuLoadCustomShader(
         wgsl.data(),
         static_cast<std::int32_t>(wgsl.size()),
         customBindings.data(),
-        static_cast<std::int32_t>(customBindings.size())
+        static_cast<std::int32_t>(customBindings.size()),
+        rayQuery ? 1 : 0
     );
 }
 
 void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath,
-                               std::span<const UICustomBinding> bindings) {
+                               std::span<const UICustomBinding> bindings,
+                               bool rayQuery) {
     std::ifstream f(wgslPath, std::ios::binary | std::ios::ate);
     if (!f.is_open()) {
         std::println("WebGPUComputeShader::Load: cannot open {}", wgslPath.string());
@@ -37,5 +41,5 @@ void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath,
     f.seekg(0, std::ios::beg);
     std::string src(static_cast<std::size_t>(size), '\0');
     f.read(src.data(), size);
-    Load(std::string_view{src}, bindings);
+    Load(std::string_view{src}, bindings, rayQuery);
 }
diff --git a/interfaces/Crafter.Graphics-Mesh.cppm b/interfaces/Crafter.Graphics-Mesh.cppm
index a2ef23c..d8385ad 100644
--- a/interfaces/Crafter.Graphics-Mesh.cppm
+++ b/interfaces/Crafter.Graphics-Mesh.cppm
@@ -60,3 +60,54 @@ export namespace Crafter {
     };
 }
 #endif // !CRAFTER_GRAPHICS_WINDOW_DOM
+
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+import Crafter.Math;
+import :WebGPU;
+
+export namespace Crafter {
+    // Software-RT BLAS node, packed to 32 bytes. Matches the WGSL
+    // `BVHNode` struct in the RT WGSL prelude (additional/dom-webgpu.js,
+    // rtWgslPrelude) byte-for-byte.
+    //
+    //   primCount == 0          → inner node, children at indices
+    //                             firstChildOrPrim and firstChildOrPrim+1.
+    //   primCount  > 0          → leaf, `primCount` primitives starting at
+    //                             primIndex `firstChildOrPrim` in the
+    //                             global primRemap heap.
+    //
+    // SAH-built BVH2; constructed CPU-side at Build() time, never refit.
+    struct BVHNode {
+        float         aabbMin[3];
+        std::uint32_t firstChildOrPrim;
+        float         aabbMax[3];
+        std::uint32_t primCount;
+    };
+    static_assert(sizeof(BVHNode) == 32);
+
+    class Mesh {
+        public:
+        // BLAS "handle": opaque identity that goes into
+        // RTInstance::accelerationStructureReference. Set by Build() to a
+        // stable u32 (widened to u64 for Vulkan-struct layout parity), used
+        // by the WebGPU TLAS-build compute shader to look up the BLAS root
+        // AABB and per-mesh heap offsets. Handle 0 is the unassigned
+        // sentinel; never returned by Build().
+        std::uint64_t blasAddr = 0;
+        std::uint32_t triangleCount = 0;
+
+        bool opaque = true;
+
+        // Build BLAS from raw triangle data. Runs the CPU SAH BVH2 builder
+        // and forwards vertex/index/BVH/remap arrays to the JS-side mesh
+        // heap (additional/dom-webgpu.js), which queue.writeBuffers them
+        // into the global heaps and records the per-mesh offsets keyed by
+        // the returned handle. The `cmd` parameter is unused on WebGPU —
+        // kept for API symmetry with the Vulkan signature.
+        void Build(std::span<Crafter::Vector<float, 3, 3>> vertices,
+                   std::span<std::uint32_t>                indices,
+                   WebGPUCommandEncoderRef                  cmd = 0);
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
new file mode 100644
index 0000000..b3df012
--- /dev/null
+++ b/interfaces/Crafter.Graphics-PipelineRTWebGPU.cppm
@@ -0,0 +1,51 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode RT pipeline. Mirrors PipelineRTVulkan's surface — Init takes
+// the same kind of (raygen, miss, hit) shader-group spans plus an SBT.
+// The big difference is implementation: there's no native RT pipeline on
+// WebGPU, so Init assembles a single megakernel WGSL by concatenating
+//   1. library prelude (types, bindings, ray-flag constants)
+//   2. user closesthit / anyhit / miss source files
+//   3. library mega-switches dispatched on per-hit hit-group index
+//   4. library helpers (rayAabb / rayTriangle / traverseBlas / traverseTlas)
+//   5. library traceRay function
+//   6. user raygen source files
+//   7. @compute entry calling the registered raygen
+// and hands the result to wgpuLoadRTPipeline.
+//
+// The library WGSL itself lives in additional/dom-webgpu.js (rtWgslPrelude
+// + rtWgslDispatchTemplate). C++ side only knows the substitution markers.
+
+export module Crafter.Graphics:PipelineRTWebGPU;
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+import :RT;
+import :WebGPU;
+import :ShaderBindingTableWebGPU;
+
+export namespace Crafter {
+    class PipelineRTWebGPU {
+    public:
+        std::uint32_t pipelineHandle = 0;
+
+        // Build the megakernel pipeline. Groups carry indices into
+        // `sbt.shaders`. The library generates one `case` per registered
+        // group: closest-hit groups dispatch to their closestHitShader's
+        // entryFn, miss groups to their generalShader's entryFn, etc.
+        // The `cmd` parameter is unused on WebGPU; kept for API symmetry.
+        void Init(WebGPUCommandEncoderRef                 cmd,
+                  std::span<const RTShaderGroup>          raygenGroups,
+                  std::span<const RTShaderGroup>          missGroups,
+                  std::span<const RTShaderGroup>          hitGroups,
+                  const ShaderBindingTableWebGPU&         sbt);
+
+        PipelineRTWebGPU() = default;
+        PipelineRTWebGPU(const PipelineRTWebGPU&) = delete;
+        PipelineRTWebGPU& operator=(const PipelineRTWebGPU&) = delete;
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-RT.cppm b/interfaces/Crafter.Graphics-RT.cppm
new file mode 100644
index 0000000..77be96d
--- /dev/null
+++ b/interfaces/Crafter.Graphics-RT.cppm
@@ -0,0 +1,83 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License version 3.0 as published by the Free Software Foundation;
+*/
+
+// Portable RT types & constants.
+//
+// Native: aliases the Vulkan struct so existing code that passes
+// `RenderingElement3D::instance` directly into vkCmdBuildAccelerationStructuresKHR
+// is a no-op layout-wise.
+// DOM: provides a POD with the same byte layout + the same field names, so
+// user code touching `instance.mask`, `instance.flags`, `instance.transform`
+// etc. compiles unchanged.
+//
+// Flag constants are spelled out as Crafter::kRT* so portable user code can
+// avoid referencing VK_* on the DOM target. The values match
+// VkGeometryInstanceFlagBitsKHR / VkRayTracingShaderGroupTypeKHR so the
+// constants compare-equal on Vulkan if the user wants to mix surfaces.
+
+module;
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
+#include "vulkan/vulkan.h"
+#endif
+export module Crafter.Graphics:RT;
+import std;
+
+export namespace Crafter {
+#ifndef CRAFTER_GRAPHICS_WINDOW_DOM
+    using RTTransformMatrix = VkTransformMatrixKHR;
+    using RTInstance        = VkAccelerationStructureInstanceKHR;
+#else
+    // Mirrors VkTransformMatrixKHR: row-major affine 3x4.
+    struct RTTransformMatrix {
+        float matrix[3][4];
+    };
+    static_assert(sizeof(RTTransformMatrix) == 48);
+
+    // Mirrors VkAccelerationStructureInstanceKHR byte-for-byte.
+    // On WebGPU the `accelerationStructureReference` slot holds the BLAS
+    // handle returned by MeshWebGPU::blasHandle (a u32 widened to u64).
+    struct RTInstance {
+        RTTransformMatrix transform;
+        std::uint32_t instanceCustomIndex                    : 24;
+        std::uint32_t mask                                   : 8;
+        std::uint32_t instanceShaderBindingTableRecordOffset : 24;
+        std::uint32_t flags                                  : 8;
+        std::uint64_t accelerationStructureReference;
+    };
+    static_assert(sizeof(RTInstance) == 64);
+#endif
+
+    // VkGeometryInstanceFlagBitsKHR mirror. Values verbatim so equal on both.
+    inline constexpr std::uint8_t kRTGeometryInstanceTriangleFacingCullDisable = 0x1;
+    inline constexpr std::uint8_t kRTGeometryInstanceTriangleFlipFacing        = 0x2;
+    inline constexpr std::uint8_t kRTGeometryInstanceForceOpaque               = 0x4;
+    inline constexpr std::uint8_t kRTGeometryInstanceForceNoOpaque             = 0x8;
+
+    // Hit-group identification. Matches VkRayTracingShaderGroupTypeKHR for
+    // the two types we actually support (general + triangles-hit).
+    enum class RTShaderGroupType : std::uint8_t {
+        General           = 0,   // raygen / miss / callable
+        TrianglesHitGroup = 1,
+    };
+
+    // Cross-backend description of one entry in the shader-group array
+    // passed to PipelineRT::Init. Mirrors the meaningful subset of
+    // VkRayTracingShaderGroupCreateInfoKHR: per group, the type and the
+    // indices (into the SBT's shader array) for general / closestHit /
+    // anyHit, with kRTShaderUnused == VK_SHADER_UNUSED_KHR for "none".
+    inline constexpr std::uint32_t kRTShaderUnused = 0xFFFFFFFFu;
+
+    struct RTShaderGroup {
+        RTShaderGroupType type             = RTShaderGroupType::General;
+        std::uint32_t      generalShader    = kRTShaderUnused;
+        std::uint32_t      closestHitShader = kRTShaderUnused;
+        std::uint32_t      anyHitShader     = kRTShaderUnused;
+    };
+}
diff --git a/interfaces/Crafter.Graphics-RTPass.cppm b/interfaces/Crafter.Graphics-RTPass.cppm
index ada74cf..78064c0 100644
--- a/interfaces/Crafter.Graphics-RTPass.cppm
+++ b/interfaces/Crafter.Graphics-RTPass.cppm
@@ -46,3 +46,42 @@ export namespace Crafter {
     };
 }
 #endif // !CRAFTER_GRAPHICS_WINDOW_DOM
+
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+import :RenderPass;
+import :Window;
+import :WebGPU;
+import :PipelineRTWebGPU;
+import :RenderingElement3D;
+
+export namespace Crafter {
+    // DOM-mode RT pass — dispatches the megakernel pipeline at frame Record
+    // time. Picks up the current TLAS for the frame and the application's
+    // raygen-side push data (typically empty in v1; pass via window.passes
+    // wiring if needed later).
+    struct RTPass : RenderPass {
+        PipelineRTWebGPU* pipeline;
+        // Optional per-dispatch push data forwarded after the standard
+        // RTDispatchHeader. Null means "no extra data".
+        const void*       pushPtr   = nullptr;
+        std::uint32_t     pushBytes = 0;
+
+        RTPass(PipelineRTWebGPU* p) : pipeline(p) {}
+
+        void Record(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t frameIdx, Window& window) override {
+            const std::uint32_t gx = (window.width  + 7u) / 8u;
+            const std::uint32_t gy = (window.height + 7u) / 8u;
+            auto& tlas = RenderingElement3D::tlases[frameIdx];
+            WebGPU::wgpuDispatchRT(
+                pipeline->pipelineHandle,
+                pushPtr,
+                static_cast<std::int32_t>(pushBytes),
+                tlas.buffer.handle,
+                static_cast<std::int32_t>(tlas.builtInstanceCount),
+                static_cast<std::int32_t>(gx),
+                static_cast<std::int32_t>(gy));
+        }
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-RenderingElement3D.cppm b/interfaces/Crafter.Graphics-RenderingElement3D.cppm
index 6cbb19b..9756745 100644
--- a/interfaces/Crafter.Graphics-RenderingElement3D.cppm
+++ b/interfaces/Crafter.Graphics-RenderingElement3D.cppm
@@ -22,6 +22,7 @@ module;
 #include "vulkan/vulkan.h"
 #endif // !CRAFTER_GRAPHICS_WINDOW_DOM
 export module Crafter.Graphics:RenderingElement3D;
+import :RT;
 #ifndef CRAFTER_GRAPHICS_WINDOW_DOM
 import std;
 import :Mesh;
@@ -55,7 +56,7 @@ export namespace Crafter {
 
    class RenderingElement3D {
         public:
-        VkAccelerationStructureInstanceKHR instance;
+        RTInstance instance;
         // Position in `elements`, maintained by Add/Remove for O(1) swap-and-pop.
         // Sentinel value = not currently registered.
         std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
@@ -87,3 +88,63 @@ export namespace Crafter {
    };
 }
 #endif // !CRAFTER_GRAPHICS_WINDOW_DOM
+
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+import :Mesh;
+import :WebGPU;
+import :WebGPUBuffer;
+import :Window;
+
+export namespace Crafter {
+    // Per-frame TLAS storage. WebGPU has no real swapchain frame count
+    // (Window::numFrames = 1 on DOM), so this is effectively a singleton —
+    // the array form is kept for API symmetry with the Vulkan side so user
+    // code that indexes `tlases[frameIdx]` ports unchanged.
+    struct TlasWithBuffer {
+        // Host-visible instance buffer holding RTInstance entries — same
+        // layout as Vulkan's VkAccelerationStructureInstanceKHR, so user
+        // code touching .instance.mask / .flags / .transform.matrix is
+        // identical across backends. Also bound as a storage SSBO so
+        // application compute shaders (e.g. physics-tlas-transform.comp.wgsl)
+        // can write the .transform field directly when
+        // RenderingElement3D::transformOwnedByGpu is set.
+        WebGPUBuffer<RTInstance, true>     instanceBuffer;
+        // Per-instance application metadata; parallel to instanceBuffer,
+        // identical semantics to the Vulkan-side counterpart.
+        WebGPUBuffer<std::uint32_t, true>  metadataBuffer;
+        // GPU-built TLAS data: one TLASEntry per instance, written each
+        // BuildTLAS by a compute pass on the JS bridge. Read by traceRay /
+        // rayQuery as `@group(1) @binding(0) tlas: array<TLASEntry>`.
+        // TLASEntry layout: 96 bytes — aabbMin (12) + maskHGoffset (4) +
+        // aabbMax (12) + blasHandle (4) + invTransform 3x4 mat (48) +
+        // customIndex (4) + _pad (12). Defined in the WGSL traversal
+        // library; never directly read by C++.
+        WebGPUBuffer<char, false>           buffer;
+
+        std::uint32_t builtInstanceCount = 0;
+    };
+
+    class RenderingElement3D {
+        public:
+        RTInstance instance{};
+        std::uint32_t indexInElements = std::numeric_limits<std::uint32_t>::max();
+        std::uint32_t userMetadata = 0;
+        // Application compute shader writes the transform field of this
+        // element's instanceBuffer slot directly — BuildTLAS preserves it.
+        bool transformOwnedByGpu = false;
+
+        static std::vector<RenderingElement3D*> elements;
+        inline static TlasWithBuffer tlases[Window::numFrames];
+
+        // Repopulate the TLAS for frame `index`. WebGPU path always does
+        // a fresh build (no refit) — the GPU build pass is cheap at the
+        // ~10–100 instance counts the design targets; LBVH-for-TLAS is a
+        // future optimization for larger scenes.
+        static void BuildTLAS(WebGPUCommandEncoderRef cmd, std::uint32_t index);
+
+        static void Add(RenderingElement3D* e);
+        static void Remove(RenderingElement3D* e);
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm b/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm
new file mode 100644
index 0000000..c175a52
--- /dev/null
+++ b/interfaces/Crafter.Graphics-ShaderBindingTableWebGPU.cppm
@@ -0,0 +1,64 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode shader-binding-table analog. Stores raw WGSL source strings
+// plus an explicit entry-function name per shader. PipelineRTWebGPU::Init
+// concatenates these into the megakernel WGSL at pipeline-build time.
+
+export module Crafter.Graphics:ShaderBindingTableWebGPU;
+#ifdef CRAFTER_GRAPHICS_WINDOW_DOM
+import std;
+
+export namespace Crafter {
+    enum class WebGPURTStage : std::uint8_t {
+        Raygen     = 0,
+        Miss       = 1,
+        ClosestHit = 2,
+        AnyHit     = 3,
+    };
+
+    // One WGSL shader source + the function name PipelineRTWebGPU should
+    // call from the megakernel switch. The source may declare any helper
+    // functions and (in exactly one raygen file) the `Payload` struct.
+    //
+    // Required signatures inside `source` for `entryFn`:
+    //   Raygen:     fn <entryFn>(gid: vec3<u32>)
+    //   Miss:       fn <entryFn>(ray: RayDesc, payload: ptr<function, Payload>)
+    //   ClosestHit: fn <entryFn>(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>)
+    //   AnyHit:     fn <entryFn>(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32
+    //               returns RT_ANYHIT_ACCEPT / RT_ANYHIT_IGNORE / RT_ANYHIT_END_SEARCH.
+    //
+    // `RayDesc`, `HitInfo`, the `RT_*` flag/return constants, the `tlas` /
+    // BLAS / mesh-record bindings, and the `traceRay` function are all
+    // injected by the library prelude — see the rtWgslPrelude block in
+    // additional/dom-webgpu.js.
+    struct WebGPUShader {
+        std::string   source;
+        std::string   entryFn;
+        WebGPURTStage stage = WebGPURTStage::Raygen;
+
+        WebGPUShader() = default;
+        WebGPUShader(std::string src, std::string fn, WebGPURTStage s)
+            : source(std::move(src)), entryFn(std::move(fn)), stage(s) {}
+
+        // Construct from a WGSL source file path. Reads via the WASI VFS
+        // so apps shipping their shaders as static files (see the
+        // `cfg.files.emplace_back("raygen.wgsl")` pattern in
+        // examples/VulkanTriangle/project.cpp) get them at runtime.
+        WebGPUShader(const std::filesystem::path& wgslPath,
+                     std::string fn,
+                     WebGPURTStage s);
+    };
+
+    class ShaderBindingTableWebGPU {
+    public:
+        std::vector<WebGPUShader> shaders;
+        void Init(std::span<const WebGPUShader> shaders_) {
+            shaders.assign(shaders_.begin(), shaders_.end());
+        }
+    };
+}
+#endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-WebGPU.cppm b/interfaces/Crafter.Graphics-WebGPU.cppm
index 6831322..616c8fe 100644
--- a/interfaces/Crafter.Graphics-WebGPU.cppm
+++ b/interfaces/Crafter.Graphics-WebGPU.cppm
@@ -73,13 +73,62 @@ namespace Crafter::WebGPU {
                                      std::uint32_t atlasHandle, std::uint32_t sampHandle);
 
     // ─── custom user-authored compute shaders ───────────────────────────
+    // rayQueryFlag = 1 swaps group(1) from the UI ping-pong pair to the RT
+    // data heaps (TLAS, BVH, meshRecs, verts, idx, primRemap, outImage) and
+    // prepends a WGSL prelude exposing the rayQuery* API. Shaders that set
+    // this MUST NOT declare their own @group(1) bindings.
     __attribute__((import_module("env"), import_name("wgpuLoadCustomShader")))
     extern "C" std::uint32_t wgpuLoadCustomShader(const void* wgslPtr, std::int32_t wgslLen,
-                                                  const void* bindingsPtr, std::int32_t bindingsCount);
+                                                  const void* bindingsPtr, std::int32_t bindingsCount,
+                                                  std::int32_t rayQueryFlag);
     __attribute__((import_module("env"), import_name("wgpuDispatchCustom")))
     extern "C" void wgpuDispatchCustom(std::uint32_t pipelineHandle,
                                        const void* pushPtr, std::int32_t pushBytes,
                                        const void* handlesPtr, std::int32_t handlesCount,
                                        std::int32_t gx, std::int32_t gy, std::int32_t gz);
+
+    // ─── software raytracing ───────────────────────────────────────────
+    //
+    // Mesh::Build forwards vertex / index / BVH-node / primRemap arrays
+    // to the JS bridge, which queue.writeBuffers them into the global
+    // RT mesh heaps (growing if needed) and records the per-mesh offsets
+    // under a freshly-allocated u32 handle. The handle is what user code
+    // stores in RTInstance::accelerationStructureReference; the WebGPU
+    // TLAS-build compute shader resolves it back to root AABB + heap
+    // offsets at dispatch time. Returns 0 on failure.
+    __attribute__((import_module("env"), import_name("wgpuRegisterMeshBLAS")))
+    extern "C" std::uint32_t wgpuRegisterMeshBLAS(
+        float minX, float minY, float minZ,
+        float maxX, float maxY, float maxZ,
+        const void* verticesPtr, std::int32_t vertexCount,
+        const void* indicesPtr,  std::int32_t indexCount,
+        const void* bvhNodesPtr, std::int32_t bvhNodeCount,
+        const void* primRemapPtr, std::int32_t primRemapCount);
+
+    // RT pipeline build. The library composes WGSL by concatenating the
+    // traversal library, generated hit-group switches, and the user-
+    // supplied raygen / miss / closesthit / anyhit bodies. Returns an
+    // opaque pipeline handle.
+    __attribute__((import_module("env"), import_name("wgpuLoadRTPipeline")))
+    extern "C" std::uint32_t wgpuLoadRTPipeline(const void* wgslPtr, std::int32_t wgslLen);
+
+    // Dispatch a TraceRays-equivalent pass: the RT pipeline is dispatched
+    // over a (gx, gy) tile grid; the library writes the push data (camera,
+    // payload, etc. — opaque) into a uniform ring buffer, attaches the TLAS
+    // + global mesh heap, and runs one workgroup per 8x8 screen tile.
+    __attribute__((import_module("env"), import_name("wgpuDispatchRT")))
+    extern "C" void wgpuDispatchRT(std::uint32_t pipelineHandle,
+                                   const void* pushPtr, std::int32_t pushBytes,
+                                   std::uint32_t tlasBufHandle,
+                                   std::int32_t  instanceCount,
+                                   std::int32_t  gx, std::int32_t gy);
+
+    // GPU TLAS-build dispatch. Reads the instance buffer (host-uploaded or
+    // GPU-written), produces per-instance world-space AABBs + per-instance
+    // transform matrices in a flat tlasBuf SSBO consumed by traceRay / rayQuery.
+    __attribute__((import_module("env"), import_name("wgpuBuildTLAS")))
+    extern "C" void wgpuBuildTLAS(std::uint32_t instanceBufHandle,
+                                  std::int32_t  instanceCount,
+                                  std::uint32_t tlasOutBufHandle);
 }
 #endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
index fac065f..867bb26 100644
--- a/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
+++ b/interfaces/Crafter.Graphics-WebGPUComputeShader.cppm
@@ -49,6 +49,7 @@ export namespace Crafter {
     class WebGPUComputeShader {
     public:
         std::uint32_t pipelineHandle = 0;
+        bool          rayQueryCapable = false;
         std::vector<UICustomBinding> customBindings;
 
         WebGPUComputeShader() = default;
@@ -56,22 +57,28 @@ export namespace Crafter {
         WebGPUComputeShader& operator=(const WebGPUComputeShader&) = delete;
         WebGPUComputeShader(WebGPUComputeShader&& o) noexcept
             : pipelineHandle(o.pipelineHandle),
+              rayQueryCapable(o.rayQueryCapable),
               customBindings(std::move(o.customBindings)) {
             o.pipelineHandle = 0;
         }
 
         // Compile + link a custom compute shader. `wgsl` is the source
-        // string; the library does NOT add anything to it — the user's
-        // shader must declare @group(0)/@group(1) bindings matching the
-        // contract above. `bindings` lists every additional resource
-        // (groups 2+) that the renderer should bind at dispatch time.
+        // string; the library does NOT add anything to it (except when
+        // `rayQuery` is true — then a RT prelude exposing the rayQuery*
+        // API is prepended). The user's shader must declare
+        // @group(0)/@group(1) bindings matching the contract above
+        // (rayQuery shaders MUST NOT redeclare group(1)).
+        // `bindings` lists every additional resource (groups 2+) that the
+        // renderer should bind at dispatch time.
         void Load(std::string_view wgsl,
-                  std::span<const UICustomBinding> bindings = {});
+                  std::span<const UICustomBinding> bindings = {},
+                  bool rayQuery = false);
 
         // Path-based overload for symmetry with the Vulkan ComputeShader.
         // Reads the file from disk (browser VFS) and forwards to Load(wgsl).
         void Load(const std::filesystem::path& wgslPath,
-                  std::span<const UICustomBinding> bindings = {});
+                  std::span<const UICustomBinding> bindings = {},
+                  bool rayQuery = false);
     };
 }
 #endif // CRAFTER_GRAPHICS_WINDOW_DOM
diff --git a/interfaces/Crafter.Graphics.cppm b/interfaces/Crafter.Graphics.cppm
index 8b9f6fd..b48f029 100644
--- a/interfaces/Crafter.Graphics.cppm
+++ b/interfaces/Crafter.Graphics.cppm
@@ -58,6 +58,10 @@ export import :UIComponents;
 export import :InputField;
 export import :Decompress;
 
+// Portable RT type aliases (provided on both targets — uses Vulkan
+// structs natively, plain PODs of the same layout on DOM).
+export import :RT;
+
 // DOM-only partitions — empty under native.
 export import :Dom;
 export import :DomEvents;
@@ -66,3 +70,5 @@ export import :WebGPU;
 export import :WebGPUBuffer;
 export import :DescriptorHeapWebGPU;
 export import :WebGPUComputeShader;
+export import :ShaderBindingTableWebGPU;
+export import :PipelineRTWebGPU;
diff --git a/project.cpp b/project.cpp
index 06f64ac..c1a162c 100644
--- a/project.cpp
+++ b/project.cpp
@@ -131,7 +131,7 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
     // when its body is gated out. Vulkan-typed partitions stub to empty
     // modules under CRAFTER_GRAPHICS_WINDOW_DOM; the Dom/DomEvents/Router
     // partitions stub to empty modules in the opposite direction.
-    std::array<fs::path, 37> ifaces = {
+    std::array<fs::path, 40> ifaces = {
         "interfaces/Crafter.Graphics",
         "interfaces/Crafter.Graphics-Animation",
         "interfaces/Crafter.Graphics-Clipboard",
@@ -153,12 +153,15 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
         "interfaces/Crafter.Graphics-Keys",
         "interfaces/Crafter.Graphics-Mesh",
         "interfaces/Crafter.Graphics-PipelineRTVulkan",
+        "interfaces/Crafter.Graphics-PipelineRTWebGPU",
         "interfaces/Crafter.Graphics-RenderingElement3D",
         "interfaces/Crafter.Graphics-RenderPass",
         "interfaces/Crafter.Graphics-Router",
+        "interfaces/Crafter.Graphics-RT",
         "interfaces/Crafter.Graphics-RTPass",
         "interfaces/Crafter.Graphics-SamplerVulkan",
         "interfaces/Crafter.Graphics-ShaderBindingTableVulkan",
+        "interfaces/Crafter.Graphics-ShaderBindingTableWebGPU",
         "interfaces/Crafter.Graphics-ShaderVulkan",
         "interfaces/Crafter.Graphics-Types",
         "interfaces/Crafter.Graphics-UI",
@@ -175,14 +178,18 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
         // DOM impl set. UI-Shared.cpp is backend-agnostic; UI-WebGPU.cpp
         // is the DOM-only implementation of UIRenderer's GPU-touching
         // methods. Font / FontAtlas / UIComponents are now portable.
-        std::array<fs::path, 12> domImpls = {
+        std::array<fs::path, 16> domImpls = {
             "implementations/Crafter.Graphics-Clipboard",
             "implementations/Crafter.Graphics-Dom",
             "implementations/Crafter.Graphics-Font",
             "implementations/Crafter.Graphics-FontAtlas",
             "implementations/Crafter.Graphics-Gamepad",
             "implementations/Crafter.Graphics-Input",
+            "implementations/Crafter.Graphics-Mesh-WebGPU",
+            "implementations/Crafter.Graphics-PipelineRTWebGPU",
+            "implementations/Crafter.Graphics-RenderingElement3D-WebGPU",
             "implementations/Crafter.Graphics-Router",
+            "implementations/Crafter.Graphics-ShaderBindingTableWebGPU",
             "implementations/Crafter.Graphics-UI-Shared",
             "implementations/Crafter.Graphics-UI-WebGPU",
             "implementations/Crafter.Graphics-UIComponents",