webgpu triangle

2026-05-18 18:43:30 +02:00 · 2026-05-18 18:43:30 +02:00 · 5553ded476
commit 5553ded476
parent 64116cd980
22 changed files with 2107 additions and 42 deletions
--- a/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
+++ b/implementations/Crafter.Graphics-Mesh-WebGPU.cpp
@ -0,0 +1,240 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode Mesh implementation: SAH BVH2 built on the host, then
+// forwarded to the JS bridge which appends the four data streams
+// (vertices, indices, BVH nodes, primRemap) into the global RT mesh
+// heaps. The handle returned by wgpuRegisterMeshBLAS goes into
+// RTInstance::accelerationStructureReference and lets the TLAS-build
+// compute pass and the traversal kernel find the BLAS data later.
+//
+// BVH layout must stay binary-identical to the WGSL `BVHNode` struct
+// declared in additional/dom-webgpu.js (rtWgslPrelude).
+
+module;
+module Crafter.Graphics:Mesh_implWebGPU;
+
+import :Mesh;
+import :WebGPU;
+import Crafter.Math;
+import std;
+
+using namespace Crafter;
+
+namespace {
+    // ─── BVH builder (binned SAH, 8 bins, BVH2) ────────────────────────
+
+    constexpr std::uint32_t kBinCount      = 8;
+    constexpr std::uint32_t kMaxLeafSize   = 4;
+    constexpr float         kTraversalCost = 1.0f;
+    constexpr float         kIntersectCost = 1.0f;
+
+    struct AABB {
+        float lo[3] { std::numeric_limits<float>::infinity(),
+                      std::numeric_limits<float>::infinity(),
+                      std::numeric_limits<float>::infinity() };
+        float hi[3] {-std::numeric_limits<float>::infinity(),
+                     -std::numeric_limits<float>::infinity(),
+                     -std::numeric_limits<float>::infinity() };
+
+        void Extend(const float p[3]) noexcept {
+            for (int a = 0; a < 3; ++a) {
+                if (p[a] < lo[a]) lo[a] = p[a];
+                if (p[a] > hi[a]) hi[a] = p[a];
+            }
+        }
+        void Extend(const AABB& o) noexcept {
+            for (int a = 0; a < 3; ++a) {
+                if (o.lo[a] < lo[a]) lo[a] = o.lo[a];
+                if (o.hi[a] > hi[a]) hi[a] = o.hi[a];
+            }
+        }
+        float SurfaceArea() const noexcept {
+            float dx = hi[0] - lo[0];
+            float dy = hi[1] - lo[1];
+            float dz = hi[2] - lo[2];
+            if (dx < 0.0f || dy < 0.0f || dz < 0.0f) return 0.0f;
+            return 2.0f * (dx*dy + dx*dz + dy*dz);
+        }
+    };
+
+    struct PrimRef {
+        AABB box;
+        float centroid[3];
+        std::uint32_t triIndex;
+    };
+
+    struct Bin {
+        AABB box;
+        std::uint32_t count = 0;
+    };
+
+    struct Builder {
+        std::vector<PrimRef> prims;
+        std::vector<BVHNode> nodes;
+
+        std::pair<std::uint32_t, std::uint32_t> AllocateChildren() {
+            std::uint32_t l = static_cast<std::uint32_t>(nodes.size());
+            nodes.emplace_back();
+            nodes.emplace_back();
+            return { l, l + 1 };
+        }
+
+        void BuildRecursive(std::uint32_t nodeIdx,
+                            std::uint32_t first,
+                            std::uint32_t count) {
+            AABB bounds, centroidBounds;
+            for (std::uint32_t i = 0; i < count; ++i) {
+                const auto& p = prims[first + i];
+                bounds.Extend(p.box);
+                centroidBounds.Extend(p.centroid);
+            }
+
+            auto emitLeaf = [&] {
+                BVHNode& n = nodes[nodeIdx];
+                std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo));
+                std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi));
+                n.firstChildOrPrim = first;
+                n.primCount        = count;
+            };
+
+            if (count <= kMaxLeafSize) { emitLeaf(); return; }
+
+            int   bestAxis  = -1;
+            float bestCost  = std::numeric_limits<float>::infinity();
+            std::uint32_t bestBin = 0;
+
+            float parentArea = bounds.SurfaceArea();
+            if (parentArea <= 0.0f) { emitLeaf(); return; }
+
+            for (int axis = 0; axis < 3; ++axis) {
+                float extent = centroidBounds.hi[axis] - centroidBounds.lo[axis];
+                if (extent <= 0.0f) continue;
+                float invExtent = static_cast<float>(kBinCount) / extent;
+
+                std::array<Bin, kBinCount> bins{};
+                for (std::uint32_t i = 0; i < count; ++i) {
+                    const auto& p = prims[first + i];
+                    float t = (p.centroid[axis] - centroidBounds.lo[axis]) * invExtent;
+                    std::uint32_t b = static_cast<std::uint32_t>(t);
+                    if (b >= kBinCount) b = kBinCount - 1;
+                    bins[b].box.Extend(p.box);
+                    bins[b].count += 1;
+                }
+
+                std::array<AABB,         kBinCount - 1> leftBox;
+                std::array<std::uint32_t,kBinCount - 1> leftCount{};
+                {
+                    AABB acc; std::uint32_t cnt = 0;
+                    for (std::uint32_t i = 0; i < kBinCount - 1; ++i) {
+                        acc.Extend(bins[i].box);
+                        cnt += bins[i].count;
+                        leftBox[i]   = acc;
+                        leftCount[i] = cnt;
+                    }
+                }
+                {
+                    AABB acc; std::uint32_t cnt = 0;
+                    for (std::int32_t i = kBinCount - 1; i >= 1; --i) {
+                        acc.Extend(bins[i].box);
+                        cnt += bins[i].count;
+                        std::uint32_t split = static_cast<std::uint32_t>(i - 1);
+                        if (leftCount[split] == 0 || cnt == 0) continue;
+                        float cost = kTraversalCost
+                                   + (leftBox[split].SurfaceArea() * leftCount[split]
+                                      + acc.SurfaceArea() * cnt) * kIntersectCost / parentArea;
+                        if (cost < bestCost) {
+                            bestCost = cost;
+                            bestAxis = axis;
+                            bestBin  = split;
+                        }
+                    }
+                }
+            }
+
+            float leafCost = static_cast<float>(count) * kIntersectCost;
+            if (bestAxis < 0 || bestCost >= leafCost) { emitLeaf(); return; }
+
+            float invExtent = static_cast<float>(kBinCount)
+                            / (centroidBounds.hi[bestAxis] - centroidBounds.lo[bestAxis]);
+            float lo = centroidBounds.lo[bestAxis];
+            auto mid = std::partition(
+                prims.begin() + first, prims.begin() + first + count,
+                [&](const PrimRef& p) {
+                    float t = (p.centroid[bestAxis] - lo) * invExtent;
+                    std::uint32_t b = static_cast<std::uint32_t>(t);
+                    if (b >= kBinCount) b = kBinCount - 1;
+                    return b <= bestBin;
+                });
+            std::uint32_t leftCount =
+                static_cast<std::uint32_t>(mid - (prims.begin() + first));
+            if (leftCount == 0 || leftCount == count) { emitLeaf(); return; }
+
+            auto [leftIdx, rightIdx] = AllocateChildren();
+            {
+                BVHNode& n = nodes[nodeIdx];
+                std::memcpy(n.aabbMin, bounds.lo, sizeof(bounds.lo));
+                std::memcpy(n.aabbMax, bounds.hi, sizeof(bounds.hi));
+                n.firstChildOrPrim = leftIdx;
+                n.primCount        = 0;
+            }
+            BuildRecursive(leftIdx,  first,             leftCount);
+            BuildRecursive(rightIdx, first + leftCount, count - leftCount);
+        }
+
+        void Build(std::span<const Vector<float, 3, 3>> vertices,
+                   std::span<const std::uint32_t>       indices) {
+            std::uint32_t triCount = static_cast<std::uint32_t>(indices.size()) / 3;
+            prims.resize(triCount);
+            for (std::uint32_t i = 0; i < triCount; ++i) {
+                std::uint32_t i0 = indices[i*3 + 0];
+                std::uint32_t i1 = indices[i*3 + 1];
+                std::uint32_t i2 = indices[i*3 + 2];
+                const auto& v0 = vertices[i0];
+                const auto& v1 = vertices[i1];
+                const auto& v2 = vertices[i2];
+                float p0[3] { v0.v[0], v0.v[1], v0.v[2] };
+                float p1[3] { v1.v[0], v1.v[1], v1.v[2] };
+                float p2[3] { v2.v[0], v2.v[1], v2.v[2] };
+                auto& pr = prims[i];
+                pr.box.Extend(p0);
+                pr.box.Extend(p1);
+                pr.box.Extend(p2);
+                pr.centroid[0] = (pr.box.lo[0] + pr.box.hi[0]) * 0.5f;
+                pr.centroid[1] = (pr.box.lo[1] + pr.box.hi[1]) * 0.5f;
+                pr.centroid[2] = (pr.box.lo[2] + pr.box.hi[2]) * 0.5f;
+                pr.triIndex = i;
+            }
+            nodes.reserve(triCount * 2);
+            nodes.emplace_back();
+            BuildRecursive(0, 0, triCount);
+        }
+    };
+}
+
+void Mesh::Build(std::span<Vector<float, 3, 3>> vertices,
+                 std::span<std::uint32_t>       indices,
+                 WebGPUCommandEncoderRef        /*cmd*/) {
+    triangleCount = static_cast<std::uint32_t>(indices.size()) / 3;
+
+    Builder builder;
+    builder.Build(vertices, indices);
+
+    std::vector<std::uint32_t> primRemap(triangleCount);
+    for (std::uint32_t i = 0; i < triangleCount; ++i) {
+        primRemap[i] = builder.prims[i].triIndex;
+    }
+
+    const BVHNode& root = builder.nodes[0];
+    std::uint32_t h = WebGPU::wgpuRegisterMeshBLAS(
+        root.aabbMin[0], root.aabbMin[1], root.aabbMin[2],
+        root.aabbMax[0], root.aabbMax[1], root.aabbMax[2],
+        vertices.data(),       static_cast<std::int32_t>(vertices.size()),
+        indices.data(),        static_cast<std::int32_t>(indices.size()),
+        builder.nodes.data(),  static_cast<std::int32_t>(builder.nodes.size()),
+        primRemap.data(),      static_cast<std::int32_t>(primRemap.size()));
+    blasAddr = h;
+}
--- a/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
+++ b/implementations/Crafter.Graphics-PipelineRTWebGPU.cpp
@ -0,0 +1,187 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// Megakernel WGSL assembly. The library prelude lives JS-side
+// (additional/dom-webgpu.js, rtWgslPrelude) — we don't have access to
+// it from C++ — so this file emits only the *user-controlled* portions
+// (concatenated SBT sources + the generated switch statements) and the
+// stable entry-point glue. The JS side wraps these with the prelude
+// before handing to device.createShaderModule.
+//
+// Wire format passed across the JS boundary is a single WGSL string
+// containing the substitution markers `// @CRAFTER_RT_USER_SOURCES`,
+// `// @CRAFTER_RT_CLOSESTHIT_CASES`, `// @CRAFTER_RT_ANYHIT_CASES`,
+// `// @CRAFTER_RT_MISS_CASES`, `// @CRAFTER_RT_RAYGEN_BODY` already
+// expanded; the JS side just concatenates prelude + this string.
+
+module;
+module Crafter.Graphics:PipelineRTWebGPU_impl;
+
+import :PipelineRTWebGPU;
+import :ShaderBindingTableWebGPU;
+import :RT;
+import :WebGPU;
+import std;
+
+using namespace Crafter;
+
+namespace {
+    constexpr std::string_view kPlaceholderClosestHit =
+        "fn _crafter_default_closesthit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {}";
+    constexpr std::string_view kPlaceholderAnyHit =
+        "fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }";
+    constexpr std::string_view kPlaceholderMiss =
+        "fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}";
+
+    void AppendCase(std::string& out,
+                    std::uint32_t hitGroupIndex,
+                    std::string_view entryFn,
+                    std::string_view args) {
+        out += "        case ";
+        out += std::to_string(hitGroupIndex);
+        out += "u: { ";
+        out += entryFn;
+        out += "(";
+        out += args;
+        out += "); }\n";
+    }
+
+    // anyhit has a return type — case body forwards the result.
+    void AppendAnyHitCase(std::string& out,
+                          std::uint32_t hitGroupIndex,
+                          std::string_view entryFn) {
+        out += "        case ";
+        out += std::to_string(hitGroupIndex);
+        out += "u: { return ";
+        out += entryFn;
+        out += "(ray, hit, payload); }\n";
+    }
+}
+
+void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef                 /*cmd*/,
+                            std::span<const RTShaderGroup>          raygenGroups,
+                            std::span<const RTShaderGroup>          missGroups,
+                            std::span<const RTShaderGroup>          hitGroups,
+                            const ShaderBindingTableWebGPU&         sbt) {
+    std::string wgsl;
+    wgsl.reserve(8 * 1024);
+
+    // ── Section 1: user closesthit / anyhit / miss source files ────────
+    //
+    // Raygens come later (after `traceRay` is declared) so we partition
+    // shaders by stage. Concatenating *all* non-raygen sources here lets
+    // them declare shared helpers, `struct Payload`, etc., in any order.
+
+    wgsl += "// ── user closesthit / anyhit / miss sources ───────────────\n";
+    for (const auto& shader : sbt.shaders) {
+        if (shader.stage == WebGPURTStage::Raygen) continue;
+        wgsl += shader.source;
+        wgsl += "\n";
+    }
+
+    // ── Section 2: mega-switch dispatchers ─────────────────────────────
+    //
+    // runClosestHit, runAnyHit, runMiss each dispatch on the per-hit /
+    // per-ray index registered against the appropriate group span.
+    // Indices match the user's expectations from VkRayTracingShaderGroup
+    // ordering: closest-hit group N (N from 0..hitGroups.size()-1) is
+    // selected by hitGroupIndex == N.
+
+    wgsl += "\nfn runClosestHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {\n";
+    wgsl += "    switch hg {\n";
+    bool anyClosestHit = false;
+    for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
+        const auto& g = hitGroups[i];
+        if (g.closestHitShader == kRTShaderUnused) continue;
+        if (g.closestHitShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.closestHitShader].entryFn;
+        AppendCase(wgsl, i, fn, "ray, hit, payload");
+        anyClosestHit = true;
+    }
+    if (!anyClosestHit) wgsl += "        // (no closest-hit shaders registered)\n";
+    wgsl += "        default: { }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n\n";
+
+    wgsl += "fn runAnyHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 {\n";
+    wgsl += "    switch hg {\n";
+    bool anyAnyhit = false;
+    for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
+        const auto& g = hitGroups[i];
+        if (g.anyHitShader == kRTShaderUnused) continue;
+        if (g.anyHitShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.anyHitShader].entryFn;
+        AppendAnyHitCase(wgsl, i, fn);
+        anyAnyhit = true;
+    }
+    if (!anyAnyhit) wgsl += "        // (no any-hit shaders registered)\n";
+    wgsl += "        default: { return RT_ANYHIT_ACCEPT; }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n\n";
+
+    wgsl += "fn runMiss(idx: u32, ray: RayDesc, payload: ptr<function, Payload>) {\n";
+    wgsl += "    switch idx {\n";
+    bool anyMiss = false;
+    for (std::uint32_t i = 0; i < missGroups.size(); ++i) {
+        const auto& g = missGroups[i];
+        if (g.generalShader == kRTShaderUnused) continue;
+        if (g.generalShader >= sbt.shaders.size()) continue;
+        const auto& fn = sbt.shaders[g.generalShader].entryFn;
+        AppendCase(wgsl, i, fn, "ray, payload");
+        anyMiss = true;
+    }
+    if (!anyMiss) wgsl += "        // (no miss shaders registered)\n";
+    wgsl += "        default: { }\n";
+    wgsl += "    }\n";
+    wgsl += "}\n";
+
+    // Marker — JS-side prelude/post-amble searches for this token to know
+    // where the library helpers (traverseBlas/traverseTlas/traceRay) get
+    // injected, followed by raygen sources and the @compute entry point.
+    wgsl += "\n// @CRAFTER_RT_LIBRARY_HELPERS_HERE\n";
+
+    // ── Section 3: user raygen source files ────────────────────────────
+    //
+    // Comes after the library injects traceRay, so raygens can call it.
+
+    wgsl += "\n// ── user raygen sources ───────────────────────────────────\n";
+    std::uint32_t raygenEntryIndex = kRTShaderUnused;
+    std::string   raygenEntryFn;
+    for (const auto& shader : sbt.shaders) {
+        if (shader.stage != WebGPURTStage::Raygen) continue;
+        wgsl += shader.source;
+        wgsl += "\n";
+        // Pick the first raygen group's general shader as the entry. Mirrors
+        // Vulkan's pRayGenShaderBindingTable[0] → first invoked raygen.
+        if (raygenEntryFn.empty()) raygenEntryFn = shader.entryFn;
+    }
+    if (!raygenGroups.empty()
+        && raygenGroups[0].generalShader != kRTShaderUnused
+        && raygenGroups[0].generalShader < sbt.shaders.size()) {
+        raygenEntryIndex = raygenGroups[0].generalShader;
+        raygenEntryFn    = sbt.shaders[raygenEntryIndex].entryFn;
+    }
+    if (raygenEntryFn.empty()) {
+        std::println("PipelineRTWebGPU::Init: no raygen shader registered");
+        pipelineHandle = 0;
+        return;
+    }
+
+    // ── Section 4: @compute entry point ────────────────────────────────
+    //
+    // 8x8 tile workgroup matching the rest of the WebGPU backend.
+
+    wgsl += "\n@compute @workgroup_size(8, 8, 1)\n";
+    wgsl += "fn main(@builtin(global_invocation_id) gid: vec3<u32>) {\n";
+    wgsl += "    ";
+    wgsl += raygenEntryFn;
+    wgsl += "(gid);\n";
+    wgsl += "}\n";
+
+    pipelineHandle = WebGPU::wgpuLoadRTPipeline(
+        wgsl.data(),
+        static_cast<std::int32_t>(wgsl.size()));
+}
--- a/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp
+++ b/implementations/Crafter.Graphics-RenderingElement3D-WebGPU.cpp
@ -0,0 +1,91 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+// DOM-mode TLAS upkeep. BuildTLAS copies the per-element RTInstance into
+// the host-visible instance buffer (skipping the transform for elements
+// whose transform is GPU-owned), uploads it, then dispatches the JS-side
+// TLAS-build compute pass — which consults the per-BLAS records published
+// at Mesh::Build() time to produce world-space AABBs and inverse
+// transforms in the format `traceRay` / `rayQuery` consume.
+
+module;
+module Crafter.Graphics:RenderingElement3D_implWebGPU;
+
+import :RenderingElement3D;
+import :Mesh;
+import :WebGPU;
+import :WebGPUBuffer;
+import std;
+
+using namespace Crafter;
+
+std::vector<RenderingElement3D*> RenderingElement3D::elements;
+
+void RenderingElement3D::Add(RenderingElement3D* e) {
+    e->indexInElements = static_cast<std::uint32_t>(elements.size());
+    elements.push_back(e);
+}
+
+void RenderingElement3D::Remove(RenderingElement3D* e) {
+    std::uint32_t idx = e->indexInElements;
+    if (idx == std::numeric_limits<std::uint32_t>::max()) return;
+    std::uint32_t last = static_cast<std::uint32_t>(elements.size() - 1);
+    if (idx != last) {
+        elements[idx] = elements[last];
+        elements[idx]->indexInElements = idx;
+    }
+    elements.pop_back();
+    e->indexInElements = std::numeric_limits<std::uint32_t>::max();
+}
+
+void RenderingElement3D::BuildTLAS(WebGPUCommandEncoderRef /*cmd*/, std::uint32_t index) {
+    auto& tlas = tlases[index];
+    const std::uint32_t primitiveCount = static_cast<std::uint32_t>(elements.size());
+    if (primitiveCount == 0) {
+        tlas.builtInstanceCount = 0;
+        return;
+    }
+
+    // (Re)allocate instance + metadata + output TLAS buffers if the count
+    // changed. WebGPUBuffer::Resize destroys and recreates the GPU buffer;
+    // bind-group caches keyed on the buffer handle are invalidated in the
+    // JS bridge automatically.
+    if (primitiveCount != tlas.builtInstanceCount) {
+        tlas.instanceBuffer.Resize(primitiveCount);
+        tlas.metadataBuffer.Resize(primitiveCount);
+        // TLASEntry layout in WGSL is 144 bytes due to vec3 align/pad
+        // rules. Must match the struct declared in the rtWgslTypes
+        // block in additional/dom-webgpu.js.
+        tlas.buffer.Resize(primitiveCount * 144);
+    }
+
+    for (std::uint32_t i = 0; i < primitiveCount; ++i) {
+        auto& dst = tlas.instanceBuffer.value[i];
+        const auto& src = elements[i]->instance;
+        if (elements[i]->transformOwnedByGpu) {
+            // Preserve whatever the GPU compute shader most recently
+            // wrote into dst.transform. Update only the non-transform
+            // fields.
+            dst.instanceCustomIndex                    = src.instanceCustomIndex;
+            dst.mask                                   = src.mask;
+            dst.instanceShaderBindingTableRecordOffset = src.instanceShaderBindingTableRecordOffset;
+            dst.flags                                  = src.flags;
+            dst.accelerationStructureReference         = src.accelerationStructureReference;
+        } else {
+            dst = src;
+        }
+        tlas.metadataBuffer.value[i] = elements[i]->userMetadata;
+    }
+
+    tlas.instanceBuffer.FlushDevice();
+    tlas.metadataBuffer.FlushDevice();
+
+    WebGPU::wgpuBuildTLAS(tlas.instanceBuffer.handle,
+                          static_cast<std::int32_t>(primitiveCount),
+                          tlas.buffer.handle);
+
+    tlas.builtInstanceCount = primitiveCount;
+}
--- a/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp
+++ b/implementations/Crafter.Graphics-ShaderBindingTableWebGPU.cpp
@ -0,0 +1,32 @@
+/*
+Crafter®.Graphics
+Copyright (C) 2026 Catcrafts®
+catcrafts.net
+*/
+
+module;
+module Crafter.Graphics:ShaderBindingTableWebGPU_impl;
+
+import :ShaderBindingTableWebGPU;
+import std;
+
+using namespace Crafter;
+
+WebGPUShader::WebGPUShader(const std::filesystem::path& wgslPath,
+                           std::string fn,
+                           WebGPURTStage s)
+    : entryFn(std::move(fn)), stage(s) {
+    std::ifstream f(wgslPath, std::ios::binary | std::ios::ate);
+    if (!f.is_open()) {
+        std::println("WebGPUShader: cannot open {}", wgslPath.string());
+        return;
+    }
+    auto size = f.tellg();
+    if (size <= 0) {
+        std::println("WebGPUShader: empty file {}", wgslPath.string());
+        return;
+    }
+    f.seekg(0, std::ios::beg);
+    source.resize(static_cast<std::size_t>(size));
+    f.read(source.data(), size);
+}
--- a/implementations/Crafter.Graphics-WebGPUComputeShader.cpp
+++ b/implementations/Crafter.Graphics-WebGPUComputeShader.cpp
@ -12,18 +12,22 @@ import std;
 using namespace Crafter;

 void WebGPUComputeShader::Load(std::string_view wgsl,
-                               std::span<const UICustomBinding> bindings) {
+                               std::span<const UICustomBinding> bindings,
+                               bool rayQuery) {
    customBindings.assign(bindings.begin(), bindings.end());
+    rayQueryCapable = rayQuery;
    pipelineHandle = WebGPU::wgpuLoadCustomShader(
        wgsl.data(),
        static_cast<std::int32_t>(wgsl.size()),
        customBindings.data(),
-        static_cast<std::int32_t>(customBindings.size())
+        static_cast<std::int32_t>(customBindings.size()),
+        rayQuery ? 1 : 0
    );
 }

 void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath,
-                               std::span<const UICustomBinding> bindings) {
+                               std::span<const UICustomBinding> bindings,
+                               bool rayQuery) {
    std::ifstream f(wgslPath, std::ios::binary | std::ios::ate);
    if (!f.is_open()) {
        std::println("WebGPUComputeShader::Load: cannot open {}", wgslPath.string());
@ -37,5 +41,5 @@ void WebGPUComputeShader::Load(const std::filesystem::path& wgslPath,
    f.seekg(0, std::ios::beg);
    std::string src(static_cast<std::size_t>(size), '\0');
    f.read(src.data(), size);
-    Load(std::string_view{src}, bindings);
+    Load(std::string_view{src}, bindings, rayQuery);
 }