2026-05-18 18:43:30 +02:00
|
|
|
/*
|
|
|
|
|
Crafter®.Graphics
|
|
|
|
|
Copyright (C) 2026 Catcrafts®
|
|
|
|
|
catcrafts.net
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// Megakernel WGSL assembly. The library prelude lives JS-side
|
|
|
|
|
// (additional/dom-webgpu.js, rtWgslPrelude) — we don't have access to
|
|
|
|
|
// it from C++ — so this file emits only the *user-controlled* portions
|
|
|
|
|
// (concatenated SBT sources + the generated switch statements) and the
|
|
|
|
|
// stable entry-point glue. The JS side wraps these with the prelude
|
|
|
|
|
// before handing to device.createShaderModule.
|
|
|
|
|
//
|
|
|
|
|
// Wire format passed across the JS boundary is a single WGSL string
|
|
|
|
|
// containing the substitution markers `// @CRAFTER_RT_USER_SOURCES`,
|
|
|
|
|
// `// @CRAFTER_RT_CLOSESTHIT_CASES`, `// @CRAFTER_RT_ANYHIT_CASES`,
|
|
|
|
|
// `// @CRAFTER_RT_MISS_CASES`, `// @CRAFTER_RT_RAYGEN_BODY` already
|
|
|
|
|
// expanded; the JS side just concatenates prelude + this string.
|
|
|
|
|
|
|
|
|
|
module;
|
|
|
|
|
module Crafter.Graphics:PipelineRTWebGPU_impl;
|
|
|
|
|
|
|
|
|
|
import :PipelineRTWebGPU;
|
|
|
|
|
import :ShaderBindingTableWebGPU;
|
2026-05-19 00:27:09 +02:00
|
|
|
import :WebGPUComputeShader;
|
2026-05-18 18:43:30 +02:00
|
|
|
import :RT;
|
|
|
|
|
import :WebGPU;
|
|
|
|
|
import std;
|
|
|
|
|
|
|
|
|
|
using namespace Crafter;
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
constexpr std::string_view kPlaceholderClosestHit =
|
|
|
|
|
"fn _crafter_default_closesthit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {}";
|
|
|
|
|
constexpr std::string_view kPlaceholderAnyHit =
|
|
|
|
|
"fn _crafter_default_anyhit(ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 { return RT_ANYHIT_ACCEPT; }";
|
|
|
|
|
constexpr std::string_view kPlaceholderMiss =
|
|
|
|
|
"fn _crafter_default_miss(ray: RayDesc, payload: ptr<function, Payload>) {}";
|
|
|
|
|
|
|
|
|
|
void AppendCase(std::string& out,
|
|
|
|
|
std::uint32_t hitGroupIndex,
|
|
|
|
|
std::string_view entryFn,
|
|
|
|
|
std::string_view args) {
|
|
|
|
|
out += " case ";
|
|
|
|
|
out += std::to_string(hitGroupIndex);
|
|
|
|
|
out += "u: { ";
|
|
|
|
|
out += entryFn;
|
|
|
|
|
out += "(";
|
|
|
|
|
out += args;
|
|
|
|
|
out += "); }\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// anyhit has a return type — case body forwards the result.
|
|
|
|
|
void AppendAnyHitCase(std::string& out,
|
|
|
|
|
std::uint32_t hitGroupIndex,
|
|
|
|
|
std::string_view entryFn) {
|
|
|
|
|
out += " case ";
|
|
|
|
|
out += std::to_string(hitGroupIndex);
|
|
|
|
|
out += "u: { return ";
|
|
|
|
|
out += entryFn;
|
|
|
|
|
out += "(ray, hit, payload); }\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PipelineRTWebGPU::Init(WebGPUCommandEncoderRef /*cmd*/,
|
|
|
|
|
std::span<const RTShaderGroup> raygenGroups,
|
|
|
|
|
std::span<const RTShaderGroup> missGroups,
|
|
|
|
|
std::span<const RTShaderGroup> hitGroups,
|
2026-05-19 00:27:09 +02:00
|
|
|
const ShaderBindingTableWebGPU& sbt,
|
|
|
|
|
std::span<const UICustomBinding> bindings) {
|
|
|
|
|
userBindings.assign(bindings.begin(), bindings.end());
|
2026-05-18 18:43:30 +02:00
|
|
|
std::string wgsl;
|
|
|
|
|
wgsl.reserve(8 * 1024);
|
|
|
|
|
|
|
|
|
|
// ── Section 1: user closesthit / anyhit / miss source files ────────
|
|
|
|
|
//
|
|
|
|
|
// Raygens come later (after `traceRay` is declared) so we partition
|
|
|
|
|
// shaders by stage. Concatenating *all* non-raygen sources here lets
|
|
|
|
|
// them declare shared helpers, `struct Payload`, etc., in any order.
|
|
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
wgsl += "// ── user closesthit / anyhit / miss / resolve sources ─────\n";
|
2026-05-18 18:43:30 +02:00
|
|
|
for (const auto& shader : sbt.shaders) {
|
|
|
|
|
if (shader.stage == WebGPURTStage::Raygen) continue;
|
|
|
|
|
wgsl += shader.source;
|
|
|
|
|
wgsl += "\n";
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
// ── Payload-typed wavefront storage binding ────────────────────────
|
|
|
|
|
//
|
|
|
|
|
// Emitted *after* the user sources so it can name the user's `Payload`
|
|
|
|
|
// type. Holds one Payload per in-flight ray slot across both ping/pong
|
|
|
|
|
// ray buffers (capacity = 2·W·H). SHADE loads ray.payloadSlot here;
|
|
|
|
|
// emit helpers (rtEmitPrimaryRay / rtEmitRay) store into it.
|
|
|
|
|
wgsl += "\n@group(1) @binding(15) var<storage, read_write> "
|
|
|
|
|
"wfPayload : array<Payload>;\n";
|
|
|
|
|
|
2026-05-18 18:43:30 +02:00
|
|
|
// ── Section 2: mega-switch dispatchers ─────────────────────────────
|
|
|
|
|
//
|
|
|
|
|
// runClosestHit, runAnyHit, runMiss each dispatch on the per-hit /
|
|
|
|
|
// per-ray index registered against the appropriate group span.
|
|
|
|
|
// Indices match the user's expectations from VkRayTracingShaderGroup
|
|
|
|
|
// ordering: closest-hit group N (N from 0..hitGroups.size()-1) is
|
|
|
|
|
// selected by hitGroupIndex == N.
|
|
|
|
|
|
|
|
|
|
wgsl += "\nfn runClosestHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) {\n";
|
|
|
|
|
wgsl += " switch hg {\n";
|
|
|
|
|
bool anyClosestHit = false;
|
|
|
|
|
for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
|
|
|
|
|
const auto& g = hitGroups[i];
|
|
|
|
|
if (g.closestHitShader == kRTShaderUnused) continue;
|
|
|
|
|
if (g.closestHitShader >= sbt.shaders.size()) continue;
|
|
|
|
|
const auto& fn = sbt.shaders[g.closestHitShader].entryFn;
|
|
|
|
|
AppendCase(wgsl, i, fn, "ray, hit, payload");
|
|
|
|
|
anyClosestHit = true;
|
|
|
|
|
}
|
|
|
|
|
if (!anyClosestHit) wgsl += " // (no closest-hit shaders registered)\n";
|
|
|
|
|
wgsl += " default: { }\n";
|
|
|
|
|
wgsl += " }\n";
|
|
|
|
|
wgsl += "}\n\n";
|
|
|
|
|
|
|
|
|
|
wgsl += "fn runAnyHit(hg: u32, ray: RayDesc, hit: HitInfo, payload: ptr<function, Payload>) -> u32 {\n";
|
|
|
|
|
wgsl += " switch hg {\n";
|
|
|
|
|
bool anyAnyhit = false;
|
|
|
|
|
for (std::uint32_t i = 0; i < hitGroups.size(); ++i) {
|
|
|
|
|
const auto& g = hitGroups[i];
|
|
|
|
|
if (g.anyHitShader == kRTShaderUnused) continue;
|
|
|
|
|
if (g.anyHitShader >= sbt.shaders.size()) continue;
|
|
|
|
|
const auto& fn = sbt.shaders[g.anyHitShader].entryFn;
|
|
|
|
|
AppendAnyHitCase(wgsl, i, fn);
|
|
|
|
|
anyAnyhit = true;
|
|
|
|
|
}
|
|
|
|
|
if (!anyAnyhit) wgsl += " // (no any-hit shaders registered)\n";
|
|
|
|
|
wgsl += " default: { return RT_ANYHIT_ACCEPT; }\n";
|
|
|
|
|
wgsl += " }\n";
|
|
|
|
|
wgsl += "}\n\n";
|
|
|
|
|
|
|
|
|
|
wgsl += "fn runMiss(idx: u32, ray: RayDesc, payload: ptr<function, Payload>) {\n";
|
|
|
|
|
wgsl += " switch idx {\n";
|
|
|
|
|
bool anyMiss = false;
|
|
|
|
|
for (std::uint32_t i = 0; i < missGroups.size(); ++i) {
|
|
|
|
|
const auto& g = missGroups[i];
|
|
|
|
|
if (g.generalShader == kRTShaderUnused) continue;
|
|
|
|
|
if (g.generalShader >= sbt.shaders.size()) continue;
|
|
|
|
|
const auto& fn = sbt.shaders[g.generalShader].entryFn;
|
|
|
|
|
AppendCase(wgsl, i, fn, "ray, payload");
|
|
|
|
|
anyMiss = true;
|
|
|
|
|
}
|
|
|
|
|
if (!anyMiss) wgsl += " // (no miss shaders registered)\n";
|
|
|
|
|
wgsl += " default: { }\n";
|
|
|
|
|
wgsl += " }\n";
|
|
|
|
|
wgsl += "}\n";
|
|
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
// runResolve — RESOLVE-stage tonemap hook. The first registered
|
|
|
|
|
// Resolve shader wins; with none, identity passthrough (alpha forced
|
|
|
|
|
// to 1) so the wavefront output matches a megakernel that wrote raw
|
|
|
|
|
// colors.
|
|
|
|
|
std::string resolveEntryFn;
|
|
|
|
|
for (const auto& shader : sbt.shaders) {
|
|
|
|
|
if (shader.stage == WebGPURTStage::Resolve) { resolveEntryFn = shader.entryFn; break; }
|
|
|
|
|
}
|
|
|
|
|
wgsl += "\nfn runResolve(coord: vec2<u32>, hdr: vec4<f32>) -> vec4<f32> {\n";
|
|
|
|
|
if (!resolveEntryFn.empty()) {
|
|
|
|
|
wgsl += " return ";
|
|
|
|
|
wgsl += resolveEntryFn;
|
|
|
|
|
wgsl += "(coord, hdr);\n";
|
|
|
|
|
} else {
|
|
|
|
|
wgsl += " return vec4<f32>(hdr.rgb, 1.0);\n";
|
|
|
|
|
}
|
|
|
|
|
wgsl += "}\n";
|
|
|
|
|
|
2026-05-18 18:43:30 +02:00
|
|
|
// Marker — JS-side prelude/post-amble searches for this token to know
|
|
|
|
|
// where the library helpers (traverseBlas/traverseTlas/traceRay) get
|
|
|
|
|
// injected, followed by raygen sources and the @compute entry point.
|
|
|
|
|
wgsl += "\n// @CRAFTER_RT_LIBRARY_HELPERS_HERE\n";
|
|
|
|
|
|
|
|
|
|
// ── Section 3: user raygen source files ────────────────────────────
|
|
|
|
|
//
|
|
|
|
|
// Comes after the library injects traceRay, so raygens can call it.
|
|
|
|
|
|
|
|
|
|
wgsl += "\n// ── user raygen sources ───────────────────────────────────\n";
|
|
|
|
|
std::uint32_t raygenEntryIndex = kRTShaderUnused;
|
|
|
|
|
std::string raygenEntryFn;
|
|
|
|
|
for (const auto& shader : sbt.shaders) {
|
|
|
|
|
if (shader.stage != WebGPURTStage::Raygen) continue;
|
|
|
|
|
wgsl += shader.source;
|
|
|
|
|
wgsl += "\n";
|
|
|
|
|
// Pick the first raygen group's general shader as the entry. Mirrors
|
|
|
|
|
// Vulkan's pRayGenShaderBindingTable[0] → first invoked raygen.
|
|
|
|
|
if (raygenEntryFn.empty()) raygenEntryFn = shader.entryFn;
|
|
|
|
|
}
|
|
|
|
|
if (!raygenGroups.empty()
|
|
|
|
|
&& raygenGroups[0].generalShader != kRTShaderUnused
|
|
|
|
|
&& raygenGroups[0].generalShader < sbt.shaders.size()) {
|
|
|
|
|
raygenEntryIndex = raygenGroups[0].generalShader;
|
|
|
|
|
raygenEntryFn = sbt.shaders[raygenEntryIndex].entryFn;
|
|
|
|
|
}
|
|
|
|
|
if (raygenEntryFn.empty()) {
|
|
|
|
|
std::println("PipelineRTWebGPU::Init: no raygen shader registered");
|
|
|
|
|
pipelineHandle = 0;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
// ── Section 4: wavefront @compute entry points ─────────────────────
|
2026-05-18 18:43:30 +02:00
|
|
|
//
|
2026-05-31 16:24:41 +00:00
|
|
|
// Five kernels share this one module; createComputePipeline selects
|
|
|
|
|
// each by entryPoint name. GENERATE/RESOLVE are 8x8 screen tiles;
|
|
|
|
|
// TRACE/SHADE are 64-wide 1-D over the compacted ray list (dispatched
|
|
|
|
|
// indirectly from PREP); PREP is a single thread. The library helper
|
|
|
|
|
// bodies (_rtwTraverseTlas, rtEmit*, rtAccumulate, _wfCurCount, …) are
|
|
|
|
|
// injected JS-side at the marker above.
|
2026-05-18 18:43:30 +02:00
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
// GENERATE — one thread per pixel; clears the pixel's accumulator and
|
|
|
|
|
// runs the user raygen, which calls rtEmitPrimaryRay.
|
2026-05-18 18:43:30 +02:00
|
|
|
wgsl += "\n@compute @workgroup_size(8, 8, 1)\n";
|
2026-05-31 16:24:41 +00:00
|
|
|
wgsl += "fn wfGenerate(@builtin(global_invocation_id) gid: vec3<u32>) {\n";
|
|
|
|
|
wgsl += " if (gid.x >= wfParams.surfaceW || gid.y >= wfParams.surfaceH) { return; }\n";
|
|
|
|
|
wgsl += " let pixel = gid.y * wfParams.surfaceW + gid.x;\n";
|
|
|
|
|
wgsl += " wfAccum[pixel] = vec4<f32>(0.0, 0.0, 0.0, 0.0);\n";
|
|
|
|
|
wgsl += " _wfPixel = pixel;\n";
|
2026-05-18 18:43:30 +02:00
|
|
|
wgsl += " ";
|
|
|
|
|
wgsl += raygenEntryFn;
|
|
|
|
|
wgsl += "(gid);\n";
|
|
|
|
|
wgsl += "}\n";
|
|
|
|
|
|
2026-05-31 16:24:41 +00:00
|
|
|
// PREP — single thread; reads the live ray count and publishes the
|
|
|
|
|
// indirect dispatch args for the upcoming TRACE/SHADE, then zeroes the
|
|
|
|
|
// next buffer's emit counter so SHADE starts compacting from 0.
|
|
|
|
|
wgsl += "\n@compute @workgroup_size(1)\n";
|
|
|
|
|
wgsl += "fn wfPrep() { _wfPrep(); }\n";
|
|
|
|
|
|
|
|
|
|
// TRACE — zero user code: pure traversal + intersection. One thread
|
|
|
|
|
// per live ray; writes a HitResult into wfHits[i].
|
|
|
|
|
wgsl += "\n@compute @workgroup_size(64)\n";
|
|
|
|
|
wgsl += "fn wfTrace(@builtin(global_invocation_id) gid: vec3<u32>) { _wfTrace(gid.x); }\n";
|
|
|
|
|
|
|
|
|
|
// SHADE — one thread per live ray; loads the ray + its hit + payload,
|
|
|
|
|
// dispatches to runMiss / runClosestHit, which may rtAccumulate and
|
|
|
|
|
// rtEmitRay continuation/shadow rays into the next buffer.
|
|
|
|
|
wgsl += "\n@compute @workgroup_size(64)\n";
|
|
|
|
|
wgsl += "fn wfShade(@builtin(global_invocation_id) gid: vec3<u32>) { _wfShade(gid.x); }\n";
|
|
|
|
|
|
|
|
|
|
// RESOLVE — one thread per pixel; runs the user resolve (or identity)
|
|
|
|
|
// over the linear accumulator and stores to the output image.
|
|
|
|
|
wgsl += "\n@compute @workgroup_size(8, 8, 1)\n";
|
|
|
|
|
wgsl += "fn wfResolve(@builtin(global_invocation_id) gid: vec3<u32>) {\n";
|
|
|
|
|
wgsl += " if (gid.x >= wfParams.surfaceW || gid.y >= wfParams.surfaceH) { return; }\n";
|
|
|
|
|
wgsl += " let pixel = gid.y * wfParams.surfaceW + gid.x;\n";
|
|
|
|
|
wgsl += " let outc = runResolve(gid.xy, wfAccum[pixel]);\n";
|
|
|
|
|
wgsl += " textureStore(outImage, vec2<i32>(i32(gid.x), i32(gid.y)), outc);\n";
|
|
|
|
|
wgsl += "}\n";
|
|
|
|
|
|
2026-05-18 18:43:30 +02:00
|
|
|
pipelineHandle = WebGPU::wgpuLoadRTPipeline(
|
|
|
|
|
wgsl.data(),
|
2026-05-19 00:27:09 +02:00
|
|
|
static_cast<std::int32_t>(wgsl.size()),
|
|
|
|
|
userBindings.empty() ? nullptr : userBindings.data(),
|
|
|
|
|
static_cast<std::int32_t>(userBindings.size()));
|
2026-05-18 18:43:30 +02:00
|
|
|
}
|