diff --git a/examples/README.md b/examples/README.md index 170eb0f..940d54d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -82,3 +82,11 @@ shader couldn't express. Shows: inserted automatically, so the custom shader sees the colored stripes drawn by the prior `DispatchQuads` and reads/writes the swapchain image safely. + +### [RayQueryPick](RayQueryPick/) +Regression test for the WebGPU software ray-query shim. Builds a +512-instance TLAS and shoots one ray through a `rayQuery=true` +`PlainComputeShader`, reading the committed hit back to the host and +checking it against the analytically-known answer. Guards against the +hardcoded-leaf-start TLAS-traversal bug (issue #25) that made every +rayQuery pick miss for realistic instance counts. WebGPU/DOM only. diff --git a/examples/RayQueryPick/README.md b/examples/RayQueryPick/README.md new file mode 100644 index 0000000..2fa18ec --- /dev/null +++ b/examples/RayQueryPick/README.md @@ -0,0 +1,35 @@ +# RayQueryPick + +Regression test for the WebGPU software **ray-query** shim +(`additional/dom-webgpu.js`, `_rqTraverseTlas`). + +Builds an 8³ = 512-instance TLAS and shoots one fully-determined ray +through a `rayQuery=true` `PlainComputeShader`. The committed hit is read +back to the host and checked against the analytically-known answer +(instance `customIndex = 484`, `t = 40.75`). On success the console prints: + +``` +[RayQueryPick] result: hit=1 customIndex=484 prim=6 t=40.75 +[RayQueryPick] PASS — rayQuery TLAS traversal hit the expected instance +``` + +## Why 512 instances + +The TLAS sweep tree is padded to `next_pow2(instanceCount)` leaves. The +rayQuery shim used to detect BVH leaves with a hardcoded `16384 - 1` leaf +start, so for any scene with fewer than 8193 instances **no node index +ever reached a leaf** and every pick missed (issue #25). 512 sits squarely +in that broken regime, so this example fails fast if the shim regresses to +a static leaf start. The shim now derives the leaf start from a per-build +`RqTlasMeta.nPadded` uniform, matching the megakernel `_rtwTraverseTlas`. + +The scene also renders through the wavefront RT pipeline (same as +RTStress) so the run produces a visible frame, but the pass/fail signal is +the console line above. + +```bash +cd examples/RayQueryPick +crafter-build -r --target=wasm32-wasip1 +``` + +WebGPU/DOM only — the native path uses hardware ray queries. diff --git a/examples/RayQueryPick/closesthit.wgsl b/examples/RayQueryPick/closesthit.wgsl new file mode 100644 index 0000000..c80f5bb --- /dev/null +++ b/examples/RayQueryPick/closesthit.wgsl @@ -0,0 +1,54 @@ +// RTStress closest-hit (runs in SHADE). Computes flat-shaded Lambert from +// the hit triangle's geometric normal, accumulates ambient, and — if the +// surface faces the sun — emits a shadow ray toward the sun. The shadow +// ray's miss (sun visible) adds the direct term; its hit (occluded) adds +// nothing because RT_FLAG_SKIP_CLOSEST_HIT suppresses closesthit on hit. +// +// Payload declared here so the assembler sees it before wfPayload / SHADE. +struct Payload { + color: vec3, // shadow ray: pending direct contribution + shadowRay: u32, // 0 primary, 1 shadow +}; + +const SUN_DIR_TO_LIGHT: vec3 = vec3(0.40, 0.85, 0.35); +const SUN_COLOR: vec3 = vec3(1.15, 1.05, 0.90); +const AMBIENT_COLOR: vec3 = vec3(0.12, 0.13, 0.18); + +// Cheap per-instance albedo so the grid reads as distinct cubes (and any +// TLAS flicker as instance count scales is obvious). +fn instanceAlbedo(i: u32) -> vec3 { + let h = i * 2654435761u; + return vec3( + 0.35 + 0.6 * f32((h >> 0u) & 255u) / 255.0, + 0.35 + 0.6 * f32((h >> 8u) & 255u) / 255.0, + 0.35 + 0.6 * f32((h >> 16u) & 255u) / 255.0); +} + +fn closesthit_main(ray: RayDesc, hit: HitInfo, payload: ptr) { + let meshRec = meshRecords[tlasEntries[hit.instanceId].blasMeshIdx]; + let verts = _rtFetchTri(meshRec, hit.primitiveId); + let nObj = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + let nWorld = normalize(vec3( + dot(hit.objectToWorldR0.xyz, nObj), + dot(hit.objectToWorldR1.xyz, nObj), + dot(hit.objectToWorldR2.xyz, nObj))); + + let albedo = instanceAlbedo(hit.customIndex); + let worldPos = ray.origin + ray.direction * hit.t; + let viewDir = -ray.direction; + let nFacing = select(-nWorld, nWorld, dot(nWorld, viewDir) > 0.0); + let sunDir = normalize(SUN_DIR_TO_LIGHT); + let nDotL = max(0.0, dot(nFacing, sunDir)); + + rtAccumulate(albedo * AMBIENT_COLOR); + + if (nDotL > 0.0) { + var sp: Payload; + sp.color = albedo * SUN_COLOR * nDotL; + sp.shadowRay = 1u; + let shadowOrigin = worldPos + nFacing * 0.05; + rtEmitRay(shadowOrigin, 0.01, sunDir, 100000.0, + RT_FLAG_SKIP_CLOSEST_HIT | RT_FLAG_TERMINATE_ON_FIRST_HIT, + 0xFFu, 0u, 0u, sp); + } +} diff --git a/examples/RayQueryPick/main.cpp b/examples/RayQueryPick/main.cpp new file mode 100644 index 0000000..157282b --- /dev/null +++ b/examples/RayQueryPick/main.cpp @@ -0,0 +1,226 @@ +// RayQueryPick — regression test for the WebGPU software ray-query shim. +// +// Builds an 8³ = 512-instance TLAS (well below the 8193 threshold where a +// hardcoded 16384-leaf TLAS start used to make every rayQuery pick miss — +// issue #25) and shoots ONE fully-determined ray through a `rayQuery=true` +// compute shader. The committed hit is read back to the host and checked +// against the analytically-known answer. +// +// The scene also renders through the wavefront RT pipeline (same as +// RTStress) so the run produces a visible frame, but the pass/fail signal +// is the console line printed from the read-back pick result. +// +// WebGPU/DOM only — the rayQuery shim is the WebGPU software RT path. + +#ifndef CRAFTER_GRAPHICS_WINDOW_DOM +int main() { return 0; } // native path uses hardware ray queries +#else + +#include // std::fflush / stdout — flush the verdict past _Exit + +import Crafter.Graphics; +import Crafter.Math; +import Crafter.Event; +import std; + +using namespace Crafter; +namespace fs = std::filesystem; + +namespace { + constexpr int kGrid = 8; // 8³ = 512 instances (< 8193 ⇒ bug regime) + constexpr float kSpacing = 2.5f; + constexpr float kHalf = 0.5f; + + // Analytically-known target: a -X ray down the (iy=4, iz=4) row hits the + // cube with the largest X centre (ix=7) first. + constexpr int kHitX = 7, kHitY = 4, kHitZ = 4; + constexpr std::uint32_t kExpectedCustomIndex = + static_cast(((kHitX * kGrid) + kHitY) * kGrid + kHitZ); // 484 + + struct CameraGPU { + float origin[3]; float pad0; + float right[3]; float tanHalf; + float up[3]; float aspect; + float forward[3]; float pad1; + }; + static_assert(sizeof(CameraGPU) == 64); + + // @group(0) push for the pick shader: ray origin + direction. + struct PickPush { + float origin[3]; float pad0; + float dir[3]; float pad1; + }; + static_assert(sizeof(PickPush) == 32); + + struct PickResult { + std::uint32_t hit; + std::uint32_t instanceCustomIndex; + std::uint32_t primitiveIndex; + float tHit; + }; + static_assert(sizeof(PickResult) == 16); +} + +int main() { + const int instanceCount = kGrid * kGrid * kGrid; + std::println("[RayQueryPick] grid {}^3 = {} instances (expected hit customIndex {})", + kGrid, instanceCount, kExpectedCustomIndex); + + Device::Initialize(); + static Window window(1280, 720, "RayQueryPick"); + auto cmd = window.StartInit(); + + DescriptorHeapWebGPU heap; + heap.Initialize(/*images*/ 1, /*buffers*/ 2, /*samplers*/ 1); + + std::array shaders {{ + WebGPUShader(fs::path("raygen.wgsl"), "raygen_main", WebGPURTStage::Raygen), + WebGPUShader(fs::path("miss.wgsl"), "miss_main", WebGPURTStage::Miss), + WebGPUShader(fs::path("closesthit.wgsl"), "closesthit_main", WebGPURTStage::ClosestHit), + WebGPUShader(fs::path("resolve.wgsl"), "resolve_main", WebGPURTStage::Resolve), + }}; + ShaderBindingTableWebGPU sbt; + sbt.Init(shaders); + + std::array raygenGroups {{ { .type = RTShaderGroupType::General, .generalShader = 0 } }}; + std::array missGroups {{ { .type = RTShaderGroupType::General, .generalShader = 1 } }}; + std::array hitGroups {{ { .type = RTShaderGroupType::TrianglesHitGroup, .closestHitShader = 2 } }}; + + std::array bindings {{ + { .group = 3, .binding = 0, .kind = UICustomBindingKind::Buffer, ._pad = 0, .pushOffset = 0 }, + }}; + + PipelineRTWebGPU pipeline; + pipeline.Init(cmd, raygenGroups, missGroups, hitGroups, sbt, bindings); + + // ── Unit cube mesh (8 verts, 12 tris). ──────────────────────────── + static std::array, 8> verts {{ + {-kHalf, -kHalf, -kHalf}, { kHalf, -kHalf, -kHalf}, + { kHalf, kHalf, -kHalf}, {-kHalf, kHalf, -kHalf}, + {-kHalf, -kHalf, kHalf}, { kHalf, -kHalf, kHalf}, + { kHalf, kHalf, kHalf}, {-kHalf, kHalf, kHalf}, + }}; + static std::array indices {{ + 0,1,2, 0,2,3, 5,4,7, 5,7,6, 4,0,3, 4,3,7, + 1,5,6, 1,6,2, 4,5,1, 4,1,0, 3,2,6, 3,6,7, + }}; + static Mesh cube; + cube.Build(verts, indices, cmd); + + WebGPUBuffer cameraBuf; + cameraBuf.Create(1); + static std::array userHandles { cameraBuf.handle }; + + // ── Instance grid. ───────────────────────────────────────────────── + static std::vector renderers; + renderers.reserve(static_cast(instanceCount)); + const float origin0 = -0.5f * static_cast(kGrid - 1) * kSpacing; + for (int x = 0; x < kGrid; ++x) + for (int y = 0; y < kGrid; ++y) + for (int z = 0; z < kGrid; ++z) { + renderers.emplace_back(); + RenderingElement3D& r = renderers.back(); + auto& tx = r.instance.transform.matrix; + tx[0][0] = 1; tx[0][1] = 0; tx[0][2] = 0; tx[0][3] = origin0 + float(x) * kSpacing; + tx[1][0] = 0; tx[1][1] = 1; tx[1][2] = 0; tx[1][3] = origin0 + float(y) * kSpacing; + tx[2][0] = 0; tx[2][1] = 0; tx[2][2] = 1; tx[2][3] = origin0 + float(z) * kSpacing; + r.instance.instanceCustomIndex = static_cast(renderers.size() - 1); + r.instance.mask = 0xFF; + r.instance.instanceShaderBindingTableRecordOffset = 0; + r.instance.flags = kRTGeometryInstanceForceOpaque; + r.instance.accelerationStructureReference = cube.blasAddr; + RenderingElement3D::Add(&r); + } + RenderingElement3D::BuildTLAS(cmd, 0); + + window.descriptorHeap = &heap; + window.FinishInit(); + + RTPass rtPass(&pipeline); + rtPass.handlesPtr = userHandles.data(); + rtPass.handlesCount = static_cast(userHandles.size()); + rtPass.maxDepth = 1; + window.passes.push_back(&rtPass); + + // Fixed camera framing the grid from a corner, aimed at the centre. + { + const float ext = float(kGrid - 1) * kSpacing; + Vector pos { ext * 1.4f, ext * 1.0f, ext * 1.4f }; + Vector d { -pos.x, -pos.y, -pos.z }; + const float len = std::sqrt(d.x*d.x + d.y*d.y + d.z*d.z); + Vector forward { d.x/len, d.y/len, d.z/len }; + Vector worldUp { 0.0f, 1.0f, 0.0f }; + Vector right { forward.y*worldUp.z - forward.z*worldUp.y, + forward.z*worldUp.x - forward.x*worldUp.z, + forward.x*worldUp.y - forward.y*worldUp.x }; + const float rLen = std::sqrt(right.x*right.x + right.y*right.y + right.z*right.z); + right.x /= rLen; right.y /= rLen; right.z /= rLen; + Vector up { right.y*forward.z - right.z*forward.y, + right.z*forward.x - right.x*forward.z, + right.x*forward.y - right.y*forward.x }; + CameraGPU& g = cameraBuf.value[0]; + g.origin[0]=pos.x; g.origin[1]=pos.y; g.origin[2]=pos.z; g.pad0=0; + g.right[0]=right.x; g.right[1]=right.y; g.right[2]=right.z; + g.up[0]=up.x; g.up[1]=up.y; g.up[2]=up.z; + g.forward[0]=forward.x; g.forward[1]=forward.y; g.forward[2]=forward.z; + g.aspect = float(window.width) / float(window.height); + g.tanHalf = std::tan(70.0f * 3.14159265f / 360.0f); + g.pad1 = 0; + cameraBuf.FlushDevice(); + } + + // ── rayQuery pick shader + output buffer. ────────────────────────── + static PlainComputeShader pickShader; + std::array pickBindings {{ + { .group = 2, .binding = 0, .kind = UICustomBindingKind::BufferReadWrite, ._pad = 0, .pushOffset = 0 }, + }}; + pickShader.Load(fs::path("rayquery_pick.wgsl"), + static_cast(sizeof(PickPush)), + pickBindings, /*rayQuery*/ true); + + static WebGPUBuffer pickBuf; + pickBuf.Create(1); + static std::array pickHandles { pickBuf.handle }; + + // The known ray: -X down the (iy,iz) row, far enough out to clear the grid. + static PickPush push {}; + push.origin[0] = 50.0f; + push.origin[1] = origin0 + float(kHitY) * kSpacing; + push.origin[2] = origin0 + float(kHitZ) * kSpacing; + push.dir[0] = -1.0f; push.dir[1] = 0.0f; push.dir[2] = 0.0f; + + static int frame = 0; + static bool dispatched = false; + static bool reported = false; + EventListener tick(&window.onBeforeUpdate, [&]() { + if (reported) return; + // Let a couple of frames go by so the TLAS build has certainly run. + if (frame == 2 && !dispatched) { + pickShader.Dispatch(&push, sizeof(push), pickHandles, 1, 1, 1); + pickBuf.EnqueueReadback(); + dispatched = true; + } else if (dispatched && pickBuf.PollReadback()) { + const PickResult& r = pickBuf.value[0]; + const bool ok = (r.hit == 1u) && (r.instanceCustomIndex == kExpectedCustomIndex); + std::println("[RayQueryPick] result: hit={} customIndex={} prim={} t={}", + r.hit, r.instanceCustomIndex, r.primitiveIndex, r.tHit); + if (ok) { + std::println("[RayQueryPick] PASS — rayQuery TLAS traversal hit the expected instance"); + } else { + std::println("[RayQueryPick] FAIL — expected hit=1 customIndex={}, got hit={} customIndex={}", + kExpectedCustomIndex, r.hit, r.instanceCustomIndex); + } + // The render loop runs after main's _Exit, where stdio is never + // flushed implicitly — push the verdict out explicitly. + std::fflush(stdout); + reported = true; + } + ++frame; + }); + + window.Render(); + window.StartUpdate(); + window.StartSync(); + return 0; +} +#endif diff --git a/examples/RayQueryPick/miss.wgsl b/examples/RayQueryPick/miss.wgsl new file mode 100644 index 0000000..d23a0bc --- /dev/null +++ b/examples/RayQueryPick/miss.wgsl @@ -0,0 +1,11 @@ +// RTStress miss (runs in SHADE). Primary miss → sky gradient. Shadow miss +// → the sun is unoccluded, so add the pending direct contribution. +fn miss_main(ray: RayDesc, payload: ptr) { + if ((*payload).shadowRay == 1u) { + rtAccumulate((*payload).color); + return; + } + let t = clamp(ray.direction.y * 0.5 + 0.5, 0.0, 1.0); + rtAccumulate(mix(vec3(0.50, 0.62, 0.85), + vec3(0.90, 0.94, 1.00), t)); +} diff --git a/examples/RayQueryPick/project.cpp b/examples/RayQueryPick/project.cpp new file mode 100644 index 0000000..bedc9fd --- /dev/null +++ b/examples/RayQueryPick/project.cpp @@ -0,0 +1,47 @@ +import std; +import Crafter.Build; +namespace fs = std::filesystem; +using namespace Crafter; + +extern "C" Configuration CrafterBuildProject(std::span args) { + bool isWasm = false; + for (std::string_view a : args) { + if (a.starts_with("--target=") && a.find("wasm") != std::string_view::npos) { + isWasm = true; + break; + } + } + + std::vector graphicsArgs(args.begin(), args.end()); + Configuration* graphics = LocalProject({ + .projectFile = "../../project.cpp", + .args = graphicsArgs, + }); + + Configuration cfg; + cfg.path = "./"; + cfg.name = "RayQueryPick"; + cfg.outputName = "RayQueryPick"; + cfg.type = ConfigurationType::Executable; + if (isWasm) { + cfg.target = "wasm32-wasip1"; + cfg.defines.push_back({"CRAFTER_GRAPHICS_WINDOW_DOM", ""}); + cfg.compileFlags.push_back("-msimd128"); + } + ApplyStandardArgs(cfg, args); + cfg.dependencies = { graphics }; + + std::array ifaces = {}; + std::array impls = { "main" }; + cfg.GetInterfacesAndImplementations(ifaces, impls); + + if (isWasm) { + cfg.files.emplace_back(fs::path("raygen.wgsl")); + cfg.files.emplace_back(fs::path("closesthit.wgsl")); + cfg.files.emplace_back(fs::path("miss.wgsl")); + cfg.files.emplace_back(fs::path("resolve.wgsl")); + cfg.files.emplace_back(fs::path("rayquery_pick.wgsl")); + EnableWasiBrowserRuntime(cfg); + } + return cfg; +} diff --git a/examples/RayQueryPick/raygen.wgsl b/examples/RayQueryPick/raygen.wgsl new file mode 100644 index 0000000..def54fc --- /dev/null +++ b/examples/RayQueryPick/raygen.wgsl @@ -0,0 +1,35 @@ +// RTStress raygen (runs in GENERATE). Host-driven pinhole camera at +// @group(3) (groups 0..2 are reserved by the wavefront pipeline: +// 0 = WfParams, 1 = data heaps, 2 = indirect args). +struct Camera { + origin: vec3, + pad0: f32, + right: vec3, + tanHalf: f32, + up: vec3, + aspect: f32, + forward: vec3, + pad1: f32, +}; +@group(3) @binding(0) var camera : Camera; + +fn raygen_main(gid: vec3) { + if (gid.x >= wfParams.surfaceW || gid.y >= wfParams.surfaceH) { return; } + + let pixelf = vec2(f32(gid.x), f32(gid.y)); + let res = vec2(f32(wfParams.surfaceW), f32(wfParams.surfaceH)); + let uv = (pixelf + vec2(0.5)) / res; + let ndc = uv * 2.0 - vec2(1.0); + + let direction = normalize( + camera.right * (ndc.x * camera.aspect * camera.tanHalf) + + camera.up * (-ndc.y * camera.tanHalf) + + camera.forward); + + var p: Payload; + p.color = vec3(0.0); + p.shadowRay = 0u; + + rtEmitPrimaryRay(camera.origin, 0.01, direction, 100000.0, + 0u, 0xFFu, 0u, 0u, p); +} diff --git a/examples/RayQueryPick/rayquery_pick.wgsl b/examples/RayQueryPick/rayquery_pick.wgsl new file mode 100644 index 0000000..b58b4b9 --- /dev/null +++ b/examples/RayQueryPick/rayquery_pick.wgsl @@ -0,0 +1,48 @@ +// rayQuery picking smoke test (WebGPU/DOM software ray-query path). +// +// Shoots a single, fully-determined ray at a known TLAS instance through +// the injected `rayQuery*` shim and records the committed hit into a +// storage buffer the host reads back. The whole point is to exercise +// `_rqTraverseTlas`'s TLAS descent for a realistic (< 8193) instance +// count — the regime where a hardcoded 16384-leaf start makes every node +// look internal and the pick always misses (issue #25). +// +// rayQuery=true ⇒ @group(1) is the reserved RT heap (do NOT declare it). +// User bindings live at @group(2)+; the optional push uniform at @group(0). + +struct PushData { + origin: vec3, + _p0: f32, + dir: vec3, + _p1: f32, +}; +@group(0) @binding(0) var push : PushData; + +struct PickResult { + hit: u32, // 1 = committed triangle hit, 0 = miss + instanceCustomIndex: u32, + primitiveIndex: u32, + tHit: f32, +}; +@group(2) @binding(0) var result : PickResult; + +@compute @workgroup_size(1) +fn main() { + var rq: RayQuery; + rayQueryInitialize(&rq, 0u, RT_FLAG_OPAQUE, 0xFFu, + push.origin, 0.001, push.dir, 10000.0); + // Software shim resolves the whole traversal in one proceed call. + while (rayQueryProceed(&rq)) {} + + if (rayQueryGetCommittedIntersectionType(&rq) != RT_INTERSECTION_NONE) { + result.hit = 1u; + result.instanceCustomIndex = rayQueryGetCommittedInstanceCustomIndex(&rq); + result.primitiveIndex = rayQueryGetCommittedPrimitiveIndex(&rq); + result.tHit = rayQueryGetCommittedT(&rq); + } else { + result.hit = 0u; + result.instanceCustomIndex = 0xFFFFFFFFu; + result.primitiveIndex = 0xFFFFFFFFu; + result.tHit = -1.0; + } +} diff --git a/examples/RayQueryPick/resolve.wgsl b/examples/RayQueryPick/resolve.wgsl new file mode 100644 index 0000000..7950c96 --- /dev/null +++ b/examples/RayQueryPick/resolve.wgsl @@ -0,0 +1,7 @@ +// RTStress RESOLVE-stage tonemap: Reinhard + gamma 2.2 over the linear +// accumulator. Registered as a WebGPURTStage::Resolve shader. +fn resolve_main(coord: vec2, hdr: vec4) -> vec4 { + let mapped = hdr.rgb / (hdr.rgb + vec3(1.0)); + let g = pow(mapped, vec3(1.0 / 2.2)); + return vec4(g, 1.0); +}