Crafter.Graphics/tests/PushConstantRewrite/main.cpp

/*
Crafter®.Graphics
Copyright (C) 2026 Catcrafts®
catcrafts.net

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 3.0 as published by the Free Software Foundation;

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
*/

// Regression test for issue #18: the NVIDIA descriptor-heap AS-read workaround
// (WorkaroundNvidiaAS::Patch) used to bolt a brand-new push-constant block onto
// every patched ray-tracing shader. SPIR-V allows at most one push-constant
// block statically used per entry point, so any shader that already declared
// one ended up with two and failed spirv-val:
//
//   Entry point id '4' uses more than one PushConstant interface.
//
// This test compiles representative ray-generation shaders with glslang, runs
// them through the real Patch(), and asserts that the result passes spirv-val
// and contains exactly one push-constant variable (counted by scanning the
// patched module) — both for shaders that already have a push constant (merge
// path) and for those that don't (synthesize path). It also checks the
// returned TLAS push-constant offset.
//
// It additionally covers ray-querying *compute* shaders (issue #21): the
// rewrite is stage-agnostic, and ComputeShader::Dispatch now pushes the TLAS
// address at the per-shader offset Patch returns, so a compute shader that
// reads an acceleration structure through the descriptor heap must be patched
// and report a correct offset exactly like a raygen does.
//
// Delete this test together with the rest of the workaround once a fixed NVIDIA
// driver ships.

#include "vulkan/vulkan.h"
#include <cstdlib>

import Crafter.Graphics;
import std;
using namespace Crafter;

namespace {

namespace fs = std::filesystem;

// Run `cmd` through std::system and translate the raw wait status into a
// shell-style exit code. Returns -1 if std::system itself reports failure.
int RunCommand(const std::string& cmd) {
    const int waitStatus = std::system(cmd.c_str());
    if (waitStatus == -1) {
        return -1;
    }

    // Hand-rolled WEXITSTATUS so <sys/wait.h> is not needed: glibc keeps the
    // exit code in bits 8..15 of the wait status when the low seven bits are
    // zero. A non-zero low part is reported as 128 + that value.
    const int lowBits = waitStatus & 0x7f;
    if (lowBits != 0) {
        return 128 + lowBits;
    }
    return (waitStatus >> 8) & 0xff;
}

// Load the file at `p` as a sequence of 32-bit SPIR-V words.
//
// Returns an empty vector when the file cannot be opened, when its size cannot
// be determined, or when fewer bytes than expected could be read. Trailing
// bytes that do not make up a whole word are ignored.
std::vector<std::uint32_t> ReadSpirv(const fs::path& p) {
    // Open positioned at the end so tellg() yields the file size.
    std::ifstream f(p, std::ios::binary | std::ios::ate);
    if (!f) return {};

    // tellg() reports -1 on failure; never let that reach the size_t cast.
    const std::streamoff size = f.tellg();
    if (size <= 0) return {};
    f.seekg(0);

    std::vector<std::uint32_t> words(static_cast<std::size_t>(size) / sizeof(std::uint32_t));

    // Read exactly as many bytes as the vector can hold. Asking for `size`
    // bytes would overrun the buffer whenever the file length is not a
    // multiple of the word size.
    const auto byteCount = static_cast<std::streamsize>(words.size() * sizeof(std::uint32_t));
    if (!f.read(reinterpret_cast<char*>(words.data()), byteCount)) return {};
    return words;
}

// Dump `words` to the file at `p` as raw bytes, replacing any previous
// contents. Stream errors are not reported to the caller.
void WriteSpirv(const fs::path& p, const std::vector<std::uint32_t>& words) {
    const char* const rawBytes = reinterpret_cast<const char*>(words.data());
    const auto byteCount = static_cast<std::streamsize>(words.size() * sizeof(std::uint32_t));

    std::ofstream out(p, std::ios::binary);
    out.write(rawBytes, byteCount);
}

// Walk the instruction stream of a SPIR-V module and return how many
// OpVariable instructions declare the PushConstant storage class (value 9).
// The first five words (the module header) are skipped. Scanning stops early
// at an instruction whose word count is zero or runs past the end of `words`.
int CountPushConstantVariables(const std::vector<std::uint32_t>& words) {
    constexpr std::size_t kFirstInstruction = 5;
    constexpr std::uint32_t kOpVariable = 59;
    constexpr std::uint32_t kPushConstant = 9;
    constexpr std::size_t kStorageClassOperand = 3;

    int found = 0;
    std::size_t cursor = kFirstInstruction;
    while (cursor < words.size()) {
        // Each instruction's first word packs the word count (high 16 bits)
        // and the opcode (low 16 bits).
        const std::uint32_t leading = words[cursor];
        const std::uint32_t wordCount = leading >> 16;
        const std::uint32_t opcode = leading & 0xFFFFu;

        const bool malformed = wordCount == 0 || cursor + wordCount > words.size();
        if (malformed) {
            break;
        }

        const bool isVariable = opcode == kOpVariable && wordCount >= 4;
        if (isVariable && words[cursor + kStorageClassOperand] == kPushConstant) {
            ++found;
        }
        cursor += wordCount;
    }
    return found;
}

// One ray-generation test shader plus the outcome expected from Patch().
// Instances are listed in kCases, turned into GLSL by BuildSource, and
// checked by RunCase.
struct Case {
    // Label for the case; RunCase uses it in log lines and scratch file names.
    std::string_view name;
    // Push-constant declaration appended after kHeader ("" when the shader has
    // none). BuildSource also inspects this text to decide how `pc` is used.
    std::string_view glsl;
    bool readsAccelStruct;     // whether Patch should rewrite anything
    // Informational in this file: neither BuildSource nor RunCase reads it.
    bool hasExistingPushConst; // whether the source already declares a push block
    std::uint32_t expectedOffset; // expected PatchResult::tlasPushOffset (only checked when readsAccelStruct)
};

// Shared raygen scaffolding that BuildSource puts at the top of every Case's
// shader. It selects GLSL 460, enables the ray-tracing, formatted image
// load/store, descriptor-heap and nonuniform-qualifier extensions, and then
// declares a descriptor-heap acceleration-structure array (`topLevelAS`), a
// descriptor-heap write-only image array (`image`), and the `hitValue` ray
// payload at location 0 — i.e. a heap AS + heap image, traced and stored to.
constexpr std::string_view kHeader =
    "#version 460\n"
    "#extension GL_EXT_ray_tracing : enable\n"
    "#extension GL_EXT_shader_image_load_formatted : enable\n"
    "#extension GL_EXT_descriptor_heap : enable\n"
    "#extension GL_EXT_nonuniform_qualifier : enable\n"
    "layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[];\n"
    "layout(descriptor_heap) uniform writeonly image2D image[];\n"
    "layout(location = 0) rayPayloadEXT vec3 hitValue;\n";

// Ray-generation cases, in the order main() runs them. Field order per entry:
// name, push-constant declaration, readsAccelStruct, hasExistingPushConst,
// expectedOffset.
const std::array<Case, 5> kCases = {{
    // Synthesize path: the shader declares no push constant, so Patch is
    // expected to create a fresh single-member block with the TLAS at offset 0.
    { "no-push-constant",
      "",
      /*readsAccelStruct=*/true, /*hasExistingPushConst=*/false, /*expectedOffset=*/0 },

    // Merge path, block {mat4 @0, vec3 @64, uint @76}: the block ends at 80,
    // which is already a multiple of 8.
    { "merge-mat4-vec3-uint",
      "layout(push_constant) uniform PC { mat4 m; vec3 l; uint f; } pc;\n",
      /*readsAccelStruct=*/true, /*hasExistingPushConst=*/true, /*expectedOffset=*/80 },

    // Merge path, block {uint @0}: the block ends at 4 and the TLAS is
    // expected at the next multiple of 8.
    { "merge-uint",
      "layout(push_constant) uniform PC { uint f; } pc;\n",
      /*readsAccelStruct=*/true, /*hasExistingPushConst=*/true, /*expectedOffset=*/8 },

    // Merge path, block {vec4 v[2] @0 (32 bytes), uint @32}: the block ends at
    // 36, rounded up to 40.
    { "merge-array",
      "layout(push_constant) uniform PC { vec4 v[2]; uint f; } pc;\n",
      /*readsAccelStruct=*/true, /*hasExistingPushConst=*/true, /*expectedOffset=*/40 },

    // A push constant but NO acceleration-structure read: Patch is expected to
    // be a no-op, leaving the single user block untouched and still valid.
    { "push-constant-no-as",
      "layout(push_constant) uniform PC { vec4 tint; } pc;\n",
      /*readsAccelStruct=*/false, /*hasExistingPushConst=*/true, /*expectedOffset=*/0 },
}};

// Assemble the full ray-generation GLSL for `c`: kHeader, the case's optional
// push-constant declaration, and a main() that (when c.readsAccelStruct is
// set) traces a ray through topLevelAS[0] and always stores a colour to
// image[0].
std::string BuildSource(const Case& c) {
    const std::string_view decl = c.glsl;
    const auto declares = [decl](std::string_view needle) {
        return decl.find(needle) != std::string_view::npos;
    };

    // Choose the stored colour so that it reads every member of whichever
    // push-constant block the case declares; referencing the block keeps
    // glslang from dropping it from the module.
    std::string stored;
    if (declares("mat4 m;")) {
        stored = "pc.m * vec4(hitValue, 1.0) + vec4(pc.l, float(pc.f))";
    } else if (declares("uint f; } pc;")) {
        stored = declares("vec4 v[2]")
            ? "vec4(hitValue, 1.0) + pc.v[0] + pc.v[1] + vec4(float(pc.f))"
            : "vec4(hitValue, float(pc.f))";
    } else if (declares("vec4 tint;")) {
        stored = "vec4(hitValue, 1.0) + pc.tint";
    } else {
        stored = "vec4(hitValue, 1.0)";
    }

    std::string src;
    src.append(kHeader);
    src.append(decl);
    src.append("void main() {\n");
    src.append("  uvec2 pixel = gl_LaunchIDEXT.xy;\n");
    src.append("  vec3 origin = vec3(0.0, 0.0, -300.0);\n");
    src.append("  vec3 dir = normalize(vec3(0.0, 0.0, 1.0));\n");
    if (c.readsAccelStruct) {
        src.append("  traceRayEXT(topLevelAS[0], gl_RayFlagsNoneEXT, 0xff, 0,0,0, origin, 0.001, dir, 10000.0, 0);\n");
    }
    src.append("  imageStore(image[0], ivec2(pixel), ");
    src.append(stored);
    src.append(");\n");
    src.append("}\n");
    return src;
}

// Compute counterpart of the raygen cases (issue #21): describes a compute
// shader that ray-queries the heap TLAS via rayQueryEXT. Per the test's
// design, the offset math is shared with the raygen merge path, so one merge
// case and one synthesize case are enough to show compute stages are handled
// identically. Instances live in kComputeCases and are turned into GLSL by
// BuildComputeSource.
struct ComputeCase {
    // Label for the case; used in log lines and scratch file names.
    std::string_view name;
    std::string_view glsl;        // optional push-constant declaration
    // When true, BuildComputeSource makes the shader read `pc.f`, so `glsl`
    // must declare a block instance `pc` with a member `f`.
    bool hasExistingPushConst;
    std::uint32_t expectedOffset; // expected PatchResult::tlasPushOffset
};

// Compute cases, in the order main() runs them. Field order per entry: name,
// push-constant declaration, hasExistingPushConst, expectedOffset.
const std::array<ComputeCase, 2> kComputeCases = {{
    // Synthesize path: no push constant declared, so a fresh single-member
    // block is expected with the TLAS at offset 0.
    { "compute-no-push",
      "",
      /*hasExistingPushConst=*/false, /*expectedOffset=*/0 },

    // Merge path, block {uint f; @0}: the block ends at 4 and the TLAS is
    // expected at the next multiple of 8.
    { "compute-merge-uint",
      "layout(push_constant) uniform PC { uint f; } pc;\n",
      /*hasExistingPushConst=*/true, /*expectedOffset=*/8 },
}};

// Assemble the full compute-shader GLSL for `c`: a preamble enabling the
// ray-query and descriptor-heap extensions, the case's optional push-constant
// declaration, a 64-wide local size, and a main() that runs a ray query
// against topLevelAS[0] and stores a value to image[0].
std::string BuildComputeSource(const ComputeCase& c) {
    std::string s =
        "#version 460\n"
        "#extension GL_EXT_ray_query : enable\n"
        "#extension GL_EXT_shader_image_load_formatted : enable\n"
        "#extension GL_EXT_descriptor_heap : enable\n"
        "#extension GL_EXT_nonuniform_qualifier : enable\n"
        "layout(descriptor_heap) uniform accelerationStructureEXT topLevelAS[];\n"
        "layout(descriptor_heap) uniform writeonly image2D image[];\n";
    s += c.glsl;
    s += "layout(local_size_x = 64) in;\n";
    s += "void main() {\n";
    s += "  vec3 origin = vec3(0.0);\n";
    s += "  vec3 dir = vec3(0.0, 0.0, 1.0);\n";
    s += "  rayQueryEXT rq;\n";
    s += "  rayQueryInitializeEXT(rq, topLevelAS[0], gl_RayFlagsNoneEXT, 0xFF, origin, 0.001, dir, 10000.0);\n";
    s += "  while (rayQueryProceedEXT(rq)) {}\n";
    // When the case declares a push-constant block, read pc.f so the shader
    // statically uses it; otherwise store a constant.
    const std::string val = c.hasExistingPushConst ? "float(pc.f)" : "1.0";
    s += "  imageStore(image[0], ivec2(gl_GlobalInvocationID.xy), vec4(" + val + "));\n";
    s += "}\n";
    return s;
}

// Run one end-to-end case inside the scratch directory `dir`:
//   - write `source` to disk and compile it for `stage` with glslang,
//   - load the SPIR-V, pass it through WorkaroundNvidiaAS::Patch(), and write
//     the result back out,
//   - require that spirv-val accepts the patched module, that it contains
//     exactly one push-constant variable, that Patch reports `patched` equal
//     to `readsAccelStruct`, and (only when `readsAccelStruct`) that the
//     reported TLAS push offset equals `expectedOffset`.
// Each failure is logged to stderr tagged with `name`. Returns true only when
// every check passes.
bool RunCase(const fs::path& dir, std::string_view name, std::string_view stage,
             const std::string& source, bool readsAccelStruct,
             std::uint32_t expectedOffset) {
    const std::string base(name);
    const std::string stageName(stage);
    const fs::path glslPath = dir / (base + "." + stageName + ".glsl");
    const fs::path spvPath  = dir / (base + ".spv");
    const fs::path patched  = dir / (base + ".patched.spv");

    // Scoped so the shader file is closed before glslang opens it.
    {
        std::ofstream glslFile(glslPath);
        glslFile << source;
    }

    std::string compileCmd = "glslang --target-env vulkan1.4 -V -S ";
    compileCmd += stageName;
    compileCmd += " \"";
    compileCmd += glslPath.string();
    compileCmd += "\" -o \"";
    compileCmd += spvPath.string();
    compileCmd += "\" > /dev/null";
    const bool compiled = RunCommand(compileCmd) == 0;
    if (!compiled) {
        std::println(std::cerr, "[{}] glslang failed to compile the source shader", name);
        return false;
    }

    // Anything shorter than the five-word SPIR-V header counts as unreadable.
    std::vector<std::uint32_t> words = ReadSpirv(spvPath);
    const bool readable = words.size() >= 5;
    if (!readable) {
        std::println(std::cerr, "[{}] could not read compiled SPIR-V", name);
        return false;
    }

    WorkaroundNvidiaAS::PatchResult patch = WorkaroundNvidiaAS::Patch(words);
    WriteSpirv(patched, words);

    // Check 1: spirv-val must accept the patched module under the engine's flags.
    std::string validateCmd = "spirv-val \"";
    validateCmd += patched.string();
    validateCmd += "\" --relax-block-layout --scalar-block-layout --target-env vulkan1.4";
    const bool valid = RunCommand(validateCmd) == 0;
    if (!valid) {
        std::println(std::cerr, "[{}] spirv-val rejected the patched module", name);
        return false;
    }

    // Check 2: exactly one push-constant variable — the whole point of issue #18.
    const int pcVars = CountPushConstantVariables(words);
    if (pcVars != 1) {
        std::println(std::cerr, "[{}] expected exactly 1 push-constant variable, found {}", name, pcVars);
        return false;
    }

    // Check 3: Patch reports a rewrite exactly when the shader reads an AS.
    const bool patchedAsExpected = patch.patched == readsAccelStruct;
    if (!patchedAsExpected) {
        std::println(std::cerr, "[{}] expected patched={}, got {}", name, readsAccelStruct, patch.patched);
        return false;
    }

    // Check 4: the returned TLAS offset must match the expected layout end.
    const bool offsetMismatch = readsAccelStruct && patch.tlasPushOffset != expectedOffset;
    if (offsetMismatch) {
        std::println(std::cerr, "[{}] expected TLAS push offset {}, got {}",
                     name, expectedOffset, patch.tlasPushOffset);
        return false;
    }

    std::println(std::cout, "[{}] ok (push-constant vars: {}, tlas offset: {})",
                 name, pcVars, readsAccelStruct ? patch.tlasPushOffset : 0u);
    return true;
}

} // namespace

// Test entry point. Creates a scratch directory under the system temp
// directory, runs every raygen case and every compute case through RunCase,
// and returns 0 only if all of them pass (1 otherwise).
int main() {
    const fs::path dir = fs::temp_directory_path() / "crafter-pcrewrite-test";
    std::error_code ec;
    fs::create_directories(dir, ec);
    // create_directories leaves `ec` clear when the directory already exists,
    // so a set code is a real failure. Report it here rather than letting every
    // case fail later with a misleading "glslang failed" message.
    if (ec) {
        std::println(std::cerr, "could not create scratch directory {}: {}",
                     dir.string(), ec.message());
        return 1;
    }

    int failures = 0;
    for (const Case& c : kCases) {
        if (!RunCase(dir, c.name, "rgen", BuildSource(c), c.readsAccelStruct, c.expectedOffset))
            ++failures;
    }
    // Ray-querying compute shaders (issue #21) — must be patched and report a
    // correct per-shader offset just like the raygen cases above.
    for (const ComputeCase& c : kComputeCases) {
        if (!RunCase(dir, c.name, "comp", BuildComputeSource(c), /*readsAccelStruct=*/true, c.expectedOffset))
            ++failures;
    }

    if (failures != 0) {
        std::println(std::cerr, "{} case(s) failed", failures);
        return 1;
    }
    std::println(std::cout, "all push-constant rewrite cases passed");
    return 0;
}