Concurrent crafter-build invocations corrupt the shared module cache (malformed or corrupted precompiled file) #15

Merged
jorijnvdgraaf merged 1 commit from claude/issue-14 into master 2026-05-30 18:44:54 +02:00
3 changed files with 237 additions and 12 deletions
Showing only changes of commit 96d1df9233 - Show all commits

fix: atomic-rename host-cache PCMs to close concurrent-build race
All checks were successful
CI / build-test-release (pull_request) Successful in 11m50s

Two crafter-build invocations sharing XDG_CACHE_HOME used to clobber each
other's writes to <cache>/<target>-<march>/std.pcm and the
Crafter.Build-*.pcm modules: each LoadProject path wrote directly to the
final path, so a reader could see a half-written file and die with
"malformed or corrupted precompiled file: 'can't skip to bit X from Y'"
(issue #14). Every BuildStdPcm / EnsureCrafterBuildPcms write now goes via
<final>.tmp.<pid>.<seq> and atomic-renames into place; concurrent writers
always see either the old or the new file, never torn bytes. The mingw-on-
Linux std.cppm copy is per-PID for the same reason. Adds a regression test
(ConcurrentCacheRace) that races four LoadProject() calls against a cold
scratch cache — reproduces the race 5/5 without the fix and passes 5/5
with it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
catbot 2026-05-30 16:36:45 +00:00

View file

@ -36,6 +36,42 @@ import :Progress;
namespace fs = std::filesystem; namespace fs = std::filesystem;
using namespace Crafter; using namespace Crafter;
namespace {
// Used to make per-process temp filenames so concurrent crafter-build
// invocations sharing the host cache dir don't collide on the same
// partially-written .pcm. Cross-process uniqueness comes from the PID;
// intra-process uniqueness from the counter (multiple host PCMs precompile
// back-to-back, and tests can call BuildStdPcm from several threads).
unsigned long CurrentProcessId() {
#ifdef CRAFTER_BUILD_CONFIGURATION_TARGET_x86_64_pc_linux_gnu
return static_cast<unsigned long>(getpid());
#else
return static_cast<unsigned long>(GetCurrentProcessId());
#endif
}
fs::path MakeTempPcmPath(const fs::path& finalPath) {
static std::atomic<unsigned> seq{0};
return fs::path(finalPath.string() + std::format(".tmp.{}.{}",
CurrentProcessId(), seq.fetch_add(1, std::memory_order_relaxed)));
}
// Atomically swap `tmpPath` into `finalPath`. If a parallel crafter-build
// committed its own equivalent build first (identical .cppm + flags →
// identical .pcm), `fs::rename` may fail on some filesystems; accept the
// outcome as long as the destination now exists.
std::string CommitPcm(const fs::path& tmpPath, const fs::path& finalPath) {
std::error_code ec;
fs::rename(tmpPath, finalPath, ec);
if (!ec) return "";
std::error_code rmEc;
fs::remove(tmpPath, rmEc);
if (fs::exists(finalPath)) return "";
return std::format("rename {} -> {}: {}",
tmpPath.string(), finalPath.string(), ec.message());
}
}
fs::path Crafter::GetCrafterBuildHome() { fs::path Crafter::GetCrafterBuildHome() {
if (const char* envHome = std::getenv("CRAFTER_BUILD_HOME")) { if (const char* envHome = std::getenv("CRAFTER_BUILD_HOME")) {
return fs::path(envHome); return fs::path(envHome);
@ -250,7 +286,14 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
std::string stdcppm = std::format("{}\\modules\\c++\\v1\\std.cppm", libcxx); std::string stdcppm = std::format("{}\\modules\\c++\\v1\\std.cppm", libcxx);
if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdcppm)) { if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdcppm)) {
return RunCommand(std::format("clang++ --target={} -march={} -mtune={} -isystem %LIBCXX_DIR%\\include\\c++\\v1 -nostdinc++ -nostdlib++ -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile %LIBCXX_DIR%\\modules\\c++\\v1\\std.cppm -o {}", config.target, config.march, config.mtune, stdPcm.string())); fs::path tmpPcm = MakeTempPcmPath(stdPcm);
std::string out = RunCommand(std::format("clang++ --target={} -march={} -mtune={} -isystem %LIBCXX_DIR%\\include\\c++\\v1 -nostdinc++ -nostdlib++ -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile %LIBCXX_DIR%\\modules\\c++\\v1\\std.cppm -o {}", config.target, config.march, config.mtune, tmpPcm.string()));
if (!out.empty()) {
std::error_code ec;
fs::remove(tmpPcm, ec);
return out;
}
return CommitPcm(tmpPcm, stdPcm);
} }
return ""; return "";
} }
@ -270,6 +313,7 @@ namespace {
if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) { if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) {
continue; continue;
} }
fs::path tmpPcm = MakeTempPcmPath(pcmPath);
std::string cmd = std::format( std::string cmd = std::format(
"clang++ --target=x86_64-pc-windows-msvc -march=native -mtune=native " "clang++ --target=x86_64-pc-windows-msvc -march=native -mtune=native "
"-std=c++26 -O3 -D CRAFTER_BUILD_DLL_IMPORT " "-std=c++26 -O3 -D CRAFTER_BUILD_DLL_IMPORT "
@ -277,11 +321,16 @@ namespace {
"-Wno-reserved-identifier -Wno-reserved-module-identifier " "-Wno-reserved-identifier -Wno-reserved-module-identifier "
"-fprebuilt-module-path={} " "-fprebuilt-module-path={} "
"--precompile {} -o {}", "--precompile {} -o {}",
cacheDir.string(), cppmPath.string(), pcmPath.string()); cacheDir.string(), cppmPath.string(), tmpPcm.string());
CommandResult r = Crafter::RunCommandChecked(cmd); CommandResult r = Crafter::RunCommandChecked(cmd);
if (r.exitCode != 0) { if (r.exitCode != 0) {
std::error_code ec;
fs::remove(tmpPcm, ec);
throw std::runtime_error(std::format("Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output)); throw std::runtime_error(std::format("Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output));
} }
if (std::string err = CommitPcm(tmpPcm, pcmPath); !err.empty()) {
throw std::runtime_error(std::format("Failed to commit {} pcm: {}", name, err));
}
} }
} }
} }
@ -412,11 +461,18 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
if (fs::exists(stdPcm) && fs::last_write_time(stdPcm) >= fs::last_write_time(stdcppm)) { if (fs::exists(stdPcm) && fs::last_write_time(stdPcm) >= fs::last_write_time(stdcppm)) {
return ""; return "";
} }
return RunCommand(std::format( fs::path tmpPcm = MakeTempPcmPath(stdPcm);
std::string out = RunCommand(std::format(
"clang++ --target={} -march={} -mtune={} -isystem %LIBCXX_DIR%\\include\\c++\\v1 " "clang++ --target={} -march={} -mtune={} -isystem %LIBCXX_DIR%\\include\\c++\\v1 "
"-nostdinc++ -nostdlib++ -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier " "-nostdinc++ -nostdlib++ -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier "
"--precompile %LIBCXX_DIR%\\modules\\c++\\v1\\std.cppm -o {}", "--precompile %LIBCXX_DIR%\\modules\\c++\\v1\\std.cppm -o {}",
config.target, config.march, config.mtune, stdPcm.string())); config.target, config.march, config.mtune, tmpPcm.string()));
if (!out.empty()) {
std::error_code ec;
fs::remove(tmpPcm, ec);
return out;
}
return CommitPcm(tmpPcm, stdPcm);
} }
// Default: mingw target. Look up mingw-w64 libstdc++ via the msys2 prefix. // Default: mingw target. Look up mingw-w64 libstdc++ via the msys2 prefix.
@ -430,14 +486,16 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
} }
// Copy std.cc → std.cppm in C++ rather than via cmd's `copy /Y` because // Copy std.cc → std.cppm in C++ rather than via cmd's `copy /Y` because
// `copy` always prints "1 file(s) copied." to stdout and RunCommand // `copy` always prints "1 file(s) copied." to stdout and RunCommand
// treats any output as an error. // treats any output as an error. Per-PID filename so concurrent
fs::path stdCppm = stdPcm.parent_path() / "std.cppm"; // crafter-build invocations don't see each other's half-written copies.
fs::path stdCppm = stdPcm.parent_path() / std::format("std.cppm.{}", CurrentProcessId());
std::error_code ec; std::error_code ec;
fs::copy_file(stdCc, stdCppm, fs::copy_options::overwrite_existing, ec); fs::copy_file(stdCc, stdCppm, fs::copy_options::overwrite_existing, ec);
if (ec) { if (ec) {
return std::format("copy {} -> {}: {}", stdCc.string(), stdCppm.string(), ec.message()); return std::format("copy {} -> {}: {}", stdCc.string(), stdCppm.string(), ec.message());
} }
return RunCommand(std::format( fs::path tmpPcm = MakeTempPcmPath(stdPcm);
std::string out = RunCommand(std::format(
"clang++ --target={} -march={} -mtune={} " "clang++ --target={} -march={} -mtune={} "
"--sysroot=\"{}\" -femulated-tls " "--sysroot=\"{}\" -femulated-tls "
"-O3 -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier " "-O3 -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier "
@ -445,7 +503,14 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
config.target, config.march, config.mtune, config.target, config.march, config.mtune,
prefix.string(), prefix.string(),
stdCppm.string(), stdCppm.string(),
stdPcm.string())); tmpPcm.string()));
std::error_code rmEc;
fs::remove(stdCppm, rmEc);
if (!out.empty()) {
fs::remove(tmpPcm, rmEc);
return out;
}
return CommitPcm(tmpPcm, stdPcm);
} }
std::string Crafter::GetBaseCommand(const Configuration& config) { std::string Crafter::GetBaseCommand(const Configuration& config) {
@ -469,6 +534,7 @@ namespace {
if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) { if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) {
continue; continue;
} }
fs::path tmpPcm = MakeTempPcmPath(pcmPath);
std::string cmd = std::format( std::string cmd = std::format(
"clang++ --target=x86_64-w64-mingw32 -march=native -mtune=native " "clang++ --target=x86_64-w64-mingw32 -march=native -mtune=native "
"--sysroot=\"{}\" -femulated-tls " "--sysroot=\"{}\" -femulated-tls "
@ -476,12 +542,18 @@ namespace {
"-Wno-reserved-identifier -Wno-reserved-module-identifier " "-Wno-reserved-identifier -Wno-reserved-module-identifier "
"-fprebuilt-module-path={} " "-fprebuilt-module-path={} "
"--precompile {} -o {}", "--precompile {} -o {}",
prefix.string(), cacheDir.string(), cppmPath.string(), pcmPath.string()); prefix.string(), cacheDir.string(), cppmPath.string(), tmpPcm.string());
CommandResult r = Crafter::RunCommandChecked(cmd); CommandResult r = Crafter::RunCommandChecked(cmd);
if (r.exitCode != 0) { if (r.exitCode != 0) {
std::error_code ec;
fs::remove(tmpPcm, ec);
throw std::runtime_error(std::format( throw std::runtime_error(std::format(
"Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output)); "Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output));
} }
if (std::string err = CommitPcm(tmpPcm, pcmPath); !err.empty()) {
throw std::runtime_error(std::format(
"Failed to commit {} pcm: {}", name, err));
}
} }
} }
} }
@ -680,7 +752,24 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdCc)) { if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdCc)) {
// -femulated-tls keeps PCM TLS access matching libstdc++'s emutls // -femulated-tls keeps PCM TLS access matching libstdc++'s emutls
// definitions; mismatch surfaces as undefined std::__once_callable. // definitions; mismatch surfaces as undefined std::__once_callable.
return RunCommand(std::format("cp {} {}/std.cppm\nclang++ --target={} -march={} -mtune={} -femulated-tls -O3 -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile {}/std.cppm -o {}", stdCc.string(), stdPcm.parent_path().string(), config.target, config.march, config.mtune, stdPcm.parent_path().string(), stdPcm.string())); // Per-PID std.cppm and a tmp .pcm + atomic rename so concurrent
// crafter-build invocations sharing the cache dir don't read each
// other's half-written files.
fs::path stdCppm = stdPcm.parent_path() / std::format("std.cppm.{}", CurrentProcessId());
std::error_code copyEc;
fs::copy_file(stdCc, stdCppm, fs::copy_options::overwrite_existing, copyEc);
if (copyEc) {
return std::format("copy {} -> {}: {}", stdCc.string(), stdCppm.string(), copyEc.message());
}
fs::path tmpPcm = MakeTempPcmPath(stdPcm);
std::string out = RunCommand(std::format("clang++ --target={} -march={} -mtune={} -femulated-tls -O3 -std=c++26 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile {} -o {}", config.target, config.march, config.mtune, stdCppm.string(), tmpPcm.string()));
std::error_code rmEc;
fs::remove(stdCppm, rmEc);
if (!out.empty()) {
fs::remove(tmpPcm, rmEc);
return out;
}
return CommitPcm(tmpPcm, stdPcm);
} else { } else {
return ""; return "";
} }
@ -708,7 +797,14 @@ std::string Crafter::BuildStdPcm(const Configuration& config, fs::path stdPcm) {
? std::string(" -fno-exceptions -msimd128 -fno-c++-static-destructors -mllvm -wasm-enable-sjlj -D_WASI_EMULATED_SIGNAL") ? std::string(" -fno-exceptions -msimd128 -fno-c++-static-destructors -mllvm -wasm-enable-sjlj -D_WASI_EMULATED_SIGNAL")
: std::format(" -march={} -mtune={}", config.march, config.mtune); : std::format(" -march={} -mtune={}", config.march, config.mtune);
if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdCppm)) { if(!fs::exists(stdPcm) || fs::last_write_time(stdPcm) < fs::last_write_time(stdCppm)) {
return RunCommand(std::format("clang++ --target={} -std=c++26 -stdlib=libc++{}{} -O3 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile {} -o {}", config.target, sysrootFlag, archFlags, stdCppm, stdPcm.string())); fs::path tmpPcm = MakeTempPcmPath(stdPcm);
std::string out = RunCommand(std::format("clang++ --target={} -std=c++26 -stdlib=libc++{}{} -O3 -Wno-reserved-identifier -Wno-reserved-module-identifier --precompile {} -o {}", config.target, sysrootFlag, archFlags, stdCppm, tmpPcm.string()));
if (!out.empty()) {
std::error_code ec;
fs::remove(tmpPcm, ec);
return out;
}
return CommitPcm(tmpPcm, stdPcm);
} else { } else {
return ""; return "";
} }
@ -761,17 +857,23 @@ namespace {
if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) { if (fs::exists(pcmPath) && fs::last_write_time(cppmPath) < fs::last_write_time(pcmPath)) {
continue; continue;
} }
fs::path tmpPcm = MakeTempPcmPath(pcmPath);
std::string cmd = std::format( std::string cmd = std::format(
"clang++ --target=x86_64-pc-linux-gnu -march=native -mtune=native " "clang++ --target=x86_64-pc-linux-gnu -march=native -mtune=native "
"-std=c++26 -stdlib=libc++ -O3 " "-std=c++26 -stdlib=libc++ -O3 "
"-Wno-reserved-identifier -Wno-reserved-module-identifier " "-Wno-reserved-identifier -Wno-reserved-module-identifier "
"-fprebuilt-module-path={} " "-fprebuilt-module-path={} "
"--precompile {} -o {}", "--precompile {} -o {}",
cacheDir.string(), cppmPath.string(), pcmPath.string()); cacheDir.string(), cppmPath.string(), tmpPcm.string());
CommandResult r = Crafter::RunCommandChecked(cmd); CommandResult r = Crafter::RunCommandChecked(cmd);
if (r.exitCode != 0) { if (r.exitCode != 0) {
std::error_code ec;
fs::remove(tmpPcm, ec);
throw std::runtime_error(std::format("Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output)); throw std::runtime_error(std::format("Failed to precompile {} (exit {}): {}", name, r.exitCode, r.output));
} }
if (std::string err = CommitPcm(tmpPcm, pcmPath); !err.empty()) {
throw std::runtime_error(std::format("Failed to commit {} pcm: {}", name, err));
}
} }
} }
} }

View file

@ -107,6 +107,12 @@ extern "C" Configuration CrafterBuildProject(std::span<const std::string_view> a
cfg.AddTest("VariantId").Dependencies({ crafterBuildLib.get() }); cfg.AddTest("VariantId").Dependencies({ crafterBuildLib.get() });
cfg.AddTest("WasiBrowserRuntime").Dependencies({ crafterBuildLib.get() }); cfg.AddTest("WasiBrowserRuntime").Dependencies({ crafterBuildLib.get() });
cfg.AddTest("RunSingleTestExit").Dependencies({ crafterBuildLib.get() }); cfg.AddTest("RunSingleTestExit").Dependencies({ crafterBuildLib.get() });
// LoadProject dlopens the synthesized project.so, which references
// Crafter:: symbols (HostTarget, Configuration ctors) that have to be
// visible from the test exe — same wiring crafter-build itself uses
// for project.so.
cfg.AddTest("ConcurrentCacheRace").Dependencies({ crafterBuildLib.get() })
.LinkFlag("-Wl,--export-dynamic").LinkFlag("-ldl");
} }
return cfg; return cfg;

View file

@ -0,0 +1,117 @@
#include <stdlib.h>
import std;
import Crafter.Build;
namespace fs = std::filesystem;
using namespace Crafter;
// Regression for the host-cache write race: when several crafter-build
// invocations shared the same XDG_CACHE_HOME, two LoadProject() calls would
// clobber each other's writes to `<cache>/<target>-<march>/std.pcm` and the
// `Crafter.Build-*.pcm` host modules. The loser's clang would then read a
// torn file via -fprebuilt-module-path and die with "malformed or corrupted
// precompiled file: 'can't skip to bit X from Y'". The fix routes every PCM
// write through a per-process temp + atomic rename so a reader always sees
// either the old or the new file, never a half-written one.
//
// LoadProject() is the right surface to test because it runs the full host
// bootstrap (BuildStdPcm + EnsureCrafterBuildPcms × the Crafter.Build-*
// modules), giving every concurrent thread enough writing-to-the-same-paths
// time for the race to surface. A pure BuildStdPcm test wouldn't — the std
// PCM write is too short to overlap reliably between threads.
namespace {
fs::path FindCrafterBuildHome() {
if (const char* env = std::getenv("CRAFTER_BUILD_HOME"); env && *env) {
return env;
}
for (const char* candidate : {
"/usr/local/share/crafter-build",
"/usr/share/crafter-build",
}) {
if (fs::exists(fs::path(candidate) / "Crafter.Build.cppm")) {
return candidate;
}
}
return {};
}
void WriteFile(const fs::path& p, std::string_view contents) {
std::ofstream out(p);
out << contents;
}
}
int main() {
fs::path home = FindCrafterBuildHome();
if (home.empty()) {
std::println(std::cerr,
"SKIP: no installed share/crafter-build found and CRAFTER_BUILD_HOME unset");
return 77;
}
setenv("CRAFTER_BUILD_HOME", home.string().c_str(), 1);
fs::path scratch = fs::temp_directory_path() / "crafter-build-concurrent-cache-race";
std::error_code ec;
fs::remove_all(scratch, ec);
fs::create_directories(scratch);
// Cold scratch cache. setenv before any thread starts so every thread's
// GetCacheDir() call sees the same override.
fs::path cacheDir = scratch / "cache";
fs::create_directories(cacheDir);
setenv("XDG_CACHE_HOME", cacheDir.string().c_str(), 1);
constexpr int N = 4;
std::vector<fs::path> projects;
for (int i = 0; i < N; ++i) {
fs::path dir = scratch / std::format("p{}", i);
fs::create_directories(dir);
WriteFile(dir / "project.cpp", std::format(
"import std;\n"
"import Crafter.Build;\n"
"namespace fs = std::filesystem;\n"
"using namespace Crafter;\n"
"extern \"C\" Configuration CrafterBuildProject(std::span<const std::string_view>) {{\n"
" Configuration cfg;\n"
" cfg.path = \"./\";\n"
" cfg.name = \"p{}\";\n"
" cfg.outputName = \"p{}\";\n"
" cfg.target = HostTarget();\n"
" cfg.type = ConfigurationType::Executable;\n"
" return cfg;\n"
"}}\n", i, i));
projects.push_back(dir / "project.cpp");
}
std::array<std::string, N> errors;
std::vector<std::thread> threads;
threads.reserve(N);
for (int i = 0; i < N; ++i) {
threads.emplace_back([&, i]() {
try {
std::array<std::string_view, 0> args = {};
(void)LoadProject(projects[i], args);
} catch (const std::exception& e) {
errors[i] = e.what();
}
});
}
for (std::thread& t : threads) t.join();
int failures = 0;
for (int i = 0; i < N; ++i) {
if (!errors[i].empty()) {
std::println(std::cerr, "FAIL p{}: {}", i, errors[i]);
++failures;
}
}
fs::remove_all(scratch, ec);
if (failures > 0) {
std::println(std::cerr,
"{}/{} concurrent LoadProject() invocations failed (host-cache race)",
failures, N);
return 1;
}
return 0;
}