// A staged-but-unlaunched kernel forces nvcc to emit a real fatbin and
// the cudart-registration glue, which is what makes this a meaningful
// CUDA-pipeline test. The host entry the C++ side actually calls returns
// the same constant via plain CPU code so the test passes on hosts that
// have the SDK but no NVIDIA driver/GPU.
// Device-side helper: yields the constant that `kernel` stores through its
// output pointer. The value is the same one the host entry point
// `kernel_compute` returns.
__device__ int compute_on_device() {
    const int device_result = 42;
    return device_result;
}

// Stores the value produced by compute_on_device() into the int that `out`
// points at. There is no thread indexing here: every thread that executes
// this kernel writes the same value to the same location, and `out` is
// dereferenced without a null check.
__global__ void kernel(int* out) {
    const int value = compute_on_device();
    out[0] = value;
}

// C-linkage host entry point. Produces the result directly in host code: it
// neither launches `kernel` nor calls any CUDA runtime API, so the return
// value does not depend on a driver or GPU being present.
extern "C" int kernel_compute() {
    const int host_result = 42;
    return host_result;
}