This commit is contained in:
Jorijn van der Graaf 2026-05-18 20:33:47 +02:00
commit 35091b2c53
5 changed files with 322 additions and 12 deletions

View file

@ -5,15 +5,38 @@ module;
#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#endif
#ifdef __riscv_vector
#include <riscv_vector.h>
// CRAFTER_RVV_VLEN: vector register width, in bits, that this library assumes
// for its RVV backend. The ISA itself is vector-length agnostic, but the
// storage types here have a fixed size, so the width is frozen at compile
// time from the strongest guarantee the toolchain exposes, in priority order:
//   1. __riscv_v_fixed_vlen — present in Clang's -mrvv-vector-bits=N mode.
//   2. __riscv_v_min_vlen   — minimum VLEN implied by the march string (for
//                             example rv64gcv_zvl256b yields 256); provided
//                             by both GCC and Clang.
//   3. 128                  — the ZVL128B baseline of RVA23, used when
//                             neither macro is available.
#ifdef __riscv_v_fixed_vlen
#  define CRAFTER_RVV_VLEN __riscv_v_fixed_vlen
#else
#  ifdef __riscv_v_min_vlen
#    define CRAFTER_RVV_VLEN __riscv_v_min_vlen
#  else
#    define CRAFTER_RVV_VLEN 128
#  endif
#endif
// Fixed-width float storage for the RVV backend, in three tiers of 16, 32 and
// 64 bytes (the same tiering x86 has with __m128 / __m256 / __m512). These are
// plain GNU vector types; with the target's V extension enabled the compiler
// lowers loads, stores and arithmetic on them to RVV instructions
// (vle/vse/vfadd/...). Each type is aligned to its own size.
typedef float __crafter_rvv_v128_f32 __attribute__((__vector_size__(16), __aligned__(16)));
typedef float __crafter_rvv_v256_f32 __attribute__((__vector_size__(32), __aligned__(32)));
typedef float __crafter_rvv_v512_f32 __attribute__((__vector_size__(64), __aligned__(64)));
#endif
export module Crafter.Math:Common;
import std;
// VectorF16 exists as a real struct when _Float16 is available AND we are not
// on x86_64 without AVX512FP16 (that path aliases VectorF16 to VectorF32 in
// Crafter.Math:Basic for performance). Each translation unit that needs this
// distinction redefines the same condition since macros do not cross module
// boundaries.
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__))
// Crafter.Math:Basic for performance). The same alias kicks in on RISC-V until
// a Zvfh path lands. Each translation unit that needs this distinction
// redefines the same condition since macros do not cross module boundaries.
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__)) && !defined(__riscv_vector)
namespace Crafter {
export template <std::uint8_t Len, std::uint8_t Packing>
struct VectorF16;
@ -26,7 +49,7 @@ namespace Crafter {
template <std::uint8_t Len, std::uint8_t Packing, typename T>
struct VectorBase {
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__))
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__)) && !defined(__riscv_vector)
template <std::uint8_t L, std::uint8_t P>
friend struct VectorF16;
#endif
@ -63,6 +86,18 @@ namespace Crafter {
>;
#elif defined(__wasm_simd128__)
using VectorType = v128_t;
#elif defined(__riscv_vector)
// RISC-V storage selection, following the same shape as the x86 selector.
// Only float gets a native GNU vector type; every other element type falls
// back to a std::array sized from GetAlingment(). For float, the element
// count Len*Packing picks the smallest tier that holds it:
//   <= 4 floats -> 16-byte vector
//   <= 8 floats -> 32-byte vector
//   otherwise   -> 64-byte vector
// _Float16 is not handled here: on RISC-V VectorF16 is an alias of VectorF32
// until a Zvfh path lands.
using VectorType = std::conditional_t<
!std::is_same_v<T, float>,
std::array<T, GetAlingment()/sizeof(T)>,
std::conditional_t<(Len * Packing <= 4), __crafter_rvv_v128_f32,
std::conditional_t<(Len * Packing <= 8), __crafter_rvv_v256_f32, __crafter_rvv_v512_f32>>
>;
#else
using VectorType = std::array<T, GetAlingment()/sizeof(T)>;
#endif
@ -80,6 +115,13 @@ namespace Crafter {
// WASM SIMD only has 128-bit vectors; cap at 16 bytes so the entire
// VectorType always fits in a single v128_t.
static constexpr std::uint8_t Max = 16;
#elif defined(__riscv_vector)
// Upper bound, in bytes, derived from the VLEN guaranteed at compile time
// (CRAFTER_RVV_VLEN, in bits):
//   VLEN <  256 -> 16  (ZVL128B, the RVA23 baseline)
//   VLEN <  512 -> 32  (ZVL256B)
//   otherwise   -> 64  (ZVL512B and wider)
// LMUL>1 register grouping is an independent axis; it could be layered on
// top of this later as a batched-op path.
static constexpr std::uint8_t Max = (CRAFTER_RVV_VLEN < 256) ? 16 :
(CRAFTER_RVV_VLEN < 512) ? 32 : 64;
#else
static constexpr std::uint8_t Max = 32;
#endif