RVV
This commit is contained in:
parent
f0becd1582
commit
35091b2c53
5 changed files with 322 additions and 12 deletions
|
|
@ -5,15 +5,38 @@ module;
|
|||
#ifdef __wasm_simd128__
|
||||
#include <wasm_simd128.h>
|
||||
#endif
|
||||
#ifdef __riscv_vector
|
||||
#include <riscv_vector.h>
|
||||
// Compile-time VLEN selection. RVV is VLA at the ISA level, but storage in
|
||||
// this library is fixed-size, so we pin to the widest VLEN the toolchain has
|
||||
// guaranteed at compile time:
|
||||
// __riscv_v_fixed_vlen — Clang's -mrvv-vector-bits=N mode.
|
||||
// __riscv_v_min_vlen — minimum guaranteed VLEN from the march (e.g.
|
||||
// rv64gcv_zvl256b → 256). Set by both GCC and Clang.
|
||||
// Falls back to the RVA23 baseline of ZVL128B otherwise.
|
||||
#if defined(__riscv_v_fixed_vlen)
|
||||
#define CRAFTER_RVV_VLEN __riscv_v_fixed_vlen
|
||||
#elif defined(__riscv_v_min_vlen)
|
||||
#define CRAFTER_RVV_VLEN __riscv_v_min_vlen
|
||||
#else
|
||||
#define CRAFTER_RVV_VLEN 128
|
||||
#endif
|
||||
// 16/32/64-byte storage types, mirroring x86's __m128/__m256/__m512 tier.
|
||||
// The compiler emits RVV vle/vse/vfadd/... on these GNU vectors when the
|
||||
// target's V extension is enabled.
|
||||
typedef float __crafter_rvv_v128_f32 __attribute__((vector_size(16), aligned(16)));
|
||||
typedef float __crafter_rvv_v256_f32 __attribute__((vector_size(32), aligned(32)));
|
||||
typedef float __crafter_rvv_v512_f32 __attribute__((vector_size(64), aligned(64)));
|
||||
#endif
|
||||
export module Crafter.Math:Common;
|
||||
import std;
|
||||
|
||||
// VectorF16 exists as a real struct when _Float16 is available AND we are not
|
||||
// on x86_64 without AVX512FP16 (that path aliases VectorF16 to VectorF32 in
|
||||
// Crafter.Math:Basic for performance). Each translation unit that needs this
|
||||
// distinction redefines the same condition since macros do not cross module
|
||||
// boundaries.
|
||||
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__))
|
||||
// Crafter.Math:Basic for performance). The same alias kicks in on RISC-V until
|
||||
// a Zvfh path lands. Each translation unit that needs this distinction
|
||||
// redefines the same condition since macros do not cross module boundaries.
|
||||
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__)) && !defined(__riscv_vector)
|
||||
namespace Crafter {
|
||||
export template <std::uint8_t Len, std::uint8_t Packing>
|
||||
struct VectorF16;
|
||||
|
|
@ -26,7 +49,7 @@ namespace Crafter {
|
|||
|
||||
template <std::uint8_t Len, std::uint8_t Packing, typename T>
|
||||
struct VectorBase {
|
||||
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__))
|
||||
#if defined(__FLT16_MAX__) && (!defined(__x86_64) || defined(__AVX512FP16__)) && !defined(__riscv_vector)
|
||||
template <std::uint8_t L, std::uint8_t P>
|
||||
friend struct VectorF16;
|
||||
#endif
|
||||
|
|
@ -63,6 +86,18 @@ namespace Crafter {
|
|||
>;
|
||||
#elif defined(__wasm_simd128__)
|
||||
using VectorType = v128_t;
|
||||
#elif defined(__riscv_vector)
|
||||
// RVV tier mirrors the x86 selector: each float instantiation takes the
|
||||
// smallest of the 16/32/64-byte tiers that fits Len*Packing; any other T
|
||||
// falls back to std::array. NOTE(review): this selector never reads
|
||||
// CRAFTER_RVV_VLEN, so the guaranteed-width cap must be enforced by whatever
|
||||
// bounds Len*Packing - confirm. _Float16 never materialises here because
// VectorF16 aliases VectorF32 on RISC-V until a Zvfh path lands.
|
||||
using VectorType = std::conditional_t<
|
||||
std::is_same_v<T, float>,
|
||||
std::conditional_t<(Len * Packing > 8), __crafter_rvv_v512_f32,
|
||||
std::conditional_t<(Len * Packing > 4), __crafter_rvv_v256_f32, __crafter_rvv_v128_f32>>,
|
||||
std::array<T, GetAlingment()/sizeof(T)>
|
||||
>;
|
||||
#else
|
||||
using VectorType = std::array<T, GetAlingment()/sizeof(T)>;
|
||||
#endif
|
||||
|
|
@ -80,6 +115,13 @@ namespace Crafter {
|
|||
// WASM SIMD only has 128-bit vectors; cap at 16 bytes so the entire
|
||||
// VectorType always fits in a single v128_t.
|
||||
static constexpr std::uint8_t Max = 16;
|
||||
#elif defined(__riscv_vector)
|
||||
// RVV tier selected at compile time from the guaranteed VLEN. ZVL128B
|
||||
// is the RVA23 baseline; ZVL256B / ZVL512B unlock wider registers
|
||||
// when present. LMUL>1 groupings are a separate axis and could land
|
||||
// later as a batched-op path on top of this.
|
||||
static constexpr std::uint8_t Max = (CRAFTER_RVV_VLEN >= 512) ? 64 :
|
||||
(CRAFTER_RVV_VLEN >= 256) ? 32 : 16;
|
||||
#else
|
||||
static constexpr std::uint8_t Max = 32;
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue