packed intersection and matrix

This commit is contained in:
Jorijn van der Graaf 2026-05-18 19:57:40 +02:00
commit f0becd1582
7 changed files with 948 additions and 557 deletions

View file

@ -87,6 +87,17 @@ namespace Crafter {
// Alignment expressed as a count of T elements: GetAlingment() divided by
// sizeof(T). NOTE(review): presumably GetAlingment() yields a size in
// bytes — confirm against its definition.
static constexpr std::uint8_t AlignmentElement = GetAlingment()/sizeof(T);
// The value of GetAlingment() itself, stored as a compile-time constant.
static constexpr std::uint8_t Alignment = GetAlingment();
// Number of input vectors per batched Normalize/Dot/Length call that
// exactly fills the output register on the current (Len, Packing, ISA).
// Each input contributes `Packing` scalar results; an output register
// holds `AlignmentElement` lanes, so optimal arity = lanes / packing.
static constexpr std::uint8_t BatchSize = AlignmentElement / Packing;
// Largest Packing that still fits a single SIMD register for this
// (Len, T) on the current ISA. Independent of the current Packing
// dimension — meant for higher-level batching code that wants to
// process Packing sub-primitives at once (e.g. intersection tests).
// Falls back to 1 in the pathological case Len > MaxElement.
static constexpr std::uint8_t OptimalPacking = (MaxElement / Len) > 0 ? (MaxElement / Len) : 1;
// Compile-time guard: an instantiation whose Len * Packing elements exceed
// MaxElement is rejected with the message below.
static_assert(Len * Packing <= MaxElement, "Len * Packing exceeds MaxElement");
protected:
@ -97,6 +108,22 @@ namespace Crafter {
return arr;
}
// Compile-time check that a shuffle never moves an element across a
// PerLane-sized chunk boundary.
//
// ShuffleValues describes the shuffle for one Len-element slot; the same
// pattern is replicated for each of the Packing slots. For every slot the
// destination index (slot * Len + i) and the source index
// (slot * Len + ShuffleValues[i]) are compared by the chunk they fall in
// (index / PerLane). Returns true only when all pairs share a chunk.
//
// Why it matters: shuffle_epi32 / shuffle_epi8 operate independently on
// each 128-bit lane, so a pattern that needs a cross-lane move cannot use
// them and has to take a cross-lane permute path instead.
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval bool LaneSafeShuffle() {
    std::uint8_t slot = 0;
    while (slot < Packing) {
        // Index of the first element belonging to this packing slot.
        const auto slotBase = slot * Len;
        std::uint8_t elem = 0;
        while (elem < Len) {
            const auto dst = static_cast<std::uint8_t>(slotBase + elem);
            const auto src = static_cast<std::uint8_t>(slotBase + ShuffleValues[elem]);
            const bool sameChunk = (dst / PerLane) == (src / PerLane);
            if (!sameChunk) {
                return false;
            }
            ++elem;
        }
        ++slot;
    }
    return true;
}
template <std::array<std::uint8_t, Len> ShuffleValues>
static consteval bool CheckEpi32Shuffle() {
if constexpr (PerLane == 8) {
@ -113,7 +140,7 @@ namespace Crafter {
}
}
}
return true;
return LaneSafeShuffle<ShuffleValues>();
}
template <std::array<std::uint8_t, Len> ShuffleValues>
@ -124,7 +151,7 @@ namespace Crafter {
return false;
}
}
return true;
return LaneSafeShuffle<ShuffleValues>();
}
template <std::array<std::uint8_t, Len> ShuffleValues>