packed intersection and matrix
This commit is contained in:
parent
027947cae6
commit
f0becd1582
7 changed files with 948 additions and 557 deletions
|
|
@ -87,6 +87,17 @@ namespace Crafter {
|
|||
|
||||
// GetAlingment() divided by sizeof(T) (integer division). Presumably the
// alignment expressed as a count of T elements — TODO confirm that
// GetAlingment() returns bytes.
static constexpr std::uint8_t AlignmentElement = GetAlingment()/sizeof(T);
|
||||
// The value returned by GetAlingment(), stored as a class constant.
static constexpr std::uint8_t Alignment = GetAlingment();
|
||||
// Number of input vectors per batched Normalize/Dot/Length call that
|
||||
// exactly fills the output register on the current (Len, Packing, ISA).
|
||||
// Each input contributes `Packing` scalar results; an output register
|
||||
// holds `AlignmentElement` lanes, so optimal arity = lanes / packing.
// NOTE(review): this is an integer division, so it evaluates to 0 whenever
// Packing > AlignmentElement — confirm that combination cannot occur.
|
||||
static constexpr std::uint8_t BatchSize = AlignmentElement / Packing;
|
||||
// Largest Packing that still fits a single SIMD register for this
|
||||
// (Len, T) on the current ISA. Independent of the current Packing
|
||||
// dimension — meant for higher-level batching code that wants to
|
||||
// process Packing sub-primitives at once (e.g. intersection tests).
|
||||
// Falls back to 1 in the pathological case Len > MaxElement.
// Computed as MaxElement / Len (integer division), with 1 substituted
// when that quotient is 0.
|
||||
static constexpr std::uint8_t OptimalPacking = (MaxElement / Len) > 0 ? (MaxElement / Len) : 1;
|
||||
// Compile-time guard: instantiation fails when Len * Packing is larger
// than MaxElement.
static_assert(Len * Packing <= MaxElement, "Len * Packing exceeds MaxElement");
|
||||
|
||||
protected:
|
||||
|
|
@ -97,6 +108,22 @@ namespace Crafter {
|
|||
return arr;
|
||||
}
|
||||
|
||||
// True iff every per-Packing-slot shuffle (output, source) pair stays
|
||||
// within the same PerLane chunk. shuffle_epi32 / shuffle_epi8 are
|
||||
// applied per 128-bit lane, so any cross-lane move has to fall through
|
||||
// to a cross-lane permute path instead.
|
||||
template <std::array<std::uint8_t, Len> ShuffleValues>
|
||||
static consteval bool LaneSafeShuffle() {
|
||||
for (std::uint8_t p = 0; p < Packing; ++p) {
|
||||
for (std::uint8_t i = 0; i < Len; ++i) {
|
||||
std::uint8_t outIdx = static_cast<std::uint8_t>(p * Len + i);
|
||||
std::uint8_t srcIdx = static_cast<std::uint8_t>(p * Len + ShuffleValues[i]);
|
||||
if (outIdx / PerLane != srcIdx / PerLane) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <std::array<std::uint8_t, Len> ShuffleValues>
|
||||
static consteval bool CheckEpi32Shuffle() {
|
||||
if constexpr (PerLane == 8) {
|
||||
|
|
@ -113,7 +140,7 @@ namespace Crafter {
|
|||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return LaneSafeShuffle<ShuffleValues>();
|
||||
}
|
||||
|
||||
template <std::array<std::uint8_t, Len> ShuffleValues>
|
||||
|
|
@ -124,7 +151,7 @@ namespace Crafter {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return LaneSafeShuffle<ShuffleValues>();
|
||||
}
|
||||
|
||||
template <std::array<std::uint8_t, Len> ShuffleValues>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue