zoo
zoo copied to clipboard
Parallel Suffix cleanup
I'm not totally sure how this can be turned into associative iteration (AI) right now... I think this function might be too simple for associative iteration?
/// Folds each lane of `input` with XOR using a doubling shift distance.
///
/// Every round XORs the running result with a copy of itself shifted left
/// inside its lane by `power` bits; `power` starts at 1 and doubles each
/// round, and log2_of_power_of_two(S::NBits) rounds are executed in total.
/// NOTE(review): presumably this leaves, in every bit position, the XOR of
/// that bit and all less significant bits of the same lane -- confirm against
/// the semantics of S::shiftIntraLaneLeft.
///
/// @tparam S lane-structured value type; the body requires S::NBits,
///           S::MostSignificantBit, value(), shiftIntraLaneLeft(count, mask),
///           operator^ and operator&, plus construction from the underlying
///           value. NOTE(review): S::NBits is assumed to be a power of two,
///           as the name of the log2 helper suggests -- confirm.
/// @param input the value whose lanes are folded.
/// @return an S holding the folded lanes; `input` unchanged when no round is
///         needed.
template<typename S>
constexpr auto parallelSuffix(S input) {
    // One declaration per line: the round counter no longer has to deduce to
    // the very same type as `power`.
    auto roundsLeft = log2_of_power_of_two(S::NBits);
    auto result = input;

    // Zero rounds (one-bit lanes): nothing to fold. Without this guard the
    // loop below would decrement the counter past zero and never hit its
    // exit test, shifting `power` until it overflows.
    if (roundsLeft <= 0) { return result; }

    int power = 1;
    // Starts as every bit except the ones flagged by S::MostSignificantBit.
    // NOTE(review): presumably the mask keeps shifted bits from leaking into
    // the neighbouring lane -- confirm.
    auto shiftMask = S{~S::MostSignificantBit};

    for (;;) {
        result = result ^ result.shiftIntraLaneLeft(power, shiftMask);
        // Leave before preparing a mask and distance nobody will use.
        if (!--roundsLeft) { break; }
        // Narrow the mask to match the shift distance of the next round.
        shiftMask = shiftMask & S{shiftMask.value() >> power};
        power <<= 1;
    }
    return result;
}