ispc
ispc copied to clipboard
Add a stdlib shuffle that maps to _mm256_shuffle_epi8
I would like the following shuffle to map directly to vpshufb, the packed int8 shuffle instruction on x86:
// Permutes the bytes of `val` through a fixed lookup table and returns the
// permuted value. The storage of `val` and `ret` is reinterpreted as int8
// arrays, and each destination byte is copied from the source byte selected
// by the table.
//
// NOTE(review): this relies on the storage behind `val`/`ret` being
// addressable as one contiguous run of bytes -- confirm for the target in use.
// NOTE(review): the hard-coded 16 offsets and the 32-entry table look correct
// only when SIMD_LANES * 2 == 16 (i.e. SIMD_LANES == 8); other values would
// index outside the table or the value's storage -- confirm.
int transpose(int val)
{
int ret;
// Two identical runs of 16 indices. Each run lists 0x0..0xF in the order
// 0,4,8,C, 1,5,9,D, ... which is the transpose of a 4x4 index grid.
const uniform int8 SIMD_SHUFFLE_SCANLINE_TO_SUBTILES[] = { 0x0, 0x4, 0x8, 0xC, 0x1, 0x5, 0x9, 0xD, 0x2, 0x6, 0xA, 0xE, 0x3, 0x7, 0xB, 0xF, 0x0, 0x4, 0x8, 0xC, 0x1, 0x5, 0x9, 0xD, 0x2, 0x6, 0xA, 0xE, 0x3, 0x7, 0xB, 0xF };
// Byte-wise views of the input and of the result being built.
uniform int8 * uniform valBytePtr = ((uniform int8 * uniform) &val);
uniform int8 * uniform retBytePtr = ((uniform int8 * uniform) &ret);
uniform const int numBytes = SIMD_LANES * 2; // deliberately * 2 - needed to emulate AVX2 shuffle
foreach(bytes = 0 ... numBytes)
{
// First group: destination byte `bytes` takes the source byte named by the table.
retBytePtr[bytes] = valBytePtr[SIMD_SHUFFLE_SCANLINE_TO_SUBTILES[bytes]];
// Second group: same lookup using the table's second run, with the source
// index rebased by 16 so that bytes 16 and up are permuted among themselves.
retBytePtr[bytes + 16] = valBytePtr[SIMD_SHUFFLE_SCANLINE_TO_SUBTILES[bytes + 16] + 16];
}
return ret;
}