Image-processing-algorithm-Speed
Image-processing-algorithm-Speed copied to clipboard
ISPC is faster
// One interleaved RGB sample: three unsigned 8-bit channels stored in
// r, g, b order.
struct Pixel {
    unsigned int8 r;
    unsigned int8 g;
    unsigned int8 b;
};
// Converts one RGB pixel to an 8-bit gray value.
//
// The channels are combined with the integer weights 76 / 150 / 30, which
// sum to 256, so the final right shift by 8 divides by the weight total and
// the result always fits in 8 bits.
unsigned int8 rgb2gray_kernel(Pixel p) {
    const uniform int red_weight = 76;
    const uniform int green_weight = 150;
    const uniform int blue_weight = 30;

    return (p.r * red_weight + p.g * green_weight + p.b * blue_weight) >> 8;
}
// Converts an interleaved 8-bit RGB image to an 8-bit grayscale image.
//
//   src    - first byte of the RGB input; row h starts at src + h * stride
//   dst    - gray output, written densely: row h starts at dst + h * width
//   width  - number of pixels per row
//   height - number of rows
//   stride - distance in bytes between the starts of consecutive src rows
//
// Each output byte is rgb2gray_kernel() applied to the matching input pixel.
export void rgb2gray(uniform unsigned int8* uniform src,
                     uniform unsigned int8* uniform dst,
                     uniform size_t width,
                     uniform size_t height,
                     uniform size_t stride) {
    // The row counter has the same type as `height`, so the loop condition
    // does not mix a signed 32-bit counter with an unsigned size_t bound.
    for (uniform size_t row = 0; row < height; ++row) {
        // ISPC pointers are varying unless the pointer itself is qualified
        // `uniform`. The row base is identical for every program instance,
        // so declare it `* uniform`; only the column index below varies.
        uniform Pixel* uniform in_row =
            (uniform Pixel* uniform)(src + row * stride);
        uniform unsigned int8* uniform out_row = dst + row * width;

        foreach (col = 0 ... width) {
            out_row[col] = rgb2gray_kernel(in_row[col]);
        }
    }
}
Compiled with:
ispc rgb2gray.ispc -h rgb2gray.h -o rgb2gray.o -O3 --pic --target=avx2-i32x16