veccore
veccore copied to clipboard
Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version.
#include "VecCore/VecCore"
using namespace vecCore;
template<typename T>
void mandelbrot(T xmin, T xmax, size_t nx,
T ymin, T ymax, size_t ny,
size_t max_iter, unsigned char *image)
{
T dx = (xmax - xmin) / T(nx);
T dy = (ymax - ymin) / T(ny);
for (size_t i = 0; i < nx; ++i) {
for (size_t j = 0; j < ny; ++j) {
size_t k = 0;
T x = xmin + T(i) * dx, cr = x, zr = x;
T y = ymin + T(j) * dy, ci = y, zi = y;
do {
x = zr*zr - zi*zi + cr;
y = T(2.0) * zr*zi + ci;
zr = x;
zi = y;
} while (++k < max_iter && (zr*zr + zi*zi < T(4.0)));
image[ny*i + j] = k;
}
}
}
template<typename T>
void mandelbrot_v(Scalar<T> xmin, Scalar<T> xmax, size_t nx,
Scalar<T> ymin, Scalar<T> ymax, size_t ny,
Scalar<Index<T>> max_iter, unsigned char *image)
{
T iota;
for (size_t i = 0; i < VectorSize<T>(); ++i)
Set<T>(iota, i, i);
T dx = T(xmax - xmin) / T((Scalar<T>)nx);
T dy = T(ymax - ymin) / T((Scalar<T>)ny), dyv = iota * dy;
for (size_t i = 0; i < nx; ++i) {
for (size_t j = 0; j < ny; j += VectorSize<T>()) {
Scalar<Index<T>> k(0);
T x = xmin + T((Scalar<T>)i) * dx, cr = x, zr = x;
T y = ymin + T((Scalar<T>)j) * dy + dyv, ci = y, zi = y;
Index<T> kv(0);
Mask<T> m(true);
do {
x = zr*zr - zi*zi + cr;
y = T((Scalar<T>)2.0) * zr*zi + ci;
MaskedAssign<T>(zr, m, x);
MaskedAssign<T>(zi, m, y);
MaskedAssign<Index<T>>(kv, m, ++k);
m = zr*zr + zi*zi < T((Scalar<T>)4.0);
} while (k < max_iter && !MaskEmpty(m));
for (size_t k = 0; k < VectorSize<T>(); ++k)
image[ny*i + j + k] = (unsigned char) Get(kv, k);
}
}
}
int main(){
double xmin = -2.1, xmax = 1.1;
double ymin = -1.35, ymax = 1.35;
size_t nx = 1024, ny = 864, max_iter = 500;
unsigned char *image_scalar = new unsigned char[nx*ny];
unsigned char *image_simd = new unsigned char[nx*ny];
mandelbrot_v<backend::SIMDNative::Double_v>(xmin, xmax, nx, ymin, ymax, ny,
max_iter, image_simd);
mandelbrot<double>(xmin, xmax, nx, ymin, ymax, ny,
max_iter, image_scalar);
for(int i=0 ; i<nx*ny; ++i){
assert(image_simd[i] == image_scalar[i]);
}
delete[] image_scalar;
delete[] image_simd;
}