devito icon indicating copy to clipboard operation
devito copied to clipboard

Code Generation Issue: atomic update inside a simd loop

Open utkarsh8028 opened this issue 1 year ago • 3 comments

I am facing the following error : error: OpenMP constructs other than '#pragma omp ordered simd' may not be nested inside 'simd' region due to #pragma omp atomic update inside simd loop.

Please see the MFE

from devito import Function,Operator,SpaceDimension
import devito as dv
import numpy as np
from devito import configuration
configuration['language'] = 'openmp'
configuration['log-level'] = 'DEBUG'
# convolve input_f with kernel
data = np.random.randn(50,50) ; k = np.random.randn(5,5)
input_dimensions = [SpaceDimension("inp_I_"+str(x)) for x in range(0,2)]
kernel_dimensions = [SpaceDimension("inp_K_"+str(x)) for x in range(0,2)]
result_dimensions = [SpaceDimension("inp_R_"+str(x)) for x in range(0,2)]
input_func = Function(name=("Input_F"), shape=data.shape, dimensions=input_dimensions)
kernel_func = Function(name=("Kernel_F"), shape=k.shape, dimensions=kernel_dimensions)
res_func = Function(name=("Result_F"), shape=(46,46),dimensions=result_dimensions)
input_func.data[:] = data; kernel_func.data[:]=k
rhs = kernel_func[kernel_dimensions] * input_func[result_dimensions[0] + kernel_dimensions[0],result_dimensions[1] + kernel_dimensions[1]]
eqs = [dv.Inc(res_func, rhs)]
op = Operator(eqs)
op.apply()
print(res_func.data)

I am using a linux system with a GCC compiler.

The C Code that is generated by Devito

#define _POSIX_C_SOURCE 200809L
#define START_TIMER(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP_TIMER(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "xmmintrin.h"
#include "pmmintrin.h"
#include "omp.h"

struct dataobj
{
  void *restrict data;
  unsigned long * size;
  unsigned long * npsize;
  unsigned long * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
  void * dmap;
} ;

struct profiler
{
  double section0;
} ;


int Kernel(struct dataobj *restrict Input_F_vec, struct dataobj *restrict Kernel_F_vec, struct dataobj *restrict Result_F_vec, const int inp_K_0_M, const int inp_K_0_m, const int inp_K_1_M, const int inp_K_1_m, const int inp_R_0_M, const int inp_R_0_m, const int inp_R_1_M, const int inp_R_1_m, const int nthreads_nonaffine, struct profiler * timers)
{
  float (*restrict Input_F)[Input_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Input_F_vec->size[1]]) Input_F_vec->data;
  float (*restrict Kernel_F)[Kernel_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Kernel_F_vec->size[1]]) Kernel_F_vec->data;
  float (*restrict Result_F)[Result_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Result_F_vec->size[1]]) Result_F_vec->data;

  /* Flush denormal numbers to zero in hardware */
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

  /* Begin section0 */
  START_TIMER(section0)
  for (int inp_K_0 = inp_K_0_m; inp_K_0 <= inp_K_0_M; inp_K_0 += 1)
  {
    #pragma omp parallel num_threads(nthreads_nonaffine)
    {
      int chunk_size = (int)(fmax(1, (1.0F/3.0F)*(inp_K_1_M - inp_K_1_m + 1)*(inp_R_0_M - inp_R_0_m + 1)/nthreads_nonaffine));
      #pragma omp for collapse(2) schedule(dynamic,chunk_size)
      for (int inp_R_0 = inp_R_0_m; inp_R_0 <= inp_R_0_M; inp_R_0 += 1)
      {
        for (int inp_K_1 = inp_K_1_m; inp_K_1 <= inp_K_1_M; inp_K_1 += 1)
        {
          #pragma omp simd aligned(Input_F,Kernel_F,Result_F:64)
          for (int inp_R_1 = inp_R_1_m; inp_R_1 <= inp_R_1_M; inp_R_1 += 1)
          {
            float r0 = Input_F[inp_K_0 + inp_R_0 + 1][inp_K_1 + inp_R_1 + 1]*Kernel_F[inp_K_0 + 1][inp_K_1 + 1];
            #pragma omp atomic update
            Result_F[inp_R_0 + 1][inp_R_1 + 1] += r0;
          }
        }
      }
    }
  }
  STOP_TIMER(section0,timers)
  /* End section0 */

  return 0;
}

utkarsh8028 avatar Sep 21 '22 17:09 utkarsh8028

Can I ask what gcc version you are using?

mloubout avatar Sep 21 '22 17:09 mloubout

Hi,I am using : gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-10.1.0.1)

utkarsh8028 avatar Sep 21 '22 17:09 utkarsh8028

Related discussion: https://devitocodes.slack.com/archives/C7JMLMSG0/p1663842578621769

georgebisbas avatar Sep 30 '22 18:09 georgebisbas