Polygeist icon indicating copy to clipboard operation
Polygeist copied to clipboard

affine.for can't be converted to affine.parallel if scf is raised through polygeist-opt

Open jzhoulon opened this issue 2 years ago • 0 comments

For the following case, if the code is raised to affine through cgeist, matmul can be parallelized by polygeist-opt (--affine-parallelize), but DSPF_sp_mat_trans_cplx can't be. If it is raised to affine through polygeist-opt, then neither function (DSPF_sp_mat_trans_cplx nor matmul) can be parallelized.

cgeist test.c -S | polygeist-opt --raise-scf-to-affine --affine-parallelize result: neither DSPF_sp_mat_trans_cplx nor matmul can be parallelized by --affine-parallelize

cgeist test.c -S --raise-scf-to-affine | polygeist-opt --affine-parallelize result: matmul can be parallelized by --affine-parallelize, but DSPF_sp_mat_trans_cplx still can't be

example code:

/*
 * Transpose a matrix of interleaved (real, imaginary) float pairs.
 *
 * For every i in [0, cols) and j in [0, rows), the pair stored at
 * x[(i + cols * j) * 2] is copied to r[(i * rows + j) * 2], i.e. the
 * element at row j, column i of the input becomes row i, column j of
 * the output. Both buffers are indexed as flat arrays and each is
 * accessed over 2 * rows * cols floats.
 */
void DSPF_sp_mat_trans_cplx( float*  x, int rows, int cols, float * r)
{
        for (int col = 0; col < cols; ++col) {
                for (int row = 0; row < rows; ++row) {
                        /* Offsets of the real part; the imaginary part follows it. */
                        int src = (col + cols * row) * 2;
                        int dst = (col * rows + row) * 2;

                        r[dst] = x[src];
                        r[dst + 1] = x[src + 1];
                }
        }
}
/* Declared extents of the matrices used by matmul and main. */
#define N 200
#define M 300
#define K 400


#define DATA_TYPE float
/*
 * Accumulate the product of A and B into C:
 *
 *   C[i][j] += A[i][k] * B[k][j]
 *
 * for i in [0, N_L), j in [0, M_L) and k in [0, K_L).
 *
 * C is not cleared first, so the product is added to whatever C already
 * holds. N_L, M_L and K_L are the run-time trip counts; they must not
 * exceed the extents of the arrays actually passed in.
 */
void matmul(DATA_TYPE A[N][K], DATA_TYPE B[K][M], DATA_TYPE C[N][M], int N_L, int M_L, int K_L) {
  /* Loop indices are scoped to their loops; no function-level copies are needed. */
  for (int i = 0; i < N_L; i++) {
    for (int j = 0; j < M_L; j++) {
      for (int k = 0; k < K_L; k++) {
        C[i][j] += A[i][k] * B[k][j];
      }
    }
  }
}


/*
 * Driver that calls both kernels once.
 *
 * The arrays have static storage so they are zero-initialised (the
 * kernels read them) and do not take roughly 1 MB of stack.
 *
 * The transpose reads and writes 2 * rows * cols floats through flat
 * float pointers. `a` holds N * K floats, so it is treated as an
 * N x (K / 2) matrix of interleaved complex values, which covers `a`
 * exactly; `b` (K * M floats) is large enough to receive the result.
 */
int main(void) {
  static float a[N][K];
  static float b[K][M];
  static float c[N][M];

  matmul(a, b, c, N, M, K);
  /* Pass element pointers: the kernel takes float *, not a pointer to a row. */
  DSPF_sp_mat_trans_cplx(&a[0][0], N, K / 2, &b[0][0]);
  return 0;
}

cgeist test.c -S | polygeist-opt --raise-scf-to-affine --affine-parallelize result:

 func.func @matmul(%arg0: memref<?x400xf32>, %arg1: memref<?x300xf32>, %arg2: memref<?x300xf32>, %arg3: i32, %arg4: i32, %arg5: i32) attributes {llvm.linkage = #llvm.linkage<external>} {
    %0 = arith.index_cast %arg3 : i32 to index
    %1 = arith.index_cast %arg4 : i32 to index
    %2 = arith.index_cast %arg5 : i32 to index
    affine.for %arg6 = 0 to %0 {
      affine.for %arg7 = 0 to %1 {
        affine.for %arg8 = 0 to %2 {
          %3 = memref.load %arg0[%arg6, %arg8] : memref<?x400xf32>
          %4 = memref.load %arg1[%arg8, %arg7] : memref<?x300xf32>
          %5 = arith.mulf %3, %4 : f32
          %6 = memref.load %arg2[%arg6, %arg7] : memref<?x300xf32>
          %7 = arith.addf %6, %5 : f32
          memref.store %7, %arg2[%arg6, %arg7] : memref<?x300xf32>
        }
      }
    }
    return
  }
  func.func @DSPF_sp_mat_trans_cplx(%arg0: memref<?xf32>, %arg1: i32, %arg2: i32, %arg3: memref<?xf32>) attributes {llvm.linkage = #llvm.linkage<external>} {
    %c1_i32 = arith.constant 1 : i32
    %c2_i32 = arith.constant 2 : i32
    %0 = arith.index_cast %arg2 : i32 to index
    %1 = arith.index_cast %arg1 : i32 to index
    affine.for %arg4 = 0 to %0 {
      %2 = arith.index_cast %arg4 : index to i32
      %3 = arith.muli %2, %arg1 : i32
      affine.for %arg5 = 0 to %1 {
        %4 = arith.index_cast %arg5 : index to i32
        %5 = arith.addi %3, %4 : i32
        %6 = arith.muli %5, %c2_i32 : i32
        %7 = arith.index_cast %6 : i32 to index
        %8 = arith.muli %arg2, %4 : i32
        %9 = arith.addi %2, %8 : i32
        %10 = arith.muli %9, %c2_i32 : i32
        %11 = arith.index_cast %10 : i32 to index
        %12 = memref.load %arg0[%11] : memref<?xf32>
        memref.store %12, %arg3[%7] : memref<?xf32>
        %13 = arith.addi %6, %c1_i32 : i32
        %14 = arith.index_cast %13 : i32 to index
        %15 = arith.addi %10, %c1_i32 : i32
        %16 = arith.index_cast %15 : i32 to index
        %17 = memref.load %arg0[%16] : memref<?xf32>
        memref.store %17, %arg3[%14] : memref<?xf32>
      }
    }
    return
  }

cgeist test.c -S --raise-scf-to-affine | polygeist-opt --affine-parallelize result:

  func.func @matmul(%arg0: memref<?x400xf32>, %arg1: memref<?x300xf32>, %arg2: memref<?x300xf32>, %arg3: i32, %arg4: i32, %arg5: i32) attributes {llvm.linkage = #llvm.linkage<external>} {
    %0 = arith.index_cast %arg3 : i32 to index
    %1 = arith.index_cast %arg4 : i32 to index
    %2 = arith.index_cast %arg5 : i32 to index
    affine.parallel (%arg6) = (0) to (symbol(%0)) {
      affine.parallel (%arg7) = (0) to (symbol(%1)) {
        affine.for %arg8 = 0 to %2 {
          %3 = affine.load %arg0[%arg6, %arg8] : memref<?x400xf32>
          %4 = affine.load %arg1[%arg8, %arg7] : memref<?x300xf32>
          %5 = arith.mulf %3, %4 : f32
          %6 = affine.load %arg2[%arg6, %arg7] : memref<?x300xf32>
          %7 = arith.addf %6, %5 : f32
          affine.store %7, %arg2[%arg6, %arg7] : memref<?x300xf32>
        }
      }
    }
    return
  }
  func.func @DSPF_sp_mat_trans_cplx(%arg0: memref<?xf32>, %arg1: i32, %arg2: i32, %arg3: memref<?xf32>) attributes {llvm.linkage = #llvm.linkage<external>} {
    %0 = arith.index_cast %arg2 : i32 to index
    %1 = arith.index_cast %arg1 : i32 to index
    affine.for %arg4 = 0 to %0 {
      affine.for %arg5 = 0 to %1 {
        %2 = affine.load %arg0[%arg4 * 2 + (%arg5 * symbol(%0)) * 2] : memref<?xf32>
        affine.store %2, %arg3[%arg5 * 2 + (%arg4 * symbol(%1)) * 2] : memref<?xf32>
        %3 = affine.load %arg0[%arg4 * 2 + (%arg5 * symbol(%0)) * 2 + 1] : memref<?xf32>
        affine.store %3, %arg3[%arg5 * 2 + (%arg4 * symbol(%1)) * 2 + 1] : memref<?xf32>
      }
    }
    return
  }
}

jzhoulon avatar Dec 28 '22 11:12 jzhoulon