CINN

Example of conv2d generating 2 kernels

Open — haozech opened this issue 3 years ago • 1 comment

此 PR 通过举例说明：在不使用 Schedule 时，初始的 Conv2d Compute 在 CUDA 端会生成两个 kernel。

单测结果:

I0601 11:14:21.862107 52538 codegen_cuda_dev_test.cc:673] func number is : 2
I0601 11:14:21.862145 52538 codegen_cuda_dev_test.cc:676] func is : 
function schedule_conv2d_0 (_X, _input_pad)
{
  for (j, 0, 128)
  {
    for (k, 0, 28)
    {
      for (a, 0, 28)
      {
        input_pad[0, j, k, a] = X[0, j, k, a]
      }
    }
  }
}
I0601 11:14:21.862172 52538 codegen_cuda_dev_test.cc:676] func is : 
function schedule_conv2d_0_1 (_X, _Y, _input_pad, _COD)
{
  for (j, 0, 256)
  {
    for (k, 0, 14)
    {
      for (a, 0, 14)
      {
        COD__reduce_init[0, j, k, a] = 0
        for (rc, 0, 128)
        {
          COD[0, j, k, a] = (COD[0, j, k, a] + (input_pad[0, rc, (2 * k), (2 * a)] * Y[j, rc, 0, 0]))
        }
      }
    }
  }
}
I0601 11:14:21.882473 52538 codegen_cuda_dev_test.cc:682] compiled test_no_schedule_conv2d code:


extern "C" {

#include "cinn_cuda_runtime_source.cuh"

#ifdef __CUDACC_RTC__
typedef int int32_t;
typedef char int8_t;
#endif



__global__
void schedule_conv2d_0(const float* __restrict__ X, float* __restrict__ input_pad)
{
  for (int32_t j = 0; j < 128; j += 1) {
    for (int32_t k = 0; k < 28; k += 1) {
      for (int32_t a = 0; a < 28; a += 1) {
        input_pad[((784 * j) + ((28 * k) + a))] = X[((784 * j) + ((28 * k) + a))];
      };
    };
  };
}__global__
void schedule_conv2d_0_1(const float* __restrict__ X, const float* __restrict__ Y, const float* __restrict__ input_pad, float* __restrict__ COD)
{
  float* COD__reduce_init = COD;
  for (int32_t j = 0; j < 256; j += 1) {
    for (int32_t k = 0; k < 14; k += 1) {
      for (int32_t a = 0; a < 14; a += 1) {
        COD__reduce_init[((196 * j) + ((14 * k) + a))] = 0;
        for (int32_t rc = 0; rc < 128; rc += 1) {
          COD[((196 * j) + ((14 * k) + a))] = (COD[((196 * j) + ((14 * k) + a))] + (input_pad[((2 * a) + ((56 * k) + (784 * rc)))] * Y[((128 * j) + rc)]));
        };
      };
    };
  };
}

}

haozech avatar Jun 01 '22 11:06 haozech

Thanks for your contribution!

paddle-bot-old[bot] avatar Jun 01 '22 11:06 paddle-bot-old[bot]