HIP icon indicating copy to clipboard operation
HIP copied to clipboard

When the kernels are in a different file, it will affect the latency of Kernel Launch

Open yhlisnps opened this issue 3 years ago • 1 comments
trafficstars

When kernels are defined in different source files, kernel launch latency is affected. Using RocProfiler, I saw four additional HSA API calls (hsa_executable_create_alt, hsa_code_object_reader_create_from_memory, hsa_executable_load_agent_code_object, and hsa_executable_freeze) compared to a normal kernel launch.

Code

main file

 1 #include <hip/hip_runtime.h>                                                                                                                                                                                                              
  2 #include "class1.h"
  3 #include "class2.h"
  4 #include "class3.h"
  5 #define SIZE 1024
  // Reproducer entry point: allocates two float buffers with hipMalloc, creates a
  // stream, and launches one kernel through Class1 and one through Class2 on it.
  // NOTE(review): each .cpp file is presumably built into its own device code
  // object, so the first launch from each file may incur a one-time load (the
  // extra hsa_executable_* calls reported above) — confirm against HIP runtime docs.
  // NOTE(review): in the lines shown, HIP return values are not checked, and the
  // buffers, stream, c1 and c2 are never released, nor is the stream synchronized.
  6 int main(){
  7     //todo:create gpu mem
  8     float * input;
  9     float * output;
 10     hipMalloc(&input,SIZE*sizeof(float));   // SIZE floats for the kernels to read
 11     hipMalloc(&output,SIZE*sizeof(float));  // SIZE floats for the kernels to write
 12     hipStream_t s;
 13     hipStreamCreate(&s);
 15     Class1* c1=new Class1(input,output);
 16     Class2* c2=new Class2(input,output);
 17     c1->kernel(s);  // launches class1_kernel (defined in the Class1 cpp file)
 18     c2->kernel(s);  // launches class2_kernel (defined in the Class2 cpp file)
 23 }

Class1 Header file and cpp file

  1 #include<hip/hip_runtime.h>                                                                                                                                                                                                               
  // Stores the two float pointers given at construction and declares kernel(),
  // which is defined in the accompanying cpp file.
  2 class Class1{
  3 public:
  4     Class1(float*_in,float*_out):in(_in),out(_out){}  // only stores the pointers
  5     void kernel(hipStream_t q);  // q: stream on which the kernel is launched
  6 private:
  7     float* in;   // source buffer handed to the kernel
  8     float* out;  // destination buffer handed to the kernel
  9 };
  1 #include"class1.h"
  2 #include"class2.h"
  // Copy kernel: each thread copies one float from in to out at its global index.
  // There is no bounds check, so both buffers must hold at least one element per
  // launched thread.
  3 __global__ void class1_kernel(float* in,float* out){
  4     const int tid=blockDim.x*blockIdx.x+threadIdx.x;  // global thread index
  5     out[tid]=in[tid];
  6 }
  // Launches class1_kernel on stream q with the stored in/out pointers.
  7 void Class1::kernel(hipStream_t q)
  8 {
  9    class1_kernel<<<2,512,0,q>>> (in,out);  // 2 blocks x 512 threads = 1024 threads; third launch argument is 0
 10 }  

Class2 Header file & cpp file

  1 #include<hip/hip_runtime.h>                                                                                                                                                                                                               
  // Stores the two float pointers given at construction and declares kernel(),
  // which is defined in the accompanying cpp file.
  2 class Class2{
  3 public:
  4     Class2(float*_in,float*_out):in(_in),out(_out){}  // only stores the pointers
  5     void kernel(hipStream_t q);  // q: stream on which the kernel is launched
  6 private:
  7     float* in;   // source buffer handed to the kernel
  8     float* out;  // destination buffer handed to the kernel
  9 };
  1 #include"class2.h"                                                                                                                                                                                                                        
  // Copy kernel: each thread copies one float from in to out at its global index.
  // There is no bounds check, so both buffers must hold at least one element per
  // launched thread.
  2 __global__ void class2_kernel(float* in,float* out){
  3     const int tid=blockDim.x*blockIdx.x+threadIdx.x;  // global thread index
  4     out[tid]=in[tid];
  5 }
  // Launches class2_kernel on stream q with the stored in/out pointers.
  6 void Class2::kernel(hipStream_t q)
  7 {
  8    class2_kernel<<<2,512,0,q>>> (in,out);  // 2 blocks x 512 threads = 1024 threads; third launch argument is 0
  9 }

yhlisnps avatar Aug 25 '22 08:08 yhlisnps

@yhlisnps Hi, were you able to resolve your issue on the latest HIP? If so, can we please close this ticket?

abhimeda avatar Feb 07 '24 18:02 abhimeda