HIP
HIP copied to clipboard
When the kernels are in a different file, it will affect the latency of Kernel Launch
trafficstars
When the kernels are defined in different source files, kernel launch latency is affected. Using rocprofiler, I saw four additional HSA API calls (hsa_executable_create_alt, hsa_code_object_reader_create_from_memory, hsa_executable_load_agent_code_object, and hsa_executable_freeze) compared to a normal kernel launch.
Code
main file
1 #include <hip/hip_runtime.h>
2 #include "class1.h"
3 #include "class2.h"
4 #include "class3.h"
5 #define SIZE 1024
6 int main(){
7 //todo:create gpu mem
8 float * input;
9 float * output;
10 hipMalloc(&input,SIZE*sizeof(float));
11 hipMalloc(&output,SIZE*sizeof(float));
12 hipStream_t s;
13 hipStreamCreate(&s);
15 Class1* c1=new Class1(input,output);
16 Class2* c2=new Class2(input,output);
17 c1->kernel(s);
18 c2->kernel(s);
23 }
Class1 Header file and cpp file
1 #include<hip/hip_runtime.h>
2 class Class1{
3 public:
4 Class1(float*_in,float*_out):in(_in),out(_out){}
5 void kernel(hipStream_t q);
6 private:
7 float* in;
8 float* out;
9 };
1 #include"class1.h"
2 #include"class2.h"
3 __global__ void class1_kernel(float* in,float* out){
4 const int tid=blockDim.x*blockIdx.x+threadIdx.x;
5 out[tid]=in[tid];
6 }
7 void Class1::kernel(hipStream_t q)
8 {
9 class1_kernel<<<2,512,0,q>>> (in,out);
10 }
Class2 Header file & cpp file
1 #include<hip/hip_runtime.h>
2 class Class2{
3 public:
4 Class2(float*_in,float*_out):in(_in),out(_out){}
5 void kernel(hipStream_t q);
6 private:
7 float* in;
8 float* out;
9 };
1 #include"class2.h"
2 __global__ void class2_kernel(float* in,float* out){
3 const int tid=blockDim.x*blockIdx.x+threadIdx.x;
4 out[tid]=in[tid];
5 }
6 void Class2::kernel(hipStream_t q)
7 {
8 class2_kernel<<<2,512,0,q>>> (in,out);
9 }
@yhlisnps Hi, were you able to resolve your issue on the latest HIP? If so, can we please close this ticket?