Why does hip-clang think ASIO is device code?
Adding the standalone ASIO header causes this sample program to fail to compile, with error messages like:
asio-1.18.1/include/asio/execution/set_done.hpp:212:3: error: dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables.
[build] set_done = asio_execution_set_done_fn::static_instance<>::instance;
My application uses the standalone, header-only ASIO library. A search of the ASIO sources does not reveal any use of the __device__, __constant__, __shared__, or __managed__ keywords. I'm using CMake and trying to follow the documentation. If I remove #include "asio.hpp" from main.cpp, the program compiles and runs as expected. If I change the last line of CMakeLists.txt to target_link_libraries(vectoradd hip::host), main.cpp compiles fine but the device code does not. How do I tell the compiler to treat main.cpp as host code and vectoradd.cu as device code?
CMakeLists.txt
cmake_minimum_required(VERSION 3.21)
project(vectoradd VERSION 0.1 LANGUAGES CXX)

# Build everything as C++17 and fail the configure step if the compiler
# cannot provide it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(HOST_FILES main.cpp)
# NOTE(review): the listing that accompanies this script is labelled
# vectoradd.cu -- confirm the file name on disk matches the one used here.
set(GPU_FILES vectoradd_hip.cu)
# The project only enables CXX, so the .cu source is explicitly marked as CXX
# to get it compiled by the C++ compiler.
set_source_files_properties(${GPU_FILES} PROPERTIES LANGUAGE CXX)

add_executable(vectoradd)
target_sources(vectoradd PRIVATE ${GPU_FILES} ${HOST_FILES})
# Scope the ASIO include path to this target instead of the whole directory.
target_include_directories(vectoradd PRIVATE ${CMAKE_SOURCE_DIR}/asio-1.18.1/include)

list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
# REQUIRED makes a missing ROCm/HIP installation fail here with a clear
# message rather than later, when hip::device turns out not to exist.
find_package(hip REQUIRED)
# NOTE(review): hip::device is attached to the whole target, so its usage
# requirements presumably reach main.cpp as well as the GPU source -- confirm
# whether the host-only file should live in a separate target.
target_link_libraries(vectoradd PRIVATE hip::device)
main.cpp
#include "vectoradd.hpp"
#include "asio.hpp"
/// Program entry point: runs the vector-add sample once and exits with
/// status 0.
int main()
{
    run_vectoradd();

    return 0;
}
vectoradd.hpp
#ifndef VECTORADD_HPP
#define VECTORADD_HPP

/// Runs the vector-add sample.
///
/// Takes no arguments and returns nothing; the outcome is reported as text
/// on standard output ("PASSED!" or "FAILED: <n> errors").
void run_vectoradd();

#endif // VECTORADD_HPP
vectoradd.cu
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include "vectoradd.hpp"
#include "hip/hip_runtime.h"
// Problem size: the buffers hold WIDTH * HEIGHT floats.
constexpr int WIDTH = 1024;
constexpr int HEIGHT = 1024;
constexpr int NUM = WIDTH * HEIGHT;

// Thread-block shape used when launching the kernel.
constexpr int THREADS_PER_BLOCK_X = 16;
constexpr int THREADS_PER_BLOCK_Y = 16;
// Not referenced in the visible part of this file; kept so that any other
// user of the name keeps compiling.
[[maybe_unused]] constexpr int THREADS_PER_BLOCK_Z = 1;
/// Element-wise addition kernel: a[k] = b[k] + c[k].
///
/// Each thread derives a column and a row from its block and thread indices,
/// flattens them to k = row * width + col, and handles that single element.
/// Threads whose flattened index is not below width * height do nothing.
///
/// @param a       Output buffer.
/// @param b       First input buffer.
/// @param c       Second input buffer.
/// @param width   Row length used to flatten the 2-D thread position.
/// @param height  Together with width, bounds the flattened index.
__global__ void
vectoradd_float(float* __restrict__ a, const float* __restrict__ b, const float* __restrict__ c, int width, int height)
{
    const int col = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
    const int row = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
    const int flat = row * width + col;

    // Guard clause: the check is on the flattened index only.
    if (flat >= (width * height)) {
        return;
    }

    a[flat] = b[flat] + c[flat];
}
void run_vectoradd()
{
float* hostA;
float* hostB;
float* hostC;
float* deviceA;
float* deviceB;
float* deviceC;
hipDeviceProp_t devProp;
hipError_t hip_error = hipGetDeviceProperties(&devProp, 0);
std::cout << " System minor " << devProp.minor << std::endl;
std::cout << " System major " << devProp.major << std::endl;
std::cout << " agent prop name " << devProp.name << std::endl;
int i;
int errors;
hostA = (float*)malloc(NUM * sizeof(float));
hostB = (float*)malloc(NUM * sizeof(float));
hostC = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
hostB[i] = (float)i;
hostC[i] = (float)i*100.0f;
}
hip_error = hipMalloc((void**)&deviceA, NUM * sizeof(float));
hip_error = hipMalloc((void**)&deviceB, NUM * sizeof(float));
hip_error = hipMalloc((void**)&deviceC, NUM * sizeof(float));
hip_error = hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice);
hip_error = hipMemcpy(deviceC, hostC, NUM*sizeof(float), hipMemcpyHostToDevice);
hipLaunchKernelGGL(vectoradd_float,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
deviceA ,deviceB ,deviceC ,WIDTH ,HEIGHT);
hip_error = hipMemcpy(hostA, deviceA, NUM*sizeof(float), hipMemcpyDeviceToHost);
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
if (i == 3)
printf("%f %f %f\n", hostA[i], hostB[i], hostC[i]);
if (hostA[i] != (hostB[i] + hostC[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
} else {
printf ("PASSED!\n");
}
hip_error = hipFree(deviceA);
hip_error = hipFree(deviceB);
hip_error = hipFree(deviceC);
free(hostA);
free(hostB);
free(hostC);
}
This should have been fixed by https://reviews.llvm.org/D119615
Thank you. I was able to temporarily work around this by removing several constexpr specifiers from the ASIO library.