HIP
HIP copied to clipboard
How do I add HIP support to an existing cmake project? The link costs too much time.
Hello guys.. How do I add HIP support to an existing cmake project. I followed this issue #231 and had tried 4 approaches, but only one approach work correctly. So what is the correct way to add HIP support to an existing cmake project?
I am giving the detail information as follow: System information
- Have I written custom code: Yes main.cpp
#include <iostream>
#define __HIP_PLATFORM_HCC__
// hip header file
#include "hip/hip_runtime.h"
#include "transpose.cuh"
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)
// CPU implementation of matrix transpose
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
for (unsigned int j = 0; j < width; j++) {
for (unsigned int i = 0; i < width; i++) {
output[i * width + j] = input[j * width + i];
}
}
}
int main() {
float* Matrix;
float* TransposeMatrix;
float* cpuTransposeMatrix;
float* gpuMatrix;
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
std::cout << "Device name " << devProp.name << std::endl;
int i;
int errors;
Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
// Lauching kernel from host
exec(gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
}
}
if (errors != 0) {
printf("FAILED: %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
return errors;
}
MatrixTranspose.cpp
#include <iostream>
#include "hip/hip_runtime_api.h"
#include "hip/hip_runtime.h"
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
// Device (Kernel) function, it must be void
__global__ void
__attribute__((visibility("default")))
matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
out[y * width + x] = in[x * width + y];
}
void exec(float* gpuTransposeMatrix, float* gpuMatrix, const int width) {
// Lauching kernel from host
hipLaunchKernelGGL(
matrixTranspose,
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
gpuTransposeMatrix, gpuMatrix, WIDTH);
}
transpose.cuh
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
void exec(float* out, float* in, const int width);
CMakeList.txt
cmake_minimum_required(VERSION 2.8.3)
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
endif()
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
project(12_cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
else()
message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
endif()
set(MY_SOURCE_FILES MatrixTranspose.cpp)
set(MY_TARGET_NAME MatrixTranspose)
set(MY_HIPCC_OPTIONS )
set(MY_HCC_OPTIONS )
set(MY_NVCC_OPTIONS )
set (CMAKE_LINKER "/opt/rocm/bin/hipcc")
set (CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_LINKER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_library(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS ${MY_HIPCC_OPTIONS} HCC_OPTIONS ${MY_HCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS})
set (SOURCES main.cpp)
#add_custom_target(combined COMMAND ar -x $<TARGET_FILE:MatrixTranspose>
# WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
# DEPENDS MatrixTranspose
# )
#add_library (hip_cmake_lib STATIC main.cpp)
#add_dependencies(hip_cmake_lib MatrixTranspose)
add_executable (hip_cmake_test main.cpp)
target_link_libraries (hip_cmake_test MatrixTranspose hip_hcc)
- ROCm version
Package: rocm-libs
Version: 2.3.14
Priority: optional
Section: devel
Maintainer: Advanced Micro Devices Inc.
Installed-Size: 13.3 kB
Depends: rocfft, rocrand, hipblas, rocblas
Homepage: https://github.com/RadeonOpenCompute/ROCm
Download-Size: 766 B
APT-Sources: http://repo.radeon.com/rocm/apt/debian xenial/main amd64 Packages
Description: Radeon Open Compute (ROCm) Runtime software stack
- hip version
HIP version : 1.5.19123
== hipconfig
HIP_PATH : /opt/rocm
HIP_PLATFORM : hcc
CPP_CONFIG : -D__HIP_PLATFORM_HCC__= -I/opt/rocm/include -I/opt/rocm/hcc/include
== hcc
HSA_PATH : /opt/rocm/hsa
HCC_HOME : /opt/rocm/hcc
HCC clang version 9.0.0 (/data/jenkins_workspace/compute-rocm-rel-2.3/external/hcc-tot/clang 785f31db116e742ac53d052e207979869a857d1a) (/data/jenkins_workspace/compute-rocm-rel-2.3/external/hcc-tot/compiler 87f982f8ce2b85ce824f91bf8c2c90f6843a50a3) (based on HCC 1.3.19115-9b3a740-785f31d-87f982f )
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm/hcc/bin
LLVM (http://llvm.org/):
LLVM version 9.0.0svn
Optimized build.
Default target: x86_64-unknown-linux-gnu
Host CPU: haswell
Registered Targets:
amdgcn - AMD GCN GPUs
r600 - AMD GPUs HD2XXX-HD6XXX
x86 - 32-bit X86: Pentium-Pro and above
x86-64 - 64-bit X86: EM64T and AMD64
HCC-cxxflags : -hc -std=c++amp -I/opt/rocm/hcc/include -I/opt/rocm/includeHCC-ldflags : -hc -std=c++amp -L/opt/rocm/hcc/lib -Wl,--rpath=/opt/rocm/hcc/lib -ldl -lm -lpthread -lhc_am -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive
...
== Linux Kernel
Hostname : #####
Linux qiang 4.13.16-041316-generic #201711240901 SMP Fri Nov 24 09:02:42 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 16.04.5 LTS
Release: 16.04
Codename: xenial
Describe the current behavior
-
If I compile the device code via macro hip_add_library to a .a file, and compile the host code with gcc. And then link the device object code and host object code with hipcc, the generated executable runs correctly. But the link costs to much time in our machine learning framework.
-
If I compile the device code via macro hip_add_library to a .so file (i.e. pass SHARED in hip_add_library), and compile the host code with gcc. And then link the device object code and host object code with hipcc. When I run the executable, it throws an error:
Device name Device 6860
terminate called after throwing an instance of 'std::exception'
what(): std::exception
[1] 38576 abort (core dumped) ./hip_cmake_test
I have investigated this error, the exception is thrown at
hip/hcc_detail/functional_grid_launch.hpp:108
if (it == function_names().cend()) {
hip_throw(std::runtime_error{"Undefined __global__ function."});
}
- If I compile the device code via macro hip_add_library to a .so file, and compile the host code with gcc. And then link the device object code and host object code with gcc. When I run the executable, it throws the same error as the above situation.
-If I compile the device code via macro hip_add_library to a .a file, and compile the host code with gcc. And then link the device object code and host object code with gcc. A link error occurs.
/usr/bin/x86_64-linux-gnu-ld: libMatrixTranspose.a(MatrixTranspose_generated_MatrixTranspose.cpp.o): undefined reference to symbol 'hsa_system_major_extension_supported@@ROCR_1'
/opt/rocm/hsa/lib/libhsa-runtime64.so.1: error adding symbols: DSO missing from command line
collect2: error: ld returned 1 exit status
CMakeFiles/hip_cmake_test.dir/build.make:95: recipe for target 'hip_cmake_test' failed
make[2]: *** [hip_cmake_test] Error 1
CMakeFiles/Makefile2:104: recipe for target 'CMakeFiles/hip_cmake_test.dir/all' failed
make[1]: *** [CMakeFiles/hip_cmake_test.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2
Describe the excepted behavior
- One can compile the device code via hip_add_library, and compile host code with gcc. And then link the device object and host object with gcc/hipcc. The link time is within a reasonable range.
Please assist how to resolve this problem
Thanks and Regards.
You may try use hip-clang for compilation and get better chance. hip-clang embed kernel as strings with internal linkage therefore does not need kernel symbols in the elf itself. You may check HIP installation guide about how to use hip-clang with HIP.
After stumbling on this same exact problem, I ended up finding a solution to this. Thanks a lot @xiaocenxiaocen for the small example, that helped a lot.
The fix here also fixes some other problems that arise during the linking step of the shared library creation. In particular, when using the target exported from CMake, i.e. in /opt/rocm/hcc/lib/cmake/hcc/hcc-targets.cmake
the flag -hc
is passed for INTERFACE_LINK_LIBRARIES
of the hcc::hccshared
target. This flag is only available for hipcc
, but hipcc
is never used when linking at shared library creation step, except when doing the change I did here.
The changes are relatively small: passing in the SHARED
into hip_add_library
and changing the various CMAKE_LINKER
related settings.
Afterwards, calling the following in a build directory works properly.
cmake -DBUILD_SHARED_LIBS=on ..
export LD_LIBRARY_PATH=$PWD:$LD_LIBRARY_PATH #rpath is not set through hipcc
# either pass the rpath by hand in the CMakeLists.txt or do this
./hip_cmake_test
Final CMakeLists.txt
cmake_minimum_required(VERSION 3.8)
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
endif()
endif()
if(NOT DEFINED ROCM_PATH)
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed")
elseif(DEFINED ENV{HIP_PATH})
set(ROCM_PATH "$ENV{HIP_PATH}/.." CACHE PATH "Path to which ROCM has been installed")
else()
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed")
endif()
endif()
if(NOT DEFINED HCC_PATH)
if(DEFINED ENV{HCC_PATH})
set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed")
else()
set(HCC_PATH "${ROCM_PATH}/hcc" CACHE PATH "Path to which HCC has been installed")
endif()
set(HCC_HOME "${HCC_PATH}")
endif()
if(NOT DEFINED HIP_CLANG_PATH)
if(NOT DEFINED ENV{HIP_CLANG_PATH})
set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin" CACHE PATH "Path to which HIP compatible clang binaries have been installed")
else()
set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH} CACHE PATH "Path to which HIP compatible clang binaries have been installed")
endif()
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
list(APPEND CMAKE_PREFIX_PATH
"${HIP_PATH}/lib/cmake"
"${HIP_PATH}/../lib/cmake" # hopefully catches all extra HIP dependencies
)
project(12_cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
else()
message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
endif()
find_package(hip REQUIRED)
# For ROCm >=3.5, wipe hip-clang specific interface options which are propagated
set_target_properties(hip::device PROPERTIES INTERFACE_COMPILE_OPTIONS "-fPIC")
set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES "hip::host")
set(MY_SOURCE_FILES MatrixTranspose.cpp)
set(MY_TARGET_NAME MatrixTranspose)
set(MY_HIPCC_OPTIONS "-fPIC")
set(MY_HCC_OPTIONS )
set(MY_NVCC_OPTIONS )
set(MY_CLANG_OPTIONS )
if (BUILD_SHARED_LIBS)
set(STATIC_OR_SHARED SHARED)
else()
set(STATIC_OR_SHARED STATIC)
endif()
set_source_files_properties(${MY_SOURCE_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_library(${MY_TARGET_NAME} ${MY_SOURCE_FILES} HIPCC_OPTIONS "${MY_HIPCC_OPTIONS}" HCC_OPTIONS "${MY_HCC_OPTIONS}" NVCC_OPTIONS "${MY_NVCC_OPTIONS}" CLANG_OPTIONS "${MY_CLANG_OPTIONS}" ${STATIC_OR_SHARED})
set_target_properties(${MY_TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
target_link_libraries (${MY_TARGET_NAME} PRIVATE hip::device)
set (SOURCES main.cpp)
add_executable (hip_cmake_test main.cpp)
target_include_directories(hip_cmake_test
PRIVATE
$<BUILD_INTERFACE:${HIP_PATH}/include>
$<BUILD_INTERFACE:${HIP_PATH}/../include>)
target_link_libraries (hip_cmake_test MatrixTranspose hip::device)
ping @hartwiganzt
Another way to fix this issue is to call set_target_properties(${MY_TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
after calling hip_add_library
. Setting HCC_PATH
before find_package(HIP QUIET)
is also required. In this case, the custom CMAKE_CXX_CREATE_SHARED_LIBRARY
is not required.
@xiaocenxiaocen Do you still need assistance with this ticket? If not, please close ticket. Thanks!