[Issue]: different triangular solve behavior between hipSPARSE and cuSPARSE
Problem Description
I was trying triangular solve but get different behavior between hipSPARSE on AMD GPU and cuSPARSE on NVIDIA GPU. if I destroy the DnVec and SpMat and recreate them again between analysis and solve, hipSPARSE will fail with error HIPSPARSE_STATUS_INVALID_VALUE, but cuSPARSE can run successfully.
In line 73 of the reproducer (the `if (1)` condition), switching the value to zero makes it run correctly; when it is one, it reproduces the issue. The CUDA variant of the reproducer uses the corresponding cuSPARSE function for each hipSPARSE call.
Note: if we instead delete the analysis step on the CUDA side, it throws the following error.
** On entry to cusparseSpSV_solve(): cusparseSpSV_analysis() must be executed before cusparseSpSV_solve()
reproducer:
#include <hip/hip_runtime.h>
#include <hipsparse/hipsparse.h>
#include <iostream>
#include <vector>
// Minimal reproducer: run SpSV analysis, then destroy and re-create the
// DnVec/SpMat descriptors before SpSV_solve. cuSPARSE accepts this;
// hipSPARSE (rocSPARSE backend) returns HIPSPARSE_STATUS_INVALID_VALUE.
// NOTE(review): return statuses of most calls are deliberately unchecked to
// keep the reproducer short; only the final SpSV_solve status is inspected.
int main(int argc, char** argv) {
/**
* Lower-triangular test matrix A in CSR form (zero-based indexing):
* 3 0 0 0
* 0 1 0 0
* 0 -3 2 0
* -5 0 0 4
*/
int nnz = 6;
std::vector<float> values{3, 1, -3, 2, -5, 4};
std::vector<int> rowptr{0, 1, 2, 4, 6};
std::vector<int> colind{0, 1, 1, 2, 0, 3};
int n = 4;
std::vector<float> rhs{1, 1, 1, 1};
// Allocate device copies of the CSR arrays and upload them.
float* dvalues;
int *drowptr, *dcolind;
hipMalloc(&dvalues, sizeof(float) * nnz);
hipMalloc(&drowptr, sizeof(int) * (n + 1));
hipMalloc(&dcolind, sizeof(int) * nnz);
hipMemcpy(dvalues, values.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);
hipMemcpy(drowptr, rowptr.data(), sizeof(int) * (n + 1),
hipMemcpyHostToDevice);
hipMemcpy(dcolind, colind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
// Right-hand side b (drhs) and solution vector x (dx) on the device.
float* drhs;
hipMalloc(&drhs, sizeof(float) * n);
hipMemcpy(drhs, rhs.data(), sizeof(float) * n, hipMemcpyHostToDevice);
float* dx;
hipMalloc(&dx, sizeof(float) * n);
// Library handle and the SpSV descriptor that carries the analysis data.
hipsparseHandle_t handle;
hipsparseSpSVDescr_t spsv_desc;
hipsparseCreate(&handle);
hipsparseSpSV_createDescr(&spsv_desc);
// Buffer-size query, buffer allocation, and the analysis phase.
// Solve op(A) * x = alpha * b with A lower-triangular, non-unit diagonal.
auto diag_type = HIPSPARSE_DIAG_TYPE_NON_UNIT;
auto fill_mode = HIPSPARSE_FILL_MODE_LOWER;
float alpha = 1.0;
hipsparseSpMatDescr_t matA;
hipsparseDnVecDescr_t vecB, vecC;
hipsparseCreateDnVec(&vecB, n, drhs, HIP_R_32F);
hipsparseCreateDnVec(&vecC, n, dx, HIP_R_32F);
// Create sparse matrix A in CSR format
hipsparseCreateCsr(&matA, n, n, nnz, drowptr, dcolind, dvalues,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseSpMatSetAttribute(matA, HIPSPARSE_SPMAT_FILL_MODE, &fill_mode,
sizeof(fill_mode));
hipsparseSpMatSetAttribute(matA, HIPSPARSE_SPMAT_DIAG_TYPE, &diag_type,
sizeof(diag_type));
// NOTE(review): hipsparseSpSV_bufferSize expects a size_t* — presumably
// `long unsigned int` matches size_t on this platform; verify on others.
long unsigned int buffer_size = 0;
hipsparseSpSV_bufferSize(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, &alpha,
matA, vecB, vecC, HIP_R_32F,
HIPSPARSE_SPSV_ALG_DEFAULT, spsv_desc, &buffer_size);
void* buffer;
hipMalloc(&buffer, buffer_size);
hipsparseSpSV_analysis(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, &alpha,
matA, vecB, vecC, HIP_R_32F,
HIPSPARSE_SPSV_ALG_DEFAULT, spsv_desc, buffer);
// The crux of the reproducer: destroy and re-create the DnVec and SpMat
// descriptors between analysis and solve. The device pointers and matrix
// data are unchanged; only the descriptor objects are new. With 1 the
// hipSPARSE solve below fails (INVALID_VALUE); with 0 it succeeds.
// cuSPARSE succeeds in both cases.
if (1) {
hipsparseDestroyDnVec(vecC);
hipsparseDestroyDnVec(vecB);
hipsparseDestroySpMat(matA);
hipsparseCreateDnVec(&vecB, n, drhs, HIP_R_32F);
hipsparseCreateDnVec(&vecC, n, dx, HIP_R_32F);
hipsparseCreateCsr(&matA, n, n, nnz, drowptr, dcolind, dvalues,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseSpMatSetAttribute(matA, HIPSPARSE_SPMAT_FILL_MODE, &fill_mode,
sizeof(fill_mode));
hipsparseSpMatSetAttribute(matA, HIPSPARSE_SPMAT_DIAG_TYPE, &diag_type,
sizeof(diag_type));
}
// Solve phase, reusing spsv_desc from the analysis above.
auto status = hipsparseSpSV_solve(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, matA, vecB, vecC, HIP_R_32F,
HIPSPARSE_SPSV_ALG_DEFAULT, spsv_desc);
// Print the actual status next to INVALID_VALUE for easy comparison.
std::cout << status << " " << HIPSPARSE_STATUS_INVALID_VALUE << std::endl;
std::vector<float> x(n);
hipMemcpy(x.data(), dx, sizeof(float) * n, hipMemcpyDeviceToHost);
// answer should be the [1/3 1 2 2/3]'
for (const auto& v : x) {
std::cout << v << std::endl;
}
// Cleanup: descriptors, workspace buffer, and device memory.
hipsparseDestroyDnVec(vecC);
hipsparseDestroyDnVec(vecB);
hipsparseDestroySpMat(matA);
hipFree(buffer);
hipFree(dvalues);
hipFree(drowptr);
hipFree(dcolind);
hipFree(dx);
hipFree(drhs);
return 0;
}
Operating System
Rocky Linux 9.5 (Blue Onyx)
CPU
AMD EPYC 7713 64-Core Processor
GPU
MI50 (gfx906)
ROCm Version
ROCm 6.3.2
ROCm Component
hipSPARSE
Steps to Reproduce
No response
(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
No response
Additional Information
No response
Thank you @yhmtsai !
line 73 is
// if we delete the hipsparse DnVec and SpMat between them? if (1) {
the error message
// rocSPARSE.error.trace: { "function": "csrsv_solve_dispatch",
// "line" : "118",
// "file" : "/long_pathname_so_that_rpms_can_package_the_debug_info/src/rocSPARSE/library/src/level2/rocsparse_csrsv_solve.cpp",
// "status" : "invalid pointer",
// "msg" : "none" }
// rocSPARSE.error.trace: { "function": "csrsv_solve_template",
// "line" : "372",
// "file" : "/long_pathname_so_that_rpms_can_package_the_debug_info/src/rocSPARSE/library/src/level2/rocsparse_csrsv_solve.cpp",
// "status" : "invalid pointer",
// "msg" : "none" }
// rocSPARSE.error.trace: { "function": "spsv_template",
// "line" : "157",
// "file" : "/long_pathname_so_that_rpms_can_package_the_debug_info/src/rocSPARSE/library/src/level2/rocsparse_spsv.cpp",
// "status" : "invalid pointer",
// "msg" : "none" }
// rocSPARSE.error.trace: { "function": "spsv_dynamic_dispatch",
// "line" : "248",
// "file" : "/long_pathname_so_that_rpms_can_package_the_debug_info/src/rocSPARSE/library/src/level2/rocsparse_spsv.cpp",
// "status" : "invalid pointer",
// "msg" : "none" }
// rocSPARSE.error.trace: { "function": "rocsparse_spsv",
// "line" : "349",
// "file" : "/long_pathname_so_that_rpms_can_package_the_debug_info/src/rocSPARSE/library/src/level2/rocsparse_spsv.cpp",
// "status" : "invalid pointer",
// "msg" : "none" }
https://github.com/ROCm/rocSPARSE-internal/blob/develop/library/src/level2/rocsparse_csrsv_solve.cpp#L116
@yhmtsai can you destroy and re-create the cusparse_handle between the analysis phase and the compute phase, and tell me what the cusparse behaviour is?
Sure. cuSPARSE still works after destroying and re-creating the cuSPARSE handle. I also created an additional handle and used a different handle in the solve than in the analysis, and cuSPARSE also works in that case.
This issue has been migrated to: https://github.com/ROCm/rocm-libraries/issues/786
Imported to ROCm/rocm-libraries