CUDALibrarySamples
CUDALibrarySamples copied to clipboard
Eigen decomposition for Hermitian complex matrix does not match the result with matlab
Hello,
I am following the example of eigen decomposition from here, https://github.com/NVIDIA/CUDALibrarySamples/blob/master/cuSOLVER/syevd/cusolver_syevd_example.cu
I need to do it for complex number matrix. The problem is the eigen vector is not matching the result with Matlab result.
Does anyone have any idea about it where is the mistake in my code?
My code is here for convenience,
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <cuda_runtime.h>
#include <cusolverDn.h>
#include "cusolver_utils.h"
int N = 16;
void BuildMatrix(cuComplex* input);
void main()
{
cusolverDnHandle_t cusolverH = NULL;
cudaStream_t stream = NULL;
printf("*******************\n");
cuComplex* h_idata = (cuComplex*)malloc(sizeof(cuComplex) * N);
cuComplex* h_eigenVector = (cuComplex*)malloc(sizeof(cuComplex) * N); // eigen vector
float* h_eigenValue = (float*)malloc(sizeof(float) * 4); // eigen Value
BuildMatrix(h_idata);
int count = 0;
for (int i = 0; i < N / 4; i++)
{
for (int j = 0; j < 4; j++)
{
printf("%f + %f\t", h_idata[count].x, h_idata[count].y);
count++;
}
printf("\n");
}
printf("\n*****************\n");
/* step 1: create cusolver handle, bind a stream */
CUSOLVER_CHECK(cusolverDnCreate(&cusolverH));
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
CUSOLVER_CHECK(cusolverDnSetStream(cusolverH, stream));
// step 2: reserve memory in cuda and copy input data from host to device
cuComplex* d_idata;
float* d_eigenValue = nullptr;
int* d_info = nullptr;
CUDA_CHECK(cudaMalloc((void**)&d_idata, N * sizeof(cuComplex)));
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_eigenValue), N * sizeof(float)));
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_info), sizeof(int)));
CUDA_CHECK(cudaMemcpyAsync(d_idata, h_idata, N * sizeof(cuComplex), cudaMemcpyHostToDevice, stream));
// step 3: query working space of syevd
cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
int lwork = 0; /* size of workspace */
cuComplex* d_work = nullptr; /* device workspace*/
const int m = 4;
const int lda = m;
cusolverDnCheevd_bufferSize(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, &lwork);
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_work), sizeof(cuComplex) * lwork));
// step 4: compute spectrum
cusolverDnCheevd(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, d_work, lwork, d_info);
CUDA_CHECK(
cudaMemcpyAsync(h_eigenVector, d_idata, N * sizeof(cuComplex), cudaMemcpyDeviceToHost, stream));
CUDA_CHECK(
cudaMemcpyAsync(h_eigenValue, d_eigenValue, 4 * sizeof(double), cudaMemcpyDeviceToHost, stream));
int info = 0;
CUDA_CHECK(cudaMemcpyAsync(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost, stream));
CUDA_CHECK(cudaStreamSynchronize(stream));
std::printf("after syevd: info = %d\n", info);
if (0 > info)
{
std::printf("%d-th parameter is wrong \n", -info);
exit(1);
}
count = 0;
for (int i = 0; i < N / 4; i++)
{
for (int j = 0; j < 4; j++)
{
printf("%f + %f\t", h_eigenVector[count].x, h_eigenVector[count].y);
count++;
}
printf("\n");
}
printf("\n");
for (int i = 0; i < N / 4; i++)
{
std::cout << h_eigenValue[i] << std::endl;
}
printf("\n*****************\n");
/* free resources */
CUDA_CHECK(cudaFree(d_idata));
CUDA_CHECK(cudaFree(d_eigenValue));
CUDA_CHECK(cudaFree(d_info));
CUDA_CHECK(cudaFree(d_work));
CUSOLVER_CHECK(cusolverDnDestroy(cusolverH));
CUDA_CHECK(cudaStreamDestroy(stream));
CUDA_CHECK(cudaDeviceReset());
}
//0.5560 + 0.0000i - 0.4864 + 0.0548i 0.8592 + 0.2101i - 1.5374 - 0.2069i
//- 0.4864 - 0.0548i 0.4317 + 0.0000i - 0.7318 - 0.2698i 1.3255 + 0.3344i
//0.8592 - 0.2101i - 0.7318 + 0.2698i 1.4099 + 0.0000i - 2.4578 + 0.2609i
//- 1.5374 + 0.2069i 1.3255 - 0.3344i - 2.4578 - 0.2609i 4.3333 + 0.0000i
void BuildMatrix(cuComplex* input)
{
std::vector<float> realVector = { 0.5560, -0.4864, 0.8592, -1.5374, -0.4864, 0.4317, -0.7318, 1.3255,
0.8592, -0.7318, 1.4099, -2.4578, -1.5374, 1.3255, -2.4578, 4.3333 };
std::vector<float> imagVector = { 0, -0.0548, -0.2101, 0.2069, 0.0548, 0.0000, 0.2698, -0.3344,
0.2101, -0.2698, 0, -0.2609, -0.2069, 0.3344, 0.2609, 0 };
for (int i = 0; i < N; i++)
{
input[i].x = realVector.at(i) * std::pow(10, 11);
input[i].y = imagVector.at(i) * std::pow(10, 11);
}
}
Hi, this is somewhat old but in case it is helpful: eigenvectors may have arbitrary phase, to compare them you need to synchronise or eliminate it.