Eigen decomposition for Hermitian complex matrix does not match the result with matlab

Open Mirage1089 opened this issue 4 years ago • 1 comments

Hello,

I am following the example of eigen decomposition from here, https://github.com/NVIDIA/CUDALibrarySamples/blob/master/cuSOLVER/syevd/cusolver_syevd_example.cu

I need to do it for complex number matrix. The problem is the eigen vector is not matching the result with Matlab result.

Does anyone have any idea about it where is the mistake in my code?

My code is here for convenience,


#include <cstdio>
#include <cstdlib>
#include <vector>

#include <cuda_runtime.h>
#include <cusolverDn.h>

#include "cusolver_utils.h"

int N = 16;
void BuildMatrix(cuComplex* input);

void main()
{
	cusolverDnHandle_t cusolverH = NULL;
	cudaStream_t stream = NULL;
	printf("*******************\n");

	cuComplex* h_idata = (cuComplex*)malloc(sizeof(cuComplex) * N);
	cuComplex* h_eigenVector = (cuComplex*)malloc(sizeof(cuComplex) * N); // eigen vector
	float* h_eigenValue = (float*)malloc(sizeof(float) * 4);	// eigen Value
	BuildMatrix(h_idata);
	int count = 0;
	for (int i = 0; i < N / 4; i++)
	{
		for (int j = 0; j < 4; j++)
		{
			printf("%f + %f\t", h_idata[count].x, h_idata[count].y);
			count++;
		}
		printf("\n");
	}
	printf("\n*****************\n");

	/* step 1: create cusolver handle, bind a stream */
	CUSOLVER_CHECK(cusolverDnCreate(&cusolverH));

	CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
	CUSOLVER_CHECK(cusolverDnSetStream(cusolverH, stream));

	// step 2: reserve memory in cuda and copy input data from host to device
	cuComplex* d_idata;
	float* d_eigenValue = nullptr;
	int* d_info = nullptr;

	CUDA_CHECK(cudaMalloc((void**)&d_idata, N * sizeof(cuComplex)));
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_eigenValue), N * sizeof(float)));
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_info), sizeof(int)));

	CUDA_CHECK(cudaMemcpyAsync(d_idata, h_idata, N * sizeof(cuComplex), cudaMemcpyHostToDevice, stream));

	// step 3: query working space of syevd
	cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
	cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;

	int lwork = 0;            /* size of workspace */
	cuComplex* d_work = nullptr; /* device workspace*/
	const int m = 4;
	const int lda = m;

	cusolverDnCheevd_bufferSize(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, &lwork);

	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_work), sizeof(cuComplex) * lwork));

	// step 4: compute spectrum
	cusolverDnCheevd(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, d_work, lwork, d_info);

	CUDA_CHECK(
		cudaMemcpyAsync(h_eigenVector, d_idata, N * sizeof(cuComplex), cudaMemcpyDeviceToHost, stream));
	CUDA_CHECK(
		cudaMemcpyAsync(h_eigenValue, d_eigenValue, 4 * sizeof(double), cudaMemcpyDeviceToHost, stream));

	int info = 0;
	CUDA_CHECK(cudaMemcpyAsync(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost, stream));

	CUDA_CHECK(cudaStreamSynchronize(stream));

	std::printf("after syevd: info = %d\n", info);
	if (0 > info)
	{
		std::printf("%d-th parameter is wrong \n", -info);
		exit(1);
	}

	count = 0;
	for (int i = 0; i < N / 4; i++)
	{
		for (int j = 0; j < 4; j++)
		{
			printf("%f + %f\t", h_eigenVector[count].x, h_eigenVector[count].y);
			count++;
		}
		printf("\n");
	}
	printf("\n");
	for (int i = 0; i < N / 4; i++)
	{
		std::cout << h_eigenValue[i] << std::endl;
	}
	printf("\n*****************\n");

	/* free resources */
	CUDA_CHECK(cudaFree(d_idata));
	CUDA_CHECK(cudaFree(d_eigenValue));
	CUDA_CHECK(cudaFree(d_info));
	CUDA_CHECK(cudaFree(d_work));

	CUSOLVER_CHECK(cusolverDnDestroy(cusolverH));

	CUDA_CHECK(cudaStreamDestroy(stream));

	CUDA_CHECK(cudaDeviceReset());
}

//0.5560 + 0.0000i - 0.4864 + 0.0548i   0.8592 + 0.2101i - 1.5374 - 0.2069i
//- 0.4864 - 0.0548i   0.4317 + 0.0000i - 0.7318 - 0.2698i   1.3255 + 0.3344i
//0.8592 - 0.2101i - 0.7318 + 0.2698i   1.4099 + 0.0000i - 2.4578 + 0.2609i
//- 1.5374 + 0.2069i   1.3255 - 0.3344i - 2.4578 - 0.2609i   4.3333 + 0.0000i
void BuildMatrix(cuComplex* input)
{
	std::vector<float> realVector = { 0.5560, -0.4864, 0.8592, -1.5374, -0.4864, 0.4317, -0.7318, 1.3255,
									0.8592, -0.7318, 1.4099, -2.4578, -1.5374, 1.3255, -2.4578, 4.3333 };
	std::vector<float> imagVector = { 0, -0.0548, -0.2101, 0.2069, 0.0548, 0.0000, 0.2698, -0.3344,
									 0.2101, -0.2698, 0, -0.2609, -0.2069, 0.3344, 0.2609, 0 };

	for (int i = 0; i < N; i++)
	{
		input[i].x = realVector.at(i) * std::pow(10, 11);
		input[i].y = imagVector.at(i) * std::pow(10, 11);
	}
}

Feb 08 '22 14:02 Mirage1089

Hi, this is somewhat old but in case it is helpful: eigenvectors may have arbitrary phase, to compare them you need to synchronise or eliminate it.

Feb 13 '23 23:02 dmt4