scalapack
scalapack copied to clipboard
Wrong lwmin in pdstedc
Inside pdstedc
function pdlasrt
is called with passing of lwork
. But: lwmin
for the 1st one is
6*n + 2*np*nq
while lwmin
for the 2nd function is
lwmin = max( n, np*( nb+nq ) )
and sometimes this can lead to error in pdlasrt
, despite lwork
in pdsted
equal to lwmin
. For example, nq
is small and np
$\approx$ nb
:
$6n + 2np*nq < np*nb < nb*(nb + nq)$
Code for reproduction (I can't load file) and typical output is below.
Typical output for mpirun -np 4 ./a.out 128 127
:
(0x1): m = n = 128 mb = nb = 70
(1x0): m = n = 128 mb = nb = 70
(1x1): m = n = 128 mb = nb = 70
(1x1): requested work = 7496 and iwork = 914
(0x0): m = n = 128 mb = nb = 70
(0x0): requested work = 10568 and iwork = 914
(1x0): requested work = 8888 and iwork = 914
(0x1): requested work = 8888 and iwork = 914
{ 0, 1}: On entry to PDLASRT parameter number 9 had an illegal value
error in pdstedc call, code = -9
C++ code below request minimal workspaces for pdstedc
and call it with corresponding lwork
and liwork
.
#include <mpi.h>
#include <iostream>
/*
To compile:
mpic++ test.cpp -L PATH_TO_LIBS -lscalapack -llapack -lrefblas -lgfortran
To run:
mpirun -np 4 ./a.out n nb
*/
extern "C" {
void Cblacs_get(int, int, int*);
void Cblacs_gridinit(int*, const char*, int, int);
void Cblacs_gridinfo(int, int*, int*, int*,int*);
void Cblacs_gridexit(int);
int numroc_(const int* n, int* nb, const int* iproc, const int* isrc, const int* nprocs);
void descinit_(int* desc, const int* m, const int* n, const int* mb, const int* nb,
const int* irsrc, const int* icsrc, const int* ictxt, const int* ld, int* info);
void pdstedc_( const char* comz, const int* n, double* d, double* e, double* q, const int* iq,
const int* jq, const int* descq, double* work, const int* lwork, int* iwork, const int* liwork,
int* info);
}
int main(int argc, char **argv) {
// some constatns
constexpr int bignum = 1'000'000;
const int izero = 0;
const int ione = 1;
const char compz = 'I';
// init grid 2x2
int ictxt, myrow, mycol;
int p = 2;
int q = 2;
MPI_Init(&argc, &argv);
int myrank;
int nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
Cblacs_get(0, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row-major", p, q);
Cblacs_gridinfo(ictxt, &p, &q, &myrow, &mycol);
if (argc != 3)
{
std::cerr<<"wrong argC"<<std::endl;
return 1;
}
const int mn = std::stoi(argv[1]);
const int mnb = std::stoi(argv[2]);
std::cout<<"("<<myrow<<"x"<<mycol<<"): "<<"m = n = "<<mn<<" mb = nb = "<<mnb<<std::endl;
int info;
// init input arguments for pdstedc
int descq[9];
descinit_(descq, &mn, &mn, &mnb, &mnb, &izero, &izero, &ictxt, &mn, &info);
if (info != 0)
{
std::cerr<<"error in descinit, code = "<<info<<std::endl;
return 0;
}
// allocate memory (I hope, that requested size < bignum)
double* d = new double[bignum];
double* e = new double[bignum];
double* qmat = new double[bignum];
double* work = new double[bignum];
int* iwork = new int[bignum];
double workReq = 0.0/0.0;
int iworkReq = -1000;
const int& imone = -1;
// request minimal workspaces in pdstedc
pdstedc_(&compz, &mn, d, e, qmat, &ione, &ione, descq, &workReq, &imone, &iworkReq, &imone, &info);
if (info != 0)
{
std::cerr<<"error in workspaces request, code = "<<info<<std::endl;
return 0;
}
std::cout<<"("<<myrow<<"x"<<mycol<<"): "<<"requested work = "<<workReq<<" and iwork = "<<iworkReq<<std::endl;
const int lwork = workReq;
const int liwork = iworkReq;
// run pdstedc with minimal workspaces
pdstedc_(&compz, &mn, d, e, qmat, &ione, &ione, descq, work, &lwork, iwork, &liwork, &info);
if (info != 0)
{
std::cerr<<"error in pdstedc call, code = "<<info<<std::endl;
return 0;
}
delete[] d;
delete[] e;
delete[] qmat;
delete[] work;
delete[] iwork;
Cblacs_gridexit(ictxt);
MPI_Finalize();
}
Note: the same problem is actual for single precision versions.
UPD.
Looks like, this will lead to similar bug in p(z|c)heevd
function, since minimal size of workspace for real numbers is about 2*np*nq
for it; a few lines from pzheevd.f
:
* LRWORK (local input) INTEGER
* Size of RWORK array.
* LRWORK >= 1 + 9*N + 3*NP*NQ,
.....
INDZ = INDE + N
INDRWORK = INDZ + NP0*NQ0
LLRWORK = LRWORK - INDRWORK + 1
....
CALL PDSTEDC( 'I', N, W, RWORK( INDE+OFFSET ), RWORK( INDZ ), IZ,
$ JZ, DESCRZ, RWORK( INDRWORK ), LLRWORK, IWORK,
$ LIWORK, IINFO )