amgcl
amgcl copied to clipboard
cuda compute error
hi ddemidov I met an issue when using cuda as backend to compute a topology optimization, with grid size 200x100, the code is below. The calculation results of the previous steps in the loop is right, but 'nan' ocurrs when the loop increases. I am confused why there is a nan. The source code and result are below. I will be appreciated for your advice! : )
while (change > 0.01) {
globalstiffnessforBezier_EW(noElems, d_ctrPtsPairArray, ctrPtsPairNum, h_Acoo, d_Acoo, d_coorowidx, d_coocolidx, d_coovalues,
d_ctrPtToElements,
d_edofMat,
d_cxi, d_cet,
x_d, d_KE,
nelx, nely, E0, Emin, noCtrPts);
h_Acsr = h_Acoo;
Vector_h h_u = solveBezierAMG_C(h_Acsr, h_PK, h_F); // solve by amgcl
sensitivityAnalysisBezier_G(h_u, h_F, freedofs, noDofs, nelx, nely,
d_dc, noElems, d_edofMat, penal, E0, Emin, d_KE, x_d, d_cxi, d_cet, obj);
filterBezier_G(nelx, nely, x_d, d_dc, d_dcn, rmin);
ocupdateBezier_G(nelx, nely, volfrac, change, x_d, d_dxn, d_dcn, dd_dv, dd_move, vol_new);
printf("Iter: %d, obj: %lf, Vol: %lf, ch: %lf \n", loop, obj, vol_new, change);
++loop;
}
// solution code
Vector_h solveBezierAMG_C(Matrix_csr_h h_Acsr, Matrix_csr_h h_PK, Vector_h h_F) {
Matrix_csr_h temp1;
Matrix_csr_h temp2;
cusp::multiply(h_PK, h_Acsr, temp1);
Matrix_csr_h h_PK_tran(h_PK.num_cols, h_PK.num_rows, h_PK.num_entries);
cusp::transpose(h_PK, h_PK_tran);
cusp::multiply(temp1, h_PK_tran, temp2);
// A, d_f, d_u
thrust::device_vector<double> dd_f(h_F.begin(), h_F.end());
thrust::device_vector<double> dd_u(h_PK.num_rows, 0);
auto A = std::make_tuple(
temp2.num_rows,
amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.row_offsets[0]), thrust::raw_pointer_cast(&temp2.row_offsets[0]) + temp2.num_cols + 1),
amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.column_indices[0]), thrust::raw_pointer_cast(&temp2.column_indices[0]) + temp2.row_offsets[temp2.num_cols]),
amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.values[0]), thrust::raw_pointer_cast(&temp2.values[0]) + temp2.row_offsets[temp2.num_cols])
);
int iters;
double error;
// solve
typedef amgcl::backend::cuda<double> Backend;
Backend::params bprm;
cusparseCreate(&bprm.cusparse_handle);
/*typedef amgcl::make_solver<
amgcl::relaxation::as_preconditioner<
amgcl::backend::cuda<double>,
amgcl::relaxation::ilu0>,
amgcl::solver::bicgstab<amgcl::backend::cuda<double>>>
Solver_cuda_bicgstab_ilut;
Solver_cuda_bicgstab_ilut::params pm1;
pm1.solver.maxiter = 1000;
pm1.solver.tol = 1e-6;
Solver_cuda_bicgstab_ilut solver(A, pm1, bprm);
std::tie(iters, error) = solver(dd_f, dd_u);*/
typedef amgcl::make_solver<
amgcl::amg<
Backend,
amgcl::coarsening::smoothed_aggregation,
amgcl::relaxation::spai0
>,
amgcl::solver::cg<Backend>
> Solver;
Solver::params prm;
prm.solver.maxiter = 1000;
prm.solver.tol = 1e-6;
prm.precond.coarsening.aggr.eps_strong = 1e-3;
Solver solve(A, prm, bprm);
std::tie(iters, error) = solve(dd_f, dd_u);
std::cout << "iters: " << iters << ", " << "error: " << error << ".\n";
Vector_h h_u(dd_u.begin(), dd_u.end());
return h_u;
}
right results:
Iter: 0, obj: 818.390559, Vol: 0.399862, ch: 0.200000
Iter: 1, obj: 453.533500, Vol: 0.399963, ch: 0.200000
Iter: 2, obj: 317.637122, Vol: 0.400102, ch: 0.200000
Iter: 3, obj: 247.159557, Vol: 0.400144, ch: 0.200000
Iter: 4, obj: 219.932016, Vol: 0.399920, ch: 0.200000
Iter: 5, obj: 201.616468, Vol: 0.399927, ch: 0.200000
Iter: 6, obj: 187.913326, Vol: 0.399951, ch: 0.200000
Iter: 7, obj: 175.438001, Vol: 0.399829, ch: 0.200000
Iter: 8, obj: 165.994110, Vol: 0.400065, ch: 0.190254
Iter: 9, obj: 158.196875, Vol: 0.400155, ch: 0.178441
Iter: 10, obj: 151.818454, Vol: 0.400174, ch: 0.185031
Iter: 11, obj: 145.985506, Vol: 0.399965, ch: 0.166418
Iter: 12, obj: 140.946531, Vol: 0.399942, ch: 0.164372
Iter: 13, obj: 136.660098, Vol: 0.400098, ch: 0.181497
Iter: 14, obj: 132.494776, Vol: 0.400086, ch: 0.189367
Iter: 15, obj: 128.215185, Vol: 0.399951, ch: 0.189229
Iter: 16, obj: 123.636787, Vol: 0.399986, ch: 0.197572
Iter: 17, obj: 118.764274, Vol: 0.399958, ch: 0.200000
Iter: 18, obj: 114.142652, Vol: 0.400090, ch: 0.200000
Iter: 19, obj: 110.909720, Vol: 0.399906, ch: 0.178289
my results with amgcl:
iters: 131, error: 8.30938e-07.
Iter: 0, obj: 818.403163, Vol: 0.400240, ch: 0.200000
iters: 154, error: 6.65027e-07.
Iter: 1, obj: 452.518742, Vol: 0.399971, ch: 0.200000
iters: 168, error: 9.3293e-07.
Iter: 2, obj: 317.630086, Vol: 0.399974, ch: 0.200000
iters: 190, error: 9.01877e-07.
Iter: 3, obj: 247.339860, Vol: 0.400200, ch: 0.200000
iters: 215, error: 8.96052e-07.
Iter: 4, obj: 203.122346, Vol: 0.400068, ch: 0.200000
iters: 235, error: 9.42463e-07.
Iter: 5, obj: 170.154033, Vol: 0.400202, ch: 0.200000
iters: 260, error: 9.41993e-07.
Iter: 6, obj: 145.739339, Vol: 0.400219, ch: 0.200000
iters: 276, error: 7.61956e-07.
Iter: 7, obj: 126.226413, Vol: 0.399999, ch: 0.200000
iters: 291, error: 9.38887e-07.
Iter: 8, obj: 111.270963, Vol: 0.400115, ch: 0.200000
iters: 305, error: 8.50583e-07.
Iter: 9, obj: 98.469317, Vol: 0.400071, ch: 0.200000
iters: 320, error: 8.74598e-07.
Iter: 10, obj: 87.786592, Vol: -nan(ind), ch: 0.200000
iters: 0, error: nan.
Iter: 11, obj: 0.000000, Vol: -nan(ind), ch: -nan(ind)