amgcl cuda compute error

cuda compute error

Open ripplesding opened this issue 1 year ago • 2 comments

hi ddemidov I met an issue when using cuda as backend to compute a topology optimization, with grid size 200x100, the code is below. The calculation results of the previous steps in the loop is right, but 'nan' ocurrs when the loop increases. I am confused why there is a nan. The source code and result are below. I will be appreciated for your advice! : )

while (change > 0.01) {
	globalstiffnessforBezier_EW(noElems, d_ctrPtsPairArray, ctrPtsPairNum, h_Acoo, d_Acoo, d_coorowidx, d_coocolidx, d_coovalues,
			d_ctrPtToElements,
			d_edofMat,
			d_cxi, d_cet,
			x_d, d_KE,
			nelx, nely, E0, Emin, noCtrPts);
	h_Acsr = h_Acoo;
		
	Vector_h h_u = solveBezierAMG_C(h_Acsr, h_PK, h_F);  // solve by amgcl
	sensitivityAnalysisBezier_G(h_u, h_F, freedofs, noDofs, nelx, nely,
			d_dc, noElems, d_edofMat, penal, E0, Emin, d_KE, x_d, d_cxi, d_cet, obj);
	filterBezier_G(nelx, nely, x_d, d_dc, d_dcn, rmin);
	ocupdateBezier_G(nelx, nely, volfrac, change, x_d, d_dxn, d_dcn, dd_dv, dd_move, vol_new);
	printf("Iter: %d, obj: %lf, Vol: %lf, ch: %lf \n", loop, obj, vol_new, change);
	++loop;
}

// solution code 
Vector_h solveBezierAMG_C(Matrix_csr_h h_Acsr, Matrix_csr_h h_PK, Vector_h h_F) {

	Matrix_csr_h temp1;
	Matrix_csr_h temp2;
	cusp::multiply(h_PK, h_Acsr, temp1);
	Matrix_csr_h h_PK_tran(h_PK.num_cols, h_PK.num_rows, h_PK.num_entries);
	cusp::transpose(h_PK, h_PK_tran);
	cusp::multiply(temp1, h_PK_tran, temp2);


	// A, d_f, d_u
	thrust::device_vector<double> dd_f(h_F.begin(), h_F.end());
	thrust::device_vector<double> dd_u(h_PK.num_rows, 0);

	auto A = std::make_tuple(
		temp2.num_rows,
		amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.row_offsets[0]), thrust::raw_pointer_cast(&temp2.row_offsets[0]) + temp2.num_cols + 1),
		amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.column_indices[0]), thrust::raw_pointer_cast(&temp2.column_indices[0]) + temp2.row_offsets[temp2.num_cols]),
		amgcl::make_iterator_range(thrust::raw_pointer_cast(&temp2.values[0]), thrust::raw_pointer_cast(&temp2.values[0]) + temp2.row_offsets[temp2.num_cols])
	);
	int iters;
	double error;

	// solve
	typedef amgcl::backend::cuda<double> Backend;
	Backend::params bprm;
	cusparseCreate(&bprm.cusparse_handle);

	/*typedef amgcl::make_solver<
		amgcl::relaxation::as_preconditioner<
		amgcl::backend::cuda<double>,
		amgcl::relaxation::ilu0>,
		amgcl::solver::bicgstab<amgcl::backend::cuda<double>>>
		Solver_cuda_bicgstab_ilut;

	Solver_cuda_bicgstab_ilut::params pm1;
	pm1.solver.maxiter = 1000;
	pm1.solver.tol = 1e-6;
	Solver_cuda_bicgstab_ilut solver(A, pm1, bprm);
	std::tie(iters, error) = solver(dd_f, dd_u);*/

	typedef amgcl::make_solver<
		amgcl::amg<
		Backend,
		amgcl::coarsening::smoothed_aggregation,
		amgcl::relaxation::spai0
		>,
		amgcl::solver::cg<Backend>
	> Solver;
	Solver::params prm;
	prm.solver.maxiter = 1000;
	prm.solver.tol = 1e-6;
	prm.precond.coarsening.aggr.eps_strong = 1e-3;
	Solver solve(A, prm, bprm);
	std::tie(iters, error) = solve(dd_f, dd_u);

	std::cout << "iters: " << iters << "， " << "error: " << error << ".\n";
	Vector_h h_u(dd_u.begin(), dd_u.end());

	return h_u;
}

right results:
Iter: 0, obj: 818.390559, Vol: 0.399862, ch: 0.200000
Iter: 1, obj: 453.533500, Vol: 0.399963, ch: 0.200000
Iter: 2, obj: 317.637122, Vol: 0.400102, ch: 0.200000
Iter: 3, obj: 247.159557, Vol: 0.400144, ch: 0.200000
Iter: 4, obj: 219.932016, Vol: 0.399920, ch: 0.200000
Iter: 5, obj: 201.616468, Vol: 0.399927, ch: 0.200000
Iter: 6, obj: 187.913326, Vol: 0.399951, ch: 0.200000
Iter: 7, obj: 175.438001, Vol: 0.399829, ch: 0.200000
Iter: 8, obj: 165.994110, Vol: 0.400065, ch: 0.190254
Iter: 9, obj: 158.196875, Vol: 0.400155, ch: 0.178441
Iter: 10, obj: 151.818454, Vol: 0.400174, ch: 0.185031
Iter: 11, obj: 145.985506, Vol: 0.399965, ch: 0.166418
Iter: 12, obj: 140.946531, Vol: 0.399942, ch: 0.164372
Iter: 13, obj: 136.660098, Vol: 0.400098, ch: 0.181497
Iter: 14, obj: 132.494776, Vol: 0.400086, ch: 0.189367
Iter: 15, obj: 128.215185, Vol: 0.399951, ch: 0.189229
Iter: 16, obj: 123.636787, Vol: 0.399986, ch: 0.197572
Iter: 17, obj: 118.764274, Vol: 0.399958, ch: 0.200000
Iter: 18, obj: 114.142652, Vol: 0.400090, ch: 0.200000
Iter: 19, obj: 110.909720, Vol: 0.399906, ch: 0.178289

my results with amgcl:
iters: 131， error: 8.30938e-07.
Iter: 0, obj: 818.403163, Vol: 0.400240, ch: 0.200000
iters: 154， error: 6.65027e-07.
Iter: 1, obj: 452.518742, Vol: 0.399971, ch: 0.200000
iters: 168， error: 9.3293e-07.
Iter: 2, obj: 317.630086, Vol: 0.399974, ch: 0.200000
iters: 190， error: 9.01877e-07.
Iter: 3, obj: 247.339860, Vol: 0.400200, ch: 0.200000
iters: 215， error: 8.96052e-07.
Iter: 4, obj: 203.122346, Vol: 0.400068, ch: 0.200000
iters: 235， error: 9.42463e-07.
Iter: 5, obj: 170.154033, Vol: 0.400202, ch: 0.200000
iters: 260， error: 9.41993e-07.
Iter: 6, obj: 145.739339, Vol: 0.400219, ch: 0.200000
iters: 276， error: 7.61956e-07.
Iter: 7, obj: 126.226413, Vol: 0.399999, ch: 0.200000
iters: 291， error: 9.38887e-07.
Iter: 8, obj: 111.270963, Vol: 0.400115, ch: 0.200000
iters: 305， error: 8.50583e-07.
Iter: 9, obj: 98.469317, Vol: 0.400071, ch: 0.200000
iters: 320， error: 8.74598e-07.
Iter: 10, obj: 87.786592, Vol: -nan(ind), ch: 0.200000
iters: 0， error: nan.
Iter: 11, obj: 0.000000, Vol: -nan(ind), ch: -nan(ind)

Apr 04 '23 09:04 ripplesding

amgcl amgcl copied to clipboard

cuda compute error

amgcl
amgcl copied to clipboard