Asynchronous `fill` for variables
Traditional initialization code executed in serial on CPU:
VariableCellInteger var_ci(...);
VariableCellReal var_cr(...);
MaterialVariableCellReal var_mr1(...);
MaterialVariableCellReal var_mr2(...);
...
MaterialVariableCellReal var_mr7(...);
VariableNodeReal var_nr(...);
VariableNodeReal3 var_nr3(...);
var_ci.fill(1);
var_cr.fill(0.0);
var_mr1.fill(0.0);
var_mr2.fill(1.0);
...
var_mr7.fill(0.0);
var_nr.fill(0.0);
var_nr3.fill(Real3::zero());
// Computations
ENUMERATE_CELL(icell, cell_group) {
var_cr[cid] += ... ;
...;
}
Today, we have to explicitly write asynchronous loops to initialize the variables on the GPU (note that we try to minimize the number of kernels: we do not launch one kernel per variable initialization):
Ref<RunQueue> async_queue = makeQueueRef(...->defaultRunner());
async_queue->setAsync(true);
auto command_c = makeCommand(async_queue.get());
auto out_var_ci = ax::viewOut(command_c, var_ci);
auto out_var_cr = ax::viewOut(command_c, var_cr);
auto out_var_mr1 = ax::viewOut(command_c, var_mr1);
auto out_var_mr2 = ax::viewOut(command_c, var_mr2);
...
auto out_var_mr7 = ax::viewOut(command_c, var_mr7);
CellToAllEnvCellAccessor cell2allenvcell(mesh_material_mng);
command_c << RUNCOMMAND_ENUMERATE_CELL_ALLENVCELL(cell2allenvcell, cid, allCells())
{
out_var_ci[cid] = 1;
out_var_cr[cid] = 0.0;
out_var_mr1[cid] = 0.0;
out_var_mr2[cid] = 1.0;
...
out_var_mr7[cid] = 0.0;
if (cell2allenvcell.nbEnvironment(cid) > 1)
{
ENUMERATE_CELL_ALLENVCELL(iev, cid, cell2allenvcell) {
out_var_mr1[*iev] = 0.0;
out_var_mr2[*iev] = 1.0;
...
out_var_mr7[*iev] = 0.0;
}
}
};
auto command_n = makeCommand(async_queue.get());
auto out_var_nr = ax::viewOut(command_n, var_nr);
auto out_var_nr3 = ax::viewOut(command_n, var_nr3);
command_n << RUNCOMMAND_ENUMERATE(Node, nid, allNodes())
{
out_var_nr[nid] = 0.0;
out_var_nr3[nid] = Real3::zero();
};
// Asynchronous computations
auto command = makeCommand(async_queue.get());
command << RUNCOMMAND_ENUMERATE(Cell, cid, cell_group) {
... ;
};
async_queue->barrier();
We would like to asynchronously fill on an asynchronous queue like this:
Ref<RunQueue> async_queue = makeQueueRef(...->defaultRunner());
async_queue->setAsync(true);
// Asynchronous initializations
asyncFill<Cell>(async_queue, {var_ci, 1}, {var_cr, 0.0}, {var_mr1, 0.0}, {var_mr2, 1.0}, ..., {var_mr7, 0.0});
// Or ? asyncFill<MatCell>(async_queue, {var_mr1, 0.0}, {var_mr2, 1.0}, ..., {var_mr7, 0.0}); ?
asyncFill<Node>(async_queue, {var_nr, 0.0}, {var_nr3, Real3::zero()});
// Asynchronous computations
auto command = makeCommand(async_queue.get());
command << RUNCOMMAND_ENUMERATE(Cell, cid, cell_group) {
... ;
};
// Synchronization
async_queue->barrier();
Would it be possible?
Yes, I think it will be possible.
In your first example, there is no fill for the material part of the variable (only the environment part). Is it because you assume there is only one material in your environment?
That's true, in my example I assume I have only one material in my environment.
To be more general, we have to consider that asyncFill would perform the equivalent of the original fill, but in an asynchronous way through the accelerator API.
It is implemented for Variables in #991. The next step is for Material Variables.