PHARE running with mpirun fails for some number of processes? cause?

running the script below with

mpirun -n 6 python3 translat1d.py td 2

fails with :

regriding adding protons on level 1
regriding adding protons on level 1
regriding adding protons on level 1
regriding adding protons on level 1
regriding adding levelghostparticles on level 1 protons
regriding adding levelghostparticles on level 1 protons
regriding adding levelghostparticles on level 1 protons
regriding adding levelghostparticles on level 1 protons
regriding adding levelghostparticles on level 1 protons
P=0000003:Program abort called in file ``/home/aunai/Documents/code/phare/PHARE/subprojects/samrai/source/SAMRAI/tbox/AsyncCommPeer.C'' at line 915
P=0000003:ERROR MESSAGE:
P=0000003:AsyncCommPeer::getRecvData() called without a
P=0000003:corresponding receive.
P=0000000:Program abort called in file ``/home/aunai/Documents/code/phare/PHARE/subprojects/samrai/source/SAMRAI/tbox/AsyncCommPeer.C'' at line 915
P=0000000:ERROR MESSAGE:
P=0000000:AsyncCommPeer::getRecvData() called without a
P=0000000:corresponding receive.
regriding adding levelghostparticles on level 1 protons
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 3 in communicator MPI COMMUNICATOR 3 DUP FROM 0
with errorcode -1.

NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.

but it does not fail right away... If the number of processes is not adequate, why is that? And should we prevent it?

#!/usr/bin/env python3

import pyphare.pharein as ph #lgtm [py/import-and-import-from]
from pyphare.pharein import Simulation
from pyphare.pharein import MaxwellianFluidModel
from pyphare.pharein import ElectromagDiagnostics,FluidDiagnostics
from pyphare.pharein import ElectronModel
from pyphare.simulator.simulator import Simulator
from pyphare.pharein import global_vars as gv

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.use('Agg')




def config_uni(**kwargs):
    """Set up the "uni" test case.

    Builds the Simulation object (500 cells, periodic boundaries,
    refinement boxes declared on levels L0 and L1), a Maxwellian proton
    population whose profiles are all constant in x, an isothermal
    electron model, and the E, B, density and bulkVelocity diagnostics.

    Keyword arguments read by this function:
      - "diagdir": directory given to the diagnostics options
      - "vx": value returned by the `vbulkx` profile
    """
    diag_options = {
        "format": "phareh5",
        "options": {"dir": kwargs["diagdir"], "mode": "overwrite"},
    }
    refinement_boxes = {
        "L0": {"B0": [(100, ), (200, )]},
        "L1": {"B0": [(300,), (350,)]},
    }
    simulation_settings = dict(
        smallest_patch_size=20,
        largest_patch_size=20,
        time_step_nbr=2000,         # number of time steps
        final_time=20.,             # simulation final time
        boundary_types="periodic",  # string or tuple, length == len(cell) == len(dl)
        cells=500,                  # integer or tuple, length == dimension
        dl=1,                       # mesh size of the root level, float or tuple
        refinement_boxes=refinement_boxes,
        diag_options=diag_options,
    )
    Simulation(**simulation_settings)

    # --- magnetic field and density profiles (all constant) ---
    def density(x):
        return 1.

    def bx(x):
        return 0.

    def by(x):
        return 1.

    def bz(x):
        return 0.5

    # --- bulk velocity profiles: only the x component is user-controlled ---
    def vx(x):
        return kwargs["vx"]

    def vy(x):
        return 0.

    def vz(x):
        return 0.

    # --- `vth*` profiles, same constant in every direction ---
    def vthx(x):
        return 0.1

    def vthy(x):
        return 0.1

    def vthz(x):
        return 0.1

    proton_velocities = {
        "vbulkx": vx, "vbulky": vy, "vbulkz": vz,
        "vthx": vthx, "vthy": vthy, "vthz": vthz,
    }
    protons = {"charge": 1, "density": density, **proton_velocities}

    MaxwellianFluidModel(bx=bx, by=by, bz=bz, protons=protons)

    ElectronModel(closure="isothermal", Te=0.12)

    sim = ph.global_vars.sim

    # the stop value is final_time + time_step
    timestamps = np.arange(0, sim.final_time + sim.time_step, sim.time_step)

    # electromagnetic diagnostics are declared first, then the fluid ones
    diagnostics = (
        (ElectromagDiagnostics, ("E", "B")),
        (FluidDiagnostics, ("density", "bulkVelocity")),
    )
    for diagnostic_type, quantities in diagnostics:
        for quantity in quantities:
            diagnostic_type(
                quantity=quantity,
                write_timestamps=timestamps,
                compute_timestamps=timestamps,
            )





def config_td(**kwargs):
    """Set up the "td" test case.

    Builds the Simulation object (200 cells, periodic boundaries,
    "tagging" refinement with at most 3 levels), a Maxwellian proton
    population whose `by` profile is built from two tanh steps, an
    isothermal electron model, and the E, B, density and bulkVelocity
    diagnostics.

    Keyword arguments read by this function:
      - "diagdir": directory given to the diagnostics options
      - "vx": value returned by the `vbulkx` profile
    """
    diag_options = {
        "format": "phareh5",
        "options": {"dir": kwargs["diagdir"], "mode": "overwrite"},
    }
    simulation_settings = dict(
        smallest_patch_size=20,
        largest_patch_size=20,
        time_step_nbr=2000,         # number of time steps
        final_time=20.,             # simulation final time
        boundary_types="periodic",  # string or tuple, length == len(cell) == len(dl)
        cells=200,                  # integer or tuple, length == dimension
        dl=1.0,                     # mesh size of the root level, float or tuple
        refinement="tagging",
        max_nbr_levels=3,
        # static alternative to tagging, kept for reference:
        # refinement_boxes={"L0": {"B0": [(50, ), (150, )]},
        #                   "L1": {"B0": [(125,), (175,)]}},
        diag_options=diag_options,
    )
    Simulation(**simulation_settings)

    def density(x):
        return 1.

    def S(x, x0, l):
        # tanh step centred on x0, scaled by l, going from 0 to 1
        return 0.5*(1+np.tanh((x-x0)/l))

    def bx(x):
        return 0.

    def by(x):
        # the simulation object is looked up when the profile is evaluated
        from pyphare.pharein.global_vars import sim
        L = sim.simulation_domain()[0]
        v1 = -1
        v2 = 1.
        # steps located at L/4 and 3L/4
        return v1 + (v2-v1)*(S(x, L*0.25, 1) - S(x, L*0.75, 1))

    def bz(x):
        return 0.5

    def b2(x):
        # sum of the squared field components
        return bx(x)**2 + by(x)**2 + bz(x)**2

    def T(x):
        # NOTE(review): presumably chosen so that T*density + b2/2 == K — confirm
        K = 1
        return 1/density(x)*(K - b2(x)*0.5)

    def vx(x):
        return kwargs["vx"]

    def vy(x):
        return 0.

    def vz(x):
        return 0.

    # the three `vth*` profiles all return T(x)
    def vthx(x):
        return T(x)

    def vthy(x):
        return T(x)

    def vthz(x):
        return T(x)

    proton_velocities = {
        "vbulkx": vx, "vbulky": vy, "vbulkz": vz,
        "vthx": vthx, "vthy": vthy, "vthz": vthz,
    }
    protons = {"charge": 1, "density": density, **proton_velocities}

    MaxwellianFluidModel(bx=bx, by=by, bz=bz, protons=protons)

    ElectronModel(closure="isothermal", Te=0.12)

    sim = ph.global_vars.sim

    # the stop value is final_time
    timestamps = np.arange(0, sim.final_time, sim.time_step)
    print(timestamps)

    # electromagnetic diagnostics are declared first, then the fluid ones
    diagnostics = (
        (ElectromagDiagnostics, ("E", "B")),
        (FluidDiagnostics, ("density", "bulkVelocity")),
    )
    for diagnostic_type, quantities in diagnostics:
        for quantity in quantities:
            diagnostic_type(
                quantity=quantity,
                write_timestamps=timestamps,
                compute_timestamps=timestamps,
            )



import sys

def main():
    """Configure, initialize and run the test case chosen on the command line.

    Command-line arguments:
      - sys.argv[1]: test case name, "td" or "uni"
      - sys.argv[2]: value converted to float and passed as "vx" to the
        configuration function

    The diagnostics directory passed to the configuration function is the
    case name followed by "_" and the velocity formatted as "04.1f".

    Raises ValueError if the test case name is neither "uni" nor "td"
    (the message now includes the offending name).
    """

    case = sys.argv[1] # "td" or "uni"
    print(case)
    if case == "uni":
        config = config_uni
    elif case == "td":
        config = config_td
    else:
        # the original message was a plain string, so "{case}" was never
        # substituted; format it explicitly so the bad value is reported
        raise ValueError("unknown test case : {}".format(case))
    vx   = float(sys.argv[2])

    params = {"vx":vx, "diagdir":case+"_{:04.1f}".format(vx)}
    print("-----------------------------------")
    print(params)
    print("-----------------------------------")
    config(**params)
    simulator = Simulator(gv.sim)
    print(gv.sim.dl)
    simulator.initialize()
    simulator.run()
    # reset the global simulation object once the run is over
    gv.sim = None


# Run the selected test case only when this file is executed as a script
# (e.g. `python3 translat1d.py td 2`), not when it is imported.
if __name__=="__main__":
    main()

Apr 11 '21 10:04 nicolasaunai

Maybe make a PR using something like this: https://github.com/PHAREHUB/PHARE/blob/master/tests/simulator/CMakeLists.txt#L15

would show it being reproduced

Apr 11 '21 10:04 PhilipDeegan

Given the slowness of the CI these days, I'm not sure this would bring anything. OK, the crash is also seen with 2 cores.

Apr 11 '21 15:04 nicolasaunai

Looks like this line is the cause https://github.com/PHAREHUB/PHARE/blob/master/src/amr/messengers/hybrid_hybrid_messenger_strategy.h#L185

stack trace https://gist.github.com/PhilipDeegan/2361926becec3dfb8c1513ca10fbb8d6

Apr 13 '21 08:04 PhilipDeegan

PHARE PHARE copied to clipboard

running with mpirun fails for some number of processes? cause?

PHARE
PHARE copied to clipboard