Bug in MPI-parallel p4est meshes for heavily scattered elements/MPI ranks
In preparation for #2331 I noticed that for the elixir elixir_euler_NACA6412airfoil_mach2.jl the MPI-parallel simulation gives wrong results compared to the single-threaded/multi-threaded simulation.
In particular, consider the following simplified elixir (with the different boundary conditions replaced by a single wall condition):
using Trixi
using OrdinaryDiffEqSSPRK, OrdinaryDiffEqLowStorageRK

###############################################################################
# semidiscretization of the compressible Euler equations

equations = CompressibleEulerEquations2D(1.4)

@inline function initial_condition_mach2_flow(x, t, equations::CompressibleEulerEquations2D)
    # set the freestream flow parameters
    rho_freestream = 1.4
    #v1 = 2.0
    v1 = 0.0 # Simplify setup
    v2 = 0.0
    p_freestream = 1.0

    prim = SVector(rho_freestream, v1, v2, p_freestream)
    return prim2cons(prim, equations)
end

initial_condition = initial_condition_mach2_flow

# Supersonic inflow boundary condition.
# Calculate the boundary flux entirely from the external solution state, i.e., set
# external solution state values for everything entering the domain.
@inline function boundary_condition_supersonic_inflow(u_inner,
                                                      normal_direction::AbstractVector,
                                                      x, t, surface_flux_function,
                                                      equations::CompressibleEulerEquations2D)
    u_boundary = initial_condition_mach2_flow(x, t, equations)
    flux = Trixi.flux(u_boundary, normal_direction, equations)

    return flux
end

# Supersonic outflow boundary condition.
# Calculate the boundary flux entirely from the internal solution state. Analogous to supersonic inflow
# except all the solution state values are set from the internal solution as everything leaves the domain.
@inline function boundary_condition_supersonic_outflow(u_inner,
                                                       normal_direction::AbstractVector,
                                                       x, t, surface_flux_function,
                                                       equations::CompressibleEulerEquations2D)
    flux = Trixi.flux(u_inner, normal_direction, equations)

    return flux
end

polydeg = 3

surface_flux = flux_lax_friedrichs
volume_flux = flux_ranocha

basis = LobattoLegendreBasis(polydeg)
shock_indicator = IndicatorHennemannGassner(equations, basis,
                                            alpha_max = 0.5,
                                            alpha_min = 0.001,
                                            alpha_smooth = true,
                                            variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(shock_indicator;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)

# DG Solver
solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
               volume_integral = volume_integral)

# Mesh generated from the following gmsh geometry input file:
# https://gist.githubusercontent.com/DanielDoehring/5ade6d93629f0d8c23a598812dbee2a9/raw/d2bc904fe92146eae1a36156e7f5c535dc1a80f1/NACA6412.geo
mesh_file = joinpath(@__DIR__, "mesh_NACA6412.inp")
isfile(mesh_file) ||
    Trixi.download("https://gist.githubusercontent.com/DanielDoehring/e2a389f04f1e37b33819b9637e8ee4c3/raw/4bf7607a2ce4432fdb5cb87d5e264949b11bd5d7/mesh_NACA6412.inp",
                   mesh_file)

boundary_symbols = [:PhysicalLine1, :PhysicalLine2, :PhysicalLine3, :PhysicalLine4]

#mesh = P4estMesh{2}(mesh_file, polydeg = polydeg, boundary_symbols = boundary_symbols)
mesh = P4estMesh{2}(mesh_file, polydeg = polydeg) # Simplify setup: Use no BCs

boundary_conditions = Dict(:PhysicalLine1 => boundary_condition_supersonic_inflow, # Left boundary
                           :PhysicalLine2 => boundary_condition_supersonic_outflow, # Right boundary
                           :PhysicalLine3 => boundary_condition_supersonic_outflow, # Top and bottom boundary
                           :PhysicalLine4 => boundary_condition_slip_wall) # Airfoil
boundary_conditions = Dict(:all => boundary_condition_slip_wall) # Simplify setup: All bnds set to wall

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                    boundary_conditions = boundary_conditions)

tspan = (0.0, 0.1)
ode = semidiscretize(semi, tspan)

summary_callback = SummaryCallback()

analysis_interval = 1000
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)

stepsize_callback = StepsizeCallback(cfl = 4.0)

save_solution = SaveSolutionCallback(interval = 4000,
                                     save_initial_solution = false,
                                     save_final_solution = true,
                                     solution_variables = cons2prim)

callbacks = CallbackSet(summary_callback,
                        analysis_callback,
                        save_solution,
                        stepsize_callback)

###############################################################################
# Run the simulation

sol = solve(ode, SSPRK104(; thread = Trixi.True());
            dt = 1.0, # overwritten by the `stepsize_callback`
            ode_default_options()...,
            callback = callbacks);
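To reproduce the comparison, the elixir above can be run once thread-parallel and once MPI-parallel. A minimal sketch, assuming the code above is saved as elixir_naca6412_simplified.jl (the file name is only a placeholder):

# Thread-parallel run (2 threads), from the shell:
#   julia --threads=2 --project=@. elixir_naca6412_simplified.jl

# MPI-parallel run (2 ranks, 1 thread each), launched from Julia via MPI.jl:
using MPI
mpiexec() do cmd
    run(`$cmd -n 2 $(Base.julia_cmd()) --threads=1 --project=@. elixir_naca6412_simplified.jl`)
end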
For the MPI simulation with 2 ranks, one gets the following error report:
────────────────────────────────────────────────────────────────────────────────────────────────────
Simulation running 'CompressibleEulerEquations2D' with DGSEM(polydeg=3)
────────────────────────────────────────────────────────────────────────────────────────────────────
#timesteps: 30 run time: 4.13845889e-01 s
Δt: 3.17500708e-03 └── GC time: 0.00000000e+00 s (0.000%)
sim. time: 1.00000000e-01 (100.000%) time/DOF/rhs!: 7.54297026e-08 s
PID: 1.44723253e-07 s
#DOFs per field: 14016 alloc'd memory: 350.857 MiB
#elements: 876
Variable: rho rho_v1 rho_v2 rho_e
L2 error: 7.11105785e-16 4.69939591e-15 6.33788806e-15 2.18807215e-15
Linf error: 5.50670620e-14 2.16941714e-13 4.15677194e-13 1.38999923e-13
∑∂S/∂U ⋅ Uₜ : 7.19453088e-28
────────────────────────────────────────────────────────────────────────────────────────────────────
vs 2 threads
────────────────────────────────────────────────────────────────────────────────────────────────────
Simulation running 'CompressibleEulerEquations2D' with DGSEM(polydeg=3)
────────────────────────────────────────────────────────────────────────────────────────────────────
#timesteps: 30 run time: 4.82772657e-01 s
Δt: 3.17500708e-03 └── GC time: 0.00000000e+00 s (0.000%)
sim. time: 1.00000000e-01 (100.000%) time/DOF/rhs!: 1.09081139e-07 s
PID: 1.80980468e-07 s
#DOFs per field: 14016 alloc'd memory: 457.552 MiB
#elements: 876
Variable: rho rho_v1 rho_v2 rho_e
L2 error: 9.21404117e-16 4.94766532e-15 6.59203882e-15 2.37473843e-15
Linf error: 5.52891066e-14 2.16683843e-13 4.17155306e-13 1.38999923e-13
∑∂S/∂U ⋅ Uₜ : 6.99590636e-28
────────────────────────────────────────────────────────────────────────────────────────────────────
I suspect that this is due to the violation of some underlying assumption in the MPI-parallel solver. In particular, there might be an assumption that there are no "scattered" cells, which do occur in this case as a consequence of the very scattered element ids (see the attached partition plots; a small sketch for inspecting the per-rank partition follows below).
In contrast, for the SD7003 airfoil that is now also used in the tests, the MPI-parallel implementation gives the same results as the standard simulation run. There, the partitioning and the element ids are also much more regular (see the attached plot).
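For reference, a minimal sketch of how the per-rank element distribution could be inspected inside an MPI run; note that nelements(solver, cache) is a Trixi-internal helper, so its use here is an assumption:

using MPI

# Place this in the elixir after `semi` has been constructed and run it under
# mpiexec: every rank then reports how many elements p4est assigned to it.
rank = MPI.Comm_rank(MPI.COMM_WORLD)
n_local = Trixi.nelements(semi.solver, semi.cache) # assumption: internal helper
println("rank $rank owns $n_local local elements")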
@andrewwinters5000 @sloede @amrueda @benegee
Thanks for reporting this. Where exactly is the "error", or are you referring to the fact that the simulation results are not bitwise identical?
@benegee is it possible that this is related to the bug you described for the case where the initial refinement level is zero?
So far I can only confirm what you observed, @DanielDoehring. I see almost exactly the numbers you reported when running with threads or with MPI (MPI.jl and the system OpenMPI).
So for the reduced case (and especially the short runtime) this looks innocent, but for the real simulation the results are more or less completely different, which also becomes apparent when plotting (see the conversion sketch below the outputs):
MPI 2 Ranks:
────────────────────────────────────────────────────────────────────────────────────────────────────
Simulation running 'CompressibleEulerEquations2D' with DGSEM(polydeg=3)
────────────────────────────────────────────────────────────────────────────────────────────────────
#timesteps: 2707 run time: 2.02162206e+01 s
Δt: 1.36907006e-03 └── GC time: 0.00000000e+00 s (0.000%)
sim. time: 4.00000000e+00 (100.000%) time/DOF/rhs!: 1.08554068e-07 s
PID: 1.09998185e-07 s
#DOFs per field: 14016 alloc'd memory: 607.558 MiB
#elements: 876
Variable: rho rho_v1 rho_v2 rho_e
L2 error: 8.32128633e-01 6.83984585e-01 6.51280605e-01 2.88845749e+00
Linf error: 2.93951933e+00 2.81514649e+00 2.39722475e+00 9.64809217e+00
∑∂S/∂U ⋅ Uₜ : -3.30412101e-01
────────────────────────────────────────────────────────────────────────────────────────────────────
Serial/2 Threads:
────────────────────────────────────────────────────────────────────────────────────────────────────
Simulation running 'CompressibleEulerEquations2D' with DGSEM(polydeg=3)
────────────────────────────────────────────────────────────────────────────────────────────────────
#timesteps: 2738 run time: 3.24220763e+01 s
Δt: 2.97410713e-04 └── GC time: 1.43908126e-01 s (0.444%)
sim. time: 4.00000000e+00 (100.000%) time/DOF/rhs!: 1.74224815e-07 s
PID: 1.77655654e-07 s
#DOFs per field: 14016 alloc'd memory: 286.243 MiB
#elements: 876
Variable: rho rho_v1 rho_v2 rho_e
L2 error: 6.96918814e-01 7.23552226e-01 4.79487235e-01 2.45545470e+00
Linf error: 2.81948946e+00 3.08016534e+00 2.15999276e+00 9.14979570e+00
∑∂S/∂U ⋅ Uₜ : 2.41134420e-01
────────────────────────────────────────────────────────────────────────────────────────────────────
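For the plots, the solution files written by the SaveSolutionCallback (by default into the out directory) can be converted to VTK and inspected, e.g., in ParaView; a minimal sketch using Trixi2Vtk (the output path is an assumption based on the default settings):

using Trixi2Vtk

# Convert all saved solution files in the default output directory to VTK
trixi2vtk("out/solution_*.h5")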
This is weird!
Can it be that some rotations between elements (which only occur at MPI interfaces) are not accounted for by index_to_start_step_2d? Or maybe interfaces.node_indices is not defined correctly for the Abaqus mesh that you are reading in, and it only shows for certain MPI interfaces?
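For reference, here is a simplified re-implementation of the index mapping in question; this is only a sketch of how index_to_start_step_2d is supposed to behave for the :begin/:end/:i_forward/:i_backward node_indices convention, not the actual Trixi source:

# Map a symbolic interface node index to a (start, step) pair used to traverse
# the face nodes of the adjacent element in the correct orientation.
function index_to_start_step_2d_sketch(index::Symbol, index_range)
    index_begin = first(index_range)
    index_end = last(index_range)
    if index === :begin
        return index_begin, 0
    elseif index === :end
        return index_end, 0
    elseif index === :i_forward
        return index_begin, 1
    else # index === :i_backward, i.e., the neighbor stores the face reversed
        return index_end, -1
    end
end

# Example: a neighbor with flipped orientation traverses the face backwards
nodes = 1:4
i_start, i_step = index_to_start_step_2d_sketch(:i_backward, nodes)
@assert [i_start + k * i_step for k in 0:(length(nodes) - 1)] == [4, 3, 2, 1]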
I have not investigated this further, but yeah, I guess the MPI interfaces must be the troublemaker here.
Hi, I have encountered a similar bug when running examples/p4est_2d_dgsem/elixir_euler_forward_step_amr.jl with MPI.jl:
mpiexec() do cmd
    run(`$cmd -n 8 $(Base.julia_cmd()) --threads=1 --project=@. -e 'using Trixi; trixi_include("/storage/home/yiyang/work/Trixi.jl/examples/p4est_2d_dgsem/elixir_euler_forward_step_amr.jl")'`)
end
The simulation terminated after around 600000 steps with the following error:
ERROR: LoadError: DomainError with -0.07064803336262893: Exponentiation yielding a complex result requires a complex argument.
while calculating the boundary flux.
I think it is related to this issue: the "error" gets amplified into a crash over a sufficiently long runtime.
@YiyangZhuu The failure you are seeing might also be caused by loss of positivity. The forward facing step can be quite prone to this, particularly at the corner point. One thing to try would be to swap the order in the positivity limiter to limit the pressure before the density, e.g., change the corresponding lines in the elixir to be
# positivity limiter necessary for this example with strong shocks
stage_limiter! = PositivityPreservingLimiterZhangShu(thresholds = (5.0e-6, 5.0e-6),
                                                     variables = (pressure, Trixi.density))
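For context, such a stage limiter is passed directly to the SSP time integrator when solving the ODE; a minimal sketch in the spirit of the elixir (the concrete method SSPRK43 is an assumption and stands in for whatever the elixir actually uses):

using OrdinaryDiffEqSSPRK

# The stage limiter is applied after every Runge-Kutta stage; with the order
# above, the pressure is limited before the density.
sol = solve(ode, SSPRK43(stage_limiter!);
            ode_default_options()...,
            callback = callbacks);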
Have you tried the thread-parallel-only version, i.e., $cmd -n 1 $(Base.julia_cmd()) --threads=8? Only in that case are we talking about the same bug.
Is it equivalent to set JULIA_NUM_THREADS=8 at startup? I used that and it worked fine.
Yes. For thread-only runs you can just call julia --threads 8, for instance.