Non-HPX MPI communication in multithreaded/funneled mode, interleaved with channels, freezes HPX.
Expected Behavior
Making MPI calls that don't involve HPX shouldn't produce segfaults.
Actual Behavior
Making MPI calls that don't involve HPX produces segfaults.
Steps to Reproduce the Problem
Run the following several times; at some point it will segfault:
mpirun -n 4 ./mpi_conflict --hpx:threads 4
If you only use one HPX thread, it won't segfault:
mpirun -n 4 ./mpi_conflict --hpx:threads 1
Code sample:
#include <hpx/hpx_init.hpp>
#include <hpx/channel.hpp>

#include <boost/mpi.hpp>

#include <cstddef>
#include <string>
#include <vector>

constexpr int number_of_trials = 1'000'000;

int hpx_main() {
    boost::mpi::environment theEnvironment(boost::mpi::threading::multiple);
    boost::mpi::communicator mpi_world;

    int data_send = 4;
    int data_receive = 0;
    int my_rank = mpi_world.rank();
    int i_am_even = my_rank % 2 == 0 ? 1 : -1;
    // Positive modulo: wrap the neighbour index into [0, size)
    int my_neighbour = ((my_rank + i_am_even) % mpi_world.size() + mpi_world.size()) % mpi_world.size();

    for (size_t i = 0; i < number_of_trials; ++i) {
        // std::cout << hpx::util::format(" {} / {} \r", i, number_of_trials);
        // Odd/even rule: even ranks send first, odd ranks receive first
        if (my_rank % 2 == 0) {
            mpi_world.send(my_neighbour, 5, &data_send, 1);
            mpi_world.recv(my_neighbour, 5, &data_receive, 1);
        } else {
            mpi_world.recv(my_neighbour, 5, &data_receive, 1);
            mpi_world.send(my_neighbour, 5, &data_send, 1);
        }
    }
    return hpx::finalize();
}

int main(int argc, char *argv[]) {
    // std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1", "hpx.parcel.mpi.enable!=0"};
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
    hpx::init_params init_args;
    init_args.cfg = cfg;
    return hpx::init(argc, argv, init_args);
}
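For reference, the "Positive modulo" line above is only meant to wrap the neighbour index into [0, size). A minimal standalone sketch of that computation (the helper names positive_mod and neighbour_of are mine, not part of the reproducer):

// Hypothetical helpers illustrating the intended neighbour computation;
// not part of the original reproducer.
constexpr int positive_mod(int value, int modulus) {
    // Wrap value into [0, modulus), even when value is negative.
    return (value % modulus + modulus) % modulus;
}

constexpr int neighbour_of(int rank, int size) {
    // Even ranks pair with the next rank, odd ranks with the previous one.
    int const step = rank % 2 == 0 ? 1 : -1;
    return positive_mod(rank + step, size);
}

// With 4 ranks the pairs are 0 <-> 1 and 2 <-> 3.
static_assert(neighbour_of(0, 4) == 1);
static_assert(neighbour_of(1, 4) == 0);
static_assert(neighbour_of(2, 4) == 3);
static_assert(neighbour_of(3, 4) == 2);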
Stacktrace && HPX config
{config}:
Core library:
HPX_AGAS_LOCAL_CACHE_SIZE=4096
HPX_HAVE_MALLOC=jemalloc
HPX_PARCEL_MAX_CONNECTIONS=512
HPX_PARCEL_MAX_CONNECTIONS_PER_LOCALITY=4
HPX_PREFIX (configured)=
HPX_PREFIX=/home/jn98zk/cpp_stuff/hpx-1.7.1/installation
HPX_FILESYSTEM_WITH_BOOST_FILESYSTEM_COMPATIBILITY=OFF
HPX_ITERATOR_SUPPORT_WITH_BOOST_ITERATOR_TRAVERSAL_TAG_COMPATIBILITY=OFF
HPX_WITH_AGAS_DUMP_REFCNT_ENTRIES=OFF
HPX_WITH_APEX=OFF
HPX_WITH_ASYNC_MPI=OFF
HPX_WITH_ATTACH_DEBUGGER_ON_TEST_FAILURE=OFF
HPX_WITH_AUTOMATIC_SERIALIZATION_REGISTRATION=ON
HPX_WITH_COROUTINE_COUNTERS=OFF
HPX_WITH_DISTRIBUTED_RUNTIME=ON
HPX_WITH_DYNAMIC_HPX_MAIN=ON
HPX_WITH_GOOGLE_PERFTOOLS=OFF
HPX_WITH_IO_COUNTERS=ON
HPX_WITH_IO_POOL=ON
HPX_WITH_ITTNOTIFY=OFF
HPX_WITH_LOGGING=ON
HPX_WITH_NETWORKING=ON
HPX_WITH_PAPI=OFF
HPX_WITH_PARALLEL_TESTS_BIND_NONE=OFF
HPX_WITH_PARCELPORT_ACTION_COUNTERS=OFF
HPX_WITH_PARCELPORT_LIBFABRIC=OFF
HPX_WITH_PARCELPORT_MPI=ON
HPX_WITH_PARCELPORT_MPI_MULTITHREADED=ON
HPX_WITH_PARCELPORT_TCP=ON
HPX_WITH_PARCEL_COALESCING=ON
HPX_WITH_PARCEL_PROFILING=OFF
HPX_WITH_SANITIZERS=OFF
HPX_WITH_SCHEDULER_LOCAL_STORAGE=OFF
HPX_WITH_SPINLOCK_DEADLOCK_DETECTION=ON
HPX_WITH_STACKTRACES=ON
HPX_WITH_STACKTRACES_DEMANGLE_SYMBOLS=ON
HPX_WITH_STACKTRACES_STATIC_SYMBOLS=OFF
HPX_WITH_TESTS_DEBUG_LOG=OFF
HPX_WITH_THREAD_BACKTRACE_ON_SUSPENSION=OFF
HPX_WITH_THREAD_CREATION_AND_CLEANUP_RATES=OFF
HPX_WITH_THREAD_CUMULATIVE_COUNTS=ON
HPX_WITH_THREAD_DEBUG_INFO=OFF
HPX_WITH_THREAD_DESCRIPTION_FULL=OFF
HPX_WITH_THREAD_GUARD_PAGE=ON
HPX_WITH_THREAD_IDLE_RATES=OFF
HPX_WITH_THREAD_LOCAL_STORAGE=OFF
HPX_WITH_THREAD_MANAGER_IDLE_BACKOFF=ON
HPX_WITH_THREAD_QUEUE_WAITTIME=OFF
HPX_WITH_THREAD_STACK_MMAP=ON
HPX_WITH_THREAD_STEALING_COUNTS=OFF
HPX_WITH_THREAD_TARGET_ADDRESS=OFF
HPX_WITH_TIMER_POOL=ON
HPX_WITH_TUPLE_RVALUE_SWAP=ON
HPX_WITH_VALGRIND=OFF
HPX_WITH_VERIFY_LOCKS=OFF
HPX_WITH_VERIFY_LOCKS_BACKTRACE=OFF
HPX_WITH_VERIFY_LOCKS_GLOBALLY=OFF
Module coroutines:
HPX_COROUTINES_WITH_SWAP_CONTEXT_EMULATION=OFF
Module datastructures:
HPX_DATASTRUCTURES_WITH_ADAPT_STD_TUPLE=ON
Module serialization:
HPX_SERIALIZATION_WITH_ALL_TYPES_ARE_BITWISE_SERIALIZABLE=OFF
HPX_SERIALIZATION_WITH_BOOST_TYPES=ON
Module topology:
HPX_TOPOLOGY_WITH_ADDITIONAL_HWLOC_TESTING=OFF
{version}: V1.7.1 (AGAS: V3.0), Git: unknown
{boost}: V1.79.0
{build-type}: release
{date}: May 4 2022 12:46:28
{platform}: linux
{compiler}: GNU C++ version 11.1.0
{stdlib}: GNU libstdc++ version 20210427
{stack-trace}: 15 frames:
0x7f861a2a2090 : /lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f861a2a2090] in /lib/x86_64-linux-gnu/libc.so.6
0x7f86182a018e : /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi3/mca_btl_vader.so(+0x518e) [0x7f86182a018e] in /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi3/mca_btl_vader.so
0x7f8619e81854 : opal_progress [0x34] in /lib/x86_64-linux-gnu/libopen-pal.so.40
0x7f861a6d51b1 : ompi_request_default_test [0x31] in /lib/x86_64-linux-gnu/libmpi.so.40
0x7f861a7138d2 : PMPI_Test [0x52] in /lib/x86_64-linux-gnu/libmpi.so.40
0x7f861aec9da2 : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1(+0x27cda2) [0x7f861aec9da2] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1
0x7f861aed0f07 : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1(+0x283f07) [0x7f861aed0f07] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1
0x7f861aed2784 : hpx::parcelset::parcelport_impl<hpx::parcelset::policies::mpi::parcelport>::do_background_work(unsigned long, hpx::parcelset::parcelport_background_mode) [0x44] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1
0x7f861aea9738 : hpx::parcelset::parcelhandler::do_background_work(unsigned long, bool, hpx::parcelset::parcelport_background_mode) [0x68] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1
0x7f861b349356 : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1(+0x6fc356) [0x7f861b349356] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx.so.1
0x7f861ab7097e : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so(+0xfe97e) [0x7f861ab7097e] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so
0x7f861ab7191f : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so(+0xff91f) [0x7f861ab7191f] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so
0x7f861ab1fbc6 : hpx::threads::coroutines::detail::coroutine_impl::operator()() [0xc6] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so
0x7f861ab1f22f : /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so(+0xad22f) [0x7f861ab1f22f] in /home/jn98zk/cpp_stuff/hpx-1.7.1/installation/lib/libhpx_core.so
I checked HPX 1.9.0; it wouldn't segfault there, however hard I tried.
Well, I got it to freeze now :) by combining channels and MPI communication. If I switch to boost::mpi::threading::funneled, it survives a bit longer.
//
// Created by jn98zk on 6/12/22.
//
#include <hpx/hpx_init.hpp>
#include <hpx/channel.hpp>
#include <hpx/future.hpp>
#include <hpx/iostream.hpp>

#include <boost/mpi.hpp>

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

constexpr int number_of_trials = 10'000;

HPX_REGISTER_CHANNEL(int);

int hpx_main() {
    boost::mpi::environment theEnvironment(boost::mpi::threading::multiple);
    boost::mpi::communicator mpi_world;

    int world_size = hpx::get_num_localities().get();
    int data_send = 4;
    int data_receive = 0;
    int my_rank = hpx::get_locality_id();
    int i_am_even = my_rank % 2 == 0 ? 1 : -1;
    // Positive modulo: wrap the neighbour index into [0, world_size)
    int my_neighbour = ((my_rank + i_am_even) % world_size + world_size) % world_size;

    // One channel per locality, registered once and reused with a per-iteration generation
    hpx::distributed::channel<int> send(hpx::find_here());
    auto const iteration_name = hpx::util::format("/data/");
    hpx::register_with_basename(iteration_name, send, my_rank);
    auto recv = hpx::find_from_basename<hpx::distributed::channel<int>>(iteration_name, my_neighbour);

    for (size_t i = 0; i < number_of_trials; ++i) {
        send.set(hpx::launch::async, 4, i);
        auto recv_data = recv.get(hpx::launch::async, i);
        // Odd/even rule: even ranks send first, odd ranks receive first
        if (my_rank % 2 == 0) {
            mpi_world.send(my_neighbour, 0, &data_send, 1);
            mpi_world.recv(my_neighbour, 0, &data_receive, 1);
        } else {
            mpi_world.recv(my_neighbour, 0, &data_receive, 1);
            mpi_world.send(my_neighbour, 0, &data_send, 1);
        }
        recv_data.get();
        hpx::util::format_to(std::cout, "Running {}\n", i);
    }
    // std::cout << hpx::util::format("rank {} will send to {} ", my_rank, my_neighbour) << std::endl;
    // std::cout << hpx::util::format("rank {} will receive from {} ", my_rank, my_neighbour) << std::endl;
    // auto recv_data = recv.get(hpx::launch::sync);
    return hpx::finalize();
}

// Run with 2 ranks
int main(int argc, char *argv[]) {
    // std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1", "hpx.parcel.mpi.enable!=0"};
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
    hpx::init_params init_args;
    init_args.cfg = cfg;
    return hpx::init(argc, argv, init_args);
}
Versions:
HPX: V1.9.0-trunk (AGAS: V3.0), Git: 0bdb2c43a0
Boost: V1.79.0
Hwloc: V2.5.0
MPI: OpenMPI V4.1.4, MPI V3.1
Build:
Type: release
Date: Jun 13 2022 03:48:43
Platform: linux
Compiler: GNU C++ version 11.1.0
Standard Library: GNU libstdc++ version 20210427
Allocator: jemalloc
Even funnier: if I use the channel anti-pattern (creating a new channel on each iteration), it survives the test.
//
// Created by jn98zk on 6/12/22.
//
#include <hpx/hpx_init.hpp>
#include <hpx/channel.hpp>
#include <hpx/future.hpp>
#include <hpx/iostream.hpp>

#include <boost/mpi.hpp>

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

constexpr int number_of_trials = 10'000;

HPX_REGISTER_CHANNEL(int);

int hpx_main() {
    boost::mpi::environment theEnvironment(boost::mpi::threading::funneled);
    boost::mpi::communicator mpi_world;

    int world_size = hpx::get_num_localities().get();
    int data_send = 4;
    int data_receive = 0;
    int my_rank = hpx::get_locality_id();
    int i_am_even = my_rank % 2 == 0 ? 1 : -1;
    // Positive modulo: wrap the neighbour index into [0, world_size)
    int my_neighbour = ((my_rank + i_am_even) % world_size + world_size) % world_size;

    for (size_t i = 0; i < number_of_trials; ++i) {
        // Anti-pattern: create and register a fresh channel on every iteration
        hpx::distributed::channel<int> send(hpx::find_here());
        auto const iteration_name = hpx::util::format("/data/{}", i);
        hpx::register_with_basename(iteration_name, send, my_rank);
        auto recv = hpx::find_from_basename<hpx::distributed::channel<int>>(iteration_name, my_neighbour);

        send.set(hpx::launch::async, 4, i);
        auto recv_data = recv.get(hpx::launch::async, i);
        // Odd/even rule: even ranks send first, odd ranks receive first
        if (my_rank % 2 == 0) {
            mpi_world.send(my_neighbour, i, &data_send, 1);
            mpi_world.recv(my_neighbour, i, &data_receive, 1);
        } else {
            mpi_world.recv(my_neighbour, i, &data_receive, 1);
            mpi_world.send(my_neighbour, i, &data_send, 1);
        }
        recv_data.get();
        hpx::util::format_to(std::cout, "Running {}\n", i);
    }
    // std::cout << hpx::util::format("rank {} will send to {} ", my_rank, my_neighbour) << std::endl;
    // std::cout << hpx::util::format("rank {} will receive from {} ", my_rank, my_neighbour) << std::endl;
    // auto recv_data = recv.get(hpx::launch::sync);
    return hpx::finalize();
}

int main(int argc, char *argv[]) {
    // std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1", "hpx.parcel.mpi.enable!=0"};
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
    hpx::init_params init_args;
    init_args.cfg = cfg;
    return hpx::init(argc, argv, init_args);
}
This should be fixed by #6213. Would you have the time to try it again?
Thanks for the update, I'll be able to report back by Sunday at the latest.
Unfortunately it did not work with the latest main branch :( It still gets stuck after some iterations.
Versions:
HPX: V1.9.0-trunk (AGAS: V3.0), Git: 42f62129fb
Boost: V1.81.0
Hwloc: V2.8.0
MPI: OpenMPI V4.1.4, MPI V3.1
Build:
Type: release
Date: Apr 22 2023 11:52:34
Platform: linux
Compiler: GNU C++ version 11.3.0
Standard Library: GNU libstdc++ version 20220421
Allocator: jemalloc
CMakeLists.txt
cmake_minimum_required(VERSION 3.25)
project(hpx_fun)
find_package(HPX REQUIRED)
find_package(Boost REQUIRED mpi serialization)
find_package(MPI REQUIRED)
set(CMAKE_CXX_STANDARD 17)
add_executable(hpx_fun main.cpp)
target_link_libraries(hpx_fun PUBLIC HPX::hpx HPX::wrap_main HPX::iostreams_component Boost::mpi Boost::serialization MPI::MPI_CXX)
@John98Zakaria thanks for testing! Back to the drawing board, then :/