mercury
mercury copied to clipboard
bus error happens at various locations
Describe the bug: A bus error (SIGBUS) occurs at various locations.
Screenshots: https://github.com/mercury-hpc/mercury/blob/v2.1.0/src/util/mercury_poll.c#L355
Program terminated with signal 7, Bus error.
#0 0x00002ab2d2ab1cc6 in hg_poll_wait (poll_set=0x29abd00,
timeout=timeout@entry=100, max_events=max_events@entry=1,
events=events@entry=0x29a8950,
actual_events=actual_events@entry=0x2ab30fd1afb4)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_poll.c:355
355 /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_poll.c: No such file or directory.
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 0x00002ab2d2ab1cc6 in hg_poll_wait (poll_set=0x29abd00,
timeout=timeout@entry=100, max_events=max_events@entry=1,
events=events@entry=0x29a8950,
actual_events=actual_events@entry=0x2ab30fd1afb4)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_poll.c:355
#1 0x00002ab2d2697900 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=100, context=0x29a8850)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3718
#2 hg_core_progress (context=0x29a8850, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#3 0x00002ab2d269c88b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#4 0x00002ab2d268e8d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#5 0x00002ab2d226c43c in __margo_hg_progress_fn (foo=0x2c5f350)
at src/margo-core.c:1482
#6 0x00002ab2d2ef246a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#7 0x00002ab2d2ef25f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#8 0x0000000000000000 in ?? ()
(gdb) p *poll_set
$2 = {lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0,
__kind = 512, __spins = 0, __elision = 0, __list = {__prev = 0x0,
__next = 0x0}},
__size = '\000' <repeats 17 times>, "\002", '\000' <repeats 21 times>,
__align = 0}, events = 0x29a4980, max_events = 32, nfds = 2, fd = 117}
(gdb) p *poll_set->events
$3 = {events = 1, data = {ptr = 0x3, fd = 3, u32 = 3, u64 = 3}}
https://github.com/mercury-hpc/mercury/blob/v2.1.0/src/mercury_proc.c#L380
Program terminated with signal 7, Bus error.
#0 hg_proc_checksum_update (proc=proc@entry=0x30d1f40,
data=data@entry=0x7ffc33402a10, data_size=data_size@entry=4)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_proc.c:380
380 /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_proc.c: No such file or directory.
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 hg_proc_checksum_update (proc=proc@entry=0x30d1f40,
data=data@entry=0x7ffc33402a10, data_size=data_size@entry=4)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_proc.c:380
#1 0x00002b9b73da3ff4 in hg_proc_hg_int32_t (data=<optimized out>,
proc=0x30d1f40)
at /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/include/mercury_proc.h:745
#2 hg_proc_kv_get_rdma_out_t (proc=0x30d1f40, data=0x7ffc33402a10)
at kv_types.h:45
#3 0x00002b9b743d8718 in hg_get_struct (hg_handle=0x30d1d70,
hg_proc_info=<optimized out>, op=op@entry=HG_OUTPUT,
struct_ptr=struct_ptr@entry=0x7ffc33402a10)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:505
#4 0x00002b9b743daf4b in HG_Get_output (handle=<optimized out>,
out_struct=out_struct@entry=0x7ffc33402a10)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:1774
#5 0x00002b9b73da736a in fs_rpc_inode_write_rdma_bulk (
server=server@entry=0x30dc960 "ofi+verbs;ofi_rxm://10.10.10.9:42020",
key=key@entry=0x30d7cc0, key_size=key_size@entry=84,
client=client@entry=0x2b9b73fad4a0 <chfs_client> "ofi+verbs;ofi_rxm://10.10.0.14:36935", buf=0x34b4d80, size=size@entry=0x7ffc33402b60,
offset=offset@entry=0, mode=mode@entry=33204,
chunk_size=chunk_size@entry=1048576, errp=errp@entry=0x7ffc33402b5c)
at fs_client.c:230
#6 0x00002b9b73da74cb in fs_rpc_inode_write_rdma (
server=server@entry=0x30dc960 "ofi+verbs;ofi_rxm://10.10.10.9:42020",
key=key@entry=0x30d7cc0, key_size=key_size@entry=84,
client=client@entry=0x2b9b73fad4a0 <chfs_client> "ofi+verbs;ofi_rxm://10.10.0.14:36935", buf=buf@entry=0x2b9bb340c000, size=size@entry=0x7ffc33402b60,
offset=offset@entry=0, mode=mode@entry=33204,
chunk_size=chunk_size@entry=1048576, errp=errp@entry=0x7ffc33402b5c)
at fs_client.c:264
#7 0x00002b9b73da1afb in chfs_rpc_inode_write (errp=0x7ffc33402b5c,
chunk_size=1048576, mode=33204, offset=0, size=0x7ffc33402b60,
buf=0x2b9bb340c000, key_size=84, key=0x30d7cc0) at chfs.c:455
#8 chfs_pwrite (fd=0, buf=0x2b9bb340c000, size=1048576, offset=7877951488)
at chfs.c:699
#9 0x00000000004286ae in WriteOrReadSingle (offset=7877951488,
pretendRank=pretendRank@entry=219, transfer=1048576,
transferCount=transferCount@entry=0x7ffc33402c88,
errors=errors@entry=0x7ffc33402c7c, test=test@entry=0x2bdd9e0,
fd=0x30e1dd0, access=0, ioBuffers=<optimized out>) at ior.c:1639
#10 0x000000000042ad75 in WriteOrRead (test=test@entry=0x2bdd9e0,
results=results@entry=0x2bddbf0, fd=fd@entry=0x30e1dd0,
access=access@entry=0, ioBuffers=ioBuffers@entry=0x7ffc33402e50)
at ior.c:1772
#11 0x000000000042bd54 in TestIoSys (test=0x2bdd9e0) at ior.c:1260
#12 0x000000000042d15e in ior_run (argc=28, argv=0x2beb2f0,
world_com=<optimized out>, world_out=<optimized out>) at ior.c:168
#13 0x000000000040a6cf in ior_process_write (argv=0x2bdfa50,
out=0x2b9b76508400 <_IO_2_1_stdout_>, res_out=0x65ff90 <o+16>)
at src/phase_ior.c:6
#14 0x000000000040ac4c in run () at src/phase_ior_easy_write.c:48
#15 0x0000000000406031 in main (argc=4, argv=0x7ffc33403fa8) at src/main.c:389
(gdb) p *proc
$2 = {proc_buf = {buf = 0x2b9b8f5e61f0, buf_ptr = 0x2b9b8f5e61f4, size = 4072,
size_left = 4068, is_mine = 0 '\000'}, extra_buf = {buf = 0x0,
buf_ptr = 0x0, size = 0, size_left = 0, is_mine = 0 '\000'},
hg_class = 0x2bde8e0, current_buf = 0x30d1f40, checksum = 0x30d1fd0,
checksum_hash = 0x30d2030, checksum_size = 4, op = HG_DECODE,
flags = 0 '\000'}
https://github.com/mercury-hpc/mercury/blob/v2.1.0/src/mercury_core.c#L3842
Program terminated with signal 7, Bus error.
#0 0x00002b32cf14f600 in NA_Trigger@plt ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/lib/libmercury.so.2
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 0x00002b32cf14f600 in NA_Trigger@plt ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/lib/libmercury.so.2
#1 0x00002b32cf159f8d in hg_core_progress_na (na_class=0x21f26c0,
na_context=0x2432b00, timeout_ms=timeout_ms@entry=0,
progressed_ptr=progressed_ptr@entry=0x2b330cf3cfb3 "")
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3842
#2 0x00002b32cf15cbf6 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=100, context=0x24329c0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3760
#3 hg_core_progress (context=0x24329c0, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#4 0x00002b32cf16188b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#5 0x00002b32cf1538d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#6 0x00002b32ced3143c in __margo_hg_progress_fn (foo=0x26c7b90)
at src/margo-core.c:1482
#7 0x00002b32cf9b746a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#8 0x00002b32cf9b75f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#9 0x0000000000000000 in ?? ()
(gdb) p *na_context
$1 = {plugin_context = 0x240c9a0}
(gdb) p *na_class
$2 = {ops = 0x2b32cf99ba20 <na_ofi_class_ops_g>, plugin_class = 0x243cdf0,
protocol_name = 0x243d170 "verbs;ofi_rxm", progress_mode = 0,
listen = 0 '\000'}
https://github.com/mercury-hpc/mercury/blob/v2.1.0/src/na/na.c#L1208
Program terminated with signal 7, Bus error.
#0 NA_Progress (na_class=na_class@entry=0x13096d0,
context=context@entry=0x1527ef0, timeout_ms=0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c:1208
1208 /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c: No such file or directory.
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 NA_Progress (na_class=na_class@entry=0x13096d0,
context=context@entry=0x1527ef0, timeout_ms=0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c:1208
#1 0x00002aacecb050a9 in hg_core_progress_na (na_class=0x13096d0,
na_context=0x1527ef0, timeout_ms=timeout_ms@entry=0,
progressed_ptr=progressed_ptr@entry=0x2aad2ac04fb3 "")
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_time.h:445
#2 0x00002aacecb07bf6 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=99, context=0x1527db0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3760
#3 hg_core_progress (context=0x1527db0, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#4 0x00002aacecb0c88b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#5 0x00002aacecafe8d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#6 0x00002aacec6dc43c in __margo_hg_progress_fn (foo=0x17ded50)
at src/margo-core.c:1482
#7 0x00002aaced36246a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#8 0x00002aaced3625f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#9 0x0000000000000000 in ?? ()
(gdb) p *na_class
$1 = {ops = 0x2aaced346a20 <na_ofi_class_ops_g>, plugin_class = 0x1553e40,
protocol_name = 0x152f0c0 "verbs;ofi_rxm", progress_mode = 0,
listen = 0 '\000'}
(gdb) p *context
$2 = {plugin_context = 0x1527fe0}
Program terminated with signal 7, Bus error.
#0 0x00002b996f7d9190 in ofi_cq_progress ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 0x00002b996f7d9190 in ofi_cq_progress ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#1 0x00002b996f7d860f in ofi_cq_readfrom ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#2 0x00002b996daa6823 in fi_cq_readfrom (src_addr=0x2b99b3259ad0, count=16,
buf=0x2b99b3259b50, cq=0x2899880)
at /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/include/rdma/fi_eq.h:400
#3 na_ofi_cq_read (max_count=16, context=0x286b150,
actual_count=<synthetic pointer>, src_err_addrlen=<synthetic pointer>,
src_err_addr=<synthetic pointer>, src_addrs=0x2b99b3259ad0,
cq_events=0x2b99b3259b50)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na_ofi.c:3221
#4 na_ofi_progress (na_class=0x264dcc0, context=0x286b150, timeout_ms=0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na_ofi.c:5208
#5 0x00002b996da9e133 in NA_Progress (na_class=na_class@entry=0x264dcc0,
context=context@entry=0x286b150, timeout_ms=<optimized out>)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c:1272
#6 0x00002b996d4750a9 in hg_core_progress_na (na_class=0x264dcc0,
na_context=0x286b150, timeout_ms=timeout_ms@entry=0,
progressed_ptr=progressed_ptr@entry=0x2b99b3259fb3 "")
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_time.h:445
#7 0x00002b996d477bf6 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=100, context=0x286b010)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3760
#8 hg_core_progress (context=0x286b010, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#9 0x00002b996d47c88b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#10 0x00002b996d46e8d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#11 0x00002b996d04c43c in __margo_hg_progress_fn (foo=0x2b21cd0)
at src/margo-core.c:1482
#12 0x00002b996dcd246a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#13 0x00002b996dcd25f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#14 0x0000000000000000 in ?? ()
https://github.com/mercury-hpc/mchecksum/blob/57d9eeeb433c4c2a6b2b9a0572822c1beb9515d6/src/mchecksum.c#L158
Program terminated with signal 7, Bus error.
#0 mchecksum_update (checksum=0x233ab90, data=0x233aa50, size=1)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mchecksum/src/mchecksum.c:158
158 /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mchecksum/src/mchecksum.c: No such file or directory.
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 mchecksum_update (checksum=0x233ab90, data=0x233aa50, size=1)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mchecksum/src/mchecksum.c:158
#1 0x00002b84738abdd6 in hg_core_header_response_proc (op=op@entry=HG_DECODE,
buf=0x2b84ad2c0228, buf_size=<optimized out>,
hg_core_header=hg_core_header@entry=0x233aa50)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core_header.c:237
#2 0x00002b84738a5ac4 in hg_core_proc_header_response (op=HG_DECODE,
hg_core_header=0x233aa50, hg_core_handle=0x233a990)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:838
#3 hg_core_process_output (hg_core_handle=0x233a990,
completed=0x2b84b1686dff "\001",
done_callback=0x2b84738a36b0 <hg_core_send_ack>)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3307
#4 0x00002b84738a86cc in hg_core_recv_output_cb (
callback_info=<optimized out>)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3256
#5 0x00002b8473ecd4f3 in NA_Trigger (context=context@entry=0x1e4c7a0,
timeout_ms=timeout_ms@entry=0, max_count=max_count@entry=1,
callback_ret=callback_ret@entry=0x2b84b1686f3c,
actual_count=actual_count@entry=0x2b84b1686f38)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c:1407
#6 0x00002b84738a3f8d in hg_core_progress_na (na_class=0x1e4c770,
na_context=0x1e4c7a0, timeout_ms=timeout_ms@entry=0,
progressed_ptr=progressed_ptr@entry=0x2b84b1686fb3 "")
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3842
#7 0x00002b84738a6bf6 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=100, context=0x2096670)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3760
#8 hg_core_progress (context=0x2096670, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#9 0x00002b84738ab88b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#10 0x00002b847389d8d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#11 0x00002b847347b43c in __margo_hg_progress_fn (foo=0x2321e10)
at src/margo-core.c:1482
#12 0x00002b847410146a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#13 0x00002b84741015f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#14 0x0000000000000000 in ?? ()
(gdb) p checksum_class
$2 = (struct mchecksum_class *) 0x233ab90
(gdb) p *checksum_class
$3 = {data = 0x233abd0, destroy = 0x2b8473ab7fc0 <mchecksum_crc16_destroy>,
reset = 0x2b8473ab7f60 <mchecksum_crc16_reset>,
get_size = 0x2b8473ab7f70 <mchecksum_crc16_get_size>,
get = 0x2b8473ab7fe0 <mchecksum_crc16_get>,
update = 0x2b8473ab7f80 <mchecksum_crc16_update>}
Program terminated with signal 7, Bus error.
#0 0x00002abd1c691e10 in ofi_mutex_lock_op ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
Missing separate debuginfos, use: debuginfo-install glibc-2.17-325.el7_9.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-51.el7_9.x86_64 libatomic-4.8.5-44.el7.x86_64 libcom_err-1.42.9-19.el7.x86_64 libgcc-4.8.5-44.el7.x86_64 libibverbs-55mlnx37-1.55103.x86_64 libnl3-3.2.28-4.el7.x86_64 librdmacm-55mlnx37-1.55103.x86_64 libselinux-2.5-15.el7.x86_64 lustre-client-2.12.6_ddn66-1.el7.x86_64 ncurses-libs-5.9-14.20130511.el7_4.x86_64 numactl-libs-2.0.12-5.el7.x86_64 openssl-libs-1.0.2k-25.el7_9.x86_64 pcre-8.32-17.el7.x86_64 readline-6.2-11.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 0x00002abd1c691e10 in ofi_mutex_lock_op ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#1 0x00002abd1c6f4470 in rxm_cq_write_tx_comp.isra.14.part.15 ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#2 0x00002abd1c6f46a2 in rxm_finish_eager_send ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#3 0x00002abd1c6f6f3c in rxm_handle_comp ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#4 0x00002abd1c6f8504 in rxm_ep_do_progress ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#5 0x00002abd1c6f86d1 in rxm_ep_progress ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#6 0x00002abd1c6a81bd in ofi_cq_progress ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#7 0x00002abd1c6a760f in ofi_cq_readfrom ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/lib/libfabric.so.1
#8 0x00002abd1a975823 in fi_cq_readfrom (src_addr=0x2abd59004ad0, count=16,
buf=0x2abd59004b50, cq=0x2278550)
at /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/libfabric-main-j4mgzyne5655z5vye4pvkncq36thjk6y/include/rdma/fi_eq.h:400
#9 na_ofi_cq_read (max_count=16, context=0x229efd0,
actual_count=<synthetic pointer>, src_err_addrlen=<synthetic pointer>,
src_err_addr=<synthetic pointer>, src_addrs=0x2abd59004ad0,
cq_events=0x2abd59004b50)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na_ofi.c:3221
#10 na_ofi_progress (na_class=0x2058330, context=0x229efd0, timeout_ms=0)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na_ofi.c:5208
#11 0x00002abd1a96d133 in NA_Progress (na_class=na_class@entry=0x2058330,
context=context@entry=0x229efd0, timeout_ms=<optimized out>)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/na/na.c:1272
#12 0x00002abd1a3440a9 in hg_core_progress_na (na_class=0x2058330,
na_context=0x229efd0, timeout_ms=timeout_ms@entry=0,
progressed_ptr=progressed_ptr@entry=0x2abd59004fb3 "")
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/util/mercury_time.h:445
#13 0x00002abd1a346bf6 in hg_core_poll_wait (
progressed_ptr=<synthetic pointer>, timeout_ms=100, context=0x229ee90)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3760
#14 hg_core_progress (context=0x229ee90, timeout_ms=timeout_ms@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:3660
#15 0x00002abd1a34b88b in HG_Core_progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury_core.c:5029
#16 0x00002abd1a33d8d3 in HG_Progress (context=<optimized out>,
timeout=timeout@entry=100)
at /.../spack-stage-mercury-2.1.0-otei3gni3ceongkg3rbtpzvrddjnvjqv/spack-src/src/mercury.c:2022
#17 0x00002abd19f1b43c in __margo_hg_progress_fn (foo=0x252b410)
at src/margo-core.c:1482
#18 0x00002abd1aba146a in ABTD_ythread_func_wrapper ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#19 0x00002abd1aba15f1 in make_fcontext ()
from /.../spack/linux-rhel7-skylake_avx512/gcc-8.3.1/argobots-1.1-4gyndgmftboa6lwbq745v4nmeesjrtoj/lib/libabt.so.1
#20 0x0000000000000000 in ?? ()
Platform (please complete the following information):
- mercury@2.1.0
- libfabric@main
- [email protected]
- [email protected]
- rdma-core@55mlnx37
System description
- Compiler version: gcc-8.3.1
- Plugin and protocol used: ofi+verbs
- Dependency version: libfabric@main