flux-core
valgrind reports new leaks with hwloc 2.10
`t5000-valgrind.t` fails with the following report on TOSS 4 systems with hwloc-2.10:
==1463441==
==1463441== HEAP SUMMARY:
==1463441== in use at exit: 34,829 bytes in 135 blocks
==1463441== total heap usage: 171,913 allocs, 171,778 frees, 98,729,321 bytes allocated
==1463441==
==1463441== 496 bytes in 1 blocks are possibly lost in loss record 56 of 70
==1463441== at 0x4C3D1C3: calloc (vg_replace_malloc.c:1554)
==1463441== by 0x4015322: UnknownInlinedFun (rtld-malloc.h:44)
==1463441== by 0x4015322: allocate_dtv (dl-tls.c:371)
==1463441== by 0x4015D51: _dl_allocate_tls (dl-tls.c:629)
==1463441== by 0x4E51E32: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.28.so)
==1463441== by 0xED57BE2: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463441== by 0xEE1800E: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463441== by 0xECF84E2: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463441== by 0xED6A4D7: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463441== by 0xCCC0519: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463441== by 0xCCC38DF: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463441== by 0x4E58E66: __pthread_once_slow (in /usr/lib64/libpthread-2.28.so)
==1463441== by 0xCD0C658: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463441== by 0xCCB40BE: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463441== by 0xCCD7279: cudaGetDeviceCount (in /usr/lib64/hwloc/libcudart.so.12)
==1463441== by 0xCA880D8: hwloc_cuda_discover (in /usr/lib64/hwloc/hwloc_cuda.so)
==1463441== by 0xB9DC90B: hwloc_discover_by_phase (in /usr/lib64/libhwloc.so.15.7.0)
==1463441== by 0xB9DD08D: hwloc_discover (in /usr/lib64/libhwloc.so.15.7.0)
==1463441== by 0xB9DE1FE: hwloc_topology_load (in /usr/lib64/libhwloc.so.15.7.0)
==1463441== by 0x4188C6B: rhwloc_local_topology_load (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463441== by 0x4188CC4: rhwloc_local_topology_xml (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463441== by 0x4179ED4: topo_get_local_xml (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463441== by 0x4179FAF: topo_create (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463441== by 0x4178BFC: mod_main (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463441== by 0x410301: module_thread (in /g/g0/grondo/git/flux-core.git/src/broker/.libs/lt-flux-broker)
==1463441== by 0x4E511C9: start_thread (in /usr/lib64/libpthread-2.28.so)
==1463441== by 0x6043E72: clone (in /usr/lib64/libc-2.28.so)
==1463441==
{
<insert_a_suppression_name_here>
Memcheck:Leak
match-leak-kinds: possible
fun:calloc
fun:UnknownInlinedFun
fun:allocate_dtv
fun:_dl_allocate_tls
fun:pthread_create@@GLIBC_2.2.5
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/hwloc/libcudart.so.12
obj:/usr/lib64/hwloc/libcudart.so.12
fun:__pthread_once_slow
obj:/usr/lib64/hwloc/libcudart.so.12
obj:/usr/lib64/hwloc/libcudart.so.12
fun:cudaGetDeviceCount
fun:hwloc_cuda_discover
fun:hwloc_discover_by_phase
fun:hwloc_discover
fun:hwloc_topology_load
fun:rhwloc_local_topology_load
fun:rhwloc_local_topology_xml
fun:topo_get_local_xml
fun:topo_create
fun:mod_main
fun:module_thread
fun:start_thread
fun:clone
}
==1463441== LEAK SUMMARY:
==1463441== definitely lost: 0 bytes in 0 blocks
==1463441== indirectly lost: 0 bytes in 0 blocks
==1463441== possibly lost: 496 bytes in 1 blocks
==1463441== still reachable: 34,333 bytes in 134 blocks
==1463441== suppressed: 0 bytes in 0 blocks
==1463441== Reachable blocks (those to which a pointer was found) are not shown.
==1463441== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==1463441==
==1463441== For lists of detected and suppressed errors, rerun with: -s
==1463441== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
flux-start: 1 (pid 1463441) exited with rc=1
[0-1]: Terminated
==1463440==
==1463440== HEAP SUMMARY:
==1463440== in use at exit: 47,554 bytes in 170 blocks
==1463440== total heap usage: 727,742 allocs, 727,572 frees, 237,786,590 bytes allocated
==1463440==
==1463440== 496 bytes in 1 blocks are possibly lost in loss record 55 of 69
==1463440== at 0x4C3D1C3: calloc (vg_replace_malloc.c:1554)
==1463440== by 0x4015322: UnknownInlinedFun (rtld-malloc.h:44)
==1463440== by 0x4015322: allocate_dtv (dl-tls.c:371)
==1463440== by 0x4015D51: _dl_allocate_tls (dl-tls.c:629)
==1463440== by 0x4E51E32: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.28.so)
==1463440== by 0xF66CBE2: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463440== by 0xF72D00E: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463440== by 0xF60D4E2: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463440== by 0xF67F4D7: ??? (in /usr/lib64/libcuda.so.545.23.08)
==1463440== by 0xD5D5519: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463440== by 0xD5D88DF: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463440== by 0x4E58E66: __pthread_once_slow (in /usr/lib64/libpthread-2.28.so)
==1463440== by 0xD621658: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463440== by 0xD5C90BE: ??? (in /usr/lib64/hwloc/libcudart.so.12)
==1463440== by 0xD5EC279: cudaGetDeviceCount (in /usr/lib64/hwloc/libcudart.so.12)
==1463440== by 0xD39D0D8: hwloc_cuda_discover (in /usr/lib64/hwloc/hwloc_cuda.so)
==1463440== by 0xC2F190B: hwloc_discover_by_phase (in /usr/lib64/libhwloc.so.15.7.0)
==1463440== by 0xC2F208D: hwloc_discover (in /usr/lib64/libhwloc.so.15.7.0)
==1463440== by 0xC2F31FE: hwloc_topology_load (in /usr/lib64/libhwloc.so.15.7.0)
==1463440== by 0x4188C6B: rhwloc_local_topology_load (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463440== by 0x4188CC4: rhwloc_local_topology_xml (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463440== by 0x4179ED4: topo_get_local_xml (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463440== by 0x4179FAF: topo_create (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463440== by 0x4178BFC: mod_main (in /g/g0/grondo/git/flux-core.git/src/modules/.libs/resource.so)
==1463440== by 0x410301: module_thread (in /g/g0/grondo/git/flux-core.git/src/broker/.libs/lt-flux-broker)
==1463440== by 0x4E511C9: start_thread (in /usr/lib64/libpthread-2.28.so)
==1463440== by 0x6043E72: clone (in /usr/lib64/libc-2.28.so)
==1463440==
{
<insert_a_suppression_name_here>
Memcheck:Leak
match-leak-kinds: possible
fun:calloc
fun:UnknownInlinedFun
fun:allocate_dtv
fun:_dl_allocate_tls
fun:pthread_create@@GLIBC_2.2.5
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/libcuda.so.545.23.08
obj:/usr/lib64/hwloc/libcudart.so.12
obj:/usr/lib64/hwloc/libcudart.so.12
fun:__pthread_once_slow
obj:/usr/lib64/hwloc/libcudart.so.12
obj:/usr/lib64/hwloc/libcudart.so.12
fun:cudaGetDeviceCount
fun:hwloc_cuda_discover
fun:hwloc_discover_by_phase
fun:hwloc_discover
fun:hwloc_topology_load
fun:rhwloc_local_topology_load
fun:rhwloc_local_topology_xml
fun:topo_get_local_xml
fun:topo_create
fun:mod_main
fun:module_thread
fun:start_thread
fun:clone
}
==1463440== LEAK SUMMARY:
==1463440== definitely lost: 0 bytes in 0 blocks
==1463440== indirectly lost: 0 bytes in 0 blocks
==1463440== possibly lost: 496 bytes in 1 blocks
==1463440== still reachable: 47,058 bytes in 169 blocks
==1463440== suppressed: 0 bytes in 0 blocks
==1463440== Reachable blocks (those to which a pointer was found) are not shown.
==1463440== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==1463440==
==1463440== For lists of detected and suppressed errors, rerun with: -s
==1463440== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
flux-start: 0 (pid 1463440) exited with rc=1
not ok 1 - valgrind reports no new errors on 2 broker run
#
# run_timeout 300 \
# flux start -s ${VALGRIND_NBROKERS} \
# --test-exit-timeout=120 \
# -o,--config-path=valgrind.toml \
# --wrap=libtool,e,${VALGRIND} \
# --wrap=--tool=memcheck \
# --wrap=--leak-check=full \
# --wrap=--gen-suppressions=all \
# --wrap=--trace-children=no \
# --wrap=--child-silent-after-fork=yes \
# --wrap=--num-callers=30 \
# --wrap=--leak-resolution=med \
# --wrap=--error-exitcode=1 \
# --wrap=--suppressions=$VALGRIND_SUPPRESSIONS \
# ${VALGRIND_WORKLOAD}
#
# failed 1 among 1 test(s)
1..1