glusterfs icon indicating copy to clipboard operation
glusterfs copied to clipboard

crash (unknown cause) with test mgmt-handshake-and-volume-sync-post-glusterd-restart.t

Open mykaul opened this issue 2 months ago • 1 comment

Seen when running regression tests (https://build.gluster.org/job/gh_centos7-regression/3404/console , part of https://github.com/gluster/glusterfs/pull/4363 ). I don't think the crash has anything to do with the change; the test also fails consistently (without crashing) when run locally.

Backtrace:

Core was generated by `/build/install/sbin/glusterfs -s 127.1.1.3 --volfile-id shd/patchy -p /d/backen'.
Program terminated with signal 11, Segmentation fault.
#0  _gf_log (domain=0x7fc96a1bb236 "logging-infra", file=0x7fc96a1babf0 "/home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c", function=0x7fc96a1bb5d0 <__FUNCTION__.12655> "gf_log_flush_extra_msgs", line=1656, level=GF_LOG_DEBUG, fmt=0x7fc96a1bb1f8 "Log buffer size reduced. About to flush %d extra log messages") at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:2070
2070	    ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr,

Thread 9 (Thread 0x7fc95aaed700 (LWP 21173)):
#0  0x00007fc967de6ad1 in clone () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc9688eede0 in ?? () from /lib64/libpthread.so.0
No symbol table info available.
#2  0x00007fc95aaed700 in ?? ()
No symbol table info available.
#3  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 8 (Thread 0x7fc95e300700 (LWP 21128)):
#0  0x00007fc967dddb43 in select () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc96a17354c in runner (arg=0x2822000) at /home/jenkins/root/workspace/gh_centos7-regression/contrib/timer-wheel/timer-wheel.c:187
        tv = {tv_sec = 0, tv_usec = 921681}
        base = 0x2822000
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 7 (Thread 0x7fc960584700 (LWP 21124)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a0f4cef in gf_timer_proc (data=0x280e0d8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/timer.c:140
        now = {tv_sec = 4856893, tv_nsec = 792708918}
        reg = 0x280e0d8
        event = 0x27f61e8
        tmp = 0x0
        old_THIS = 0x0
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 6 (Thread 0x7fc95ba43700 (LWP 21130)):
#0  0x00007fc967de70e3 in epoll_wait () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc96a157275 in event_dispatch_epoll_worker (data=0x2802e28) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:715
        event = {events = 4, data = {ptr = 0x400000004, fd = 4, u32 = 4, u64 = 17179869188}}
        ret = 0
        ev_data = 0x2802e28
        event_pool = 0x27d2028
        myindex = 2
        timetodie = 0
        gen = 0
        poller_death_notify = {next = 0x0, prev = 0x0}
        slot = 0x0
        tmp = 0x0
        __FUNCTION__ = "event_dispatch_epoll_worker"
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 5 (Thread 0x7fc95eb01700 (LWP 21127)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a12ffed in syncenv_task (proc=0x281e3e8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:650
        env = 0x281e028
        task = 0x0
        sleep_till = {tv_sec = 1716977336, tv_nsec = 0}
        ret = 0
#2  0x00007fc96a13031b in syncenv_processor (thdata=0x281e3e8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:797
        proc = 0x281e3e8
        task = 0x0
#3  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#4  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 4 (Thread 0x7fc95f302700 (LWP 21126)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a12ffed in syncenv_task (proc=0x281e028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:650
        env = 0x281e028
        task = 0x0
        sleep_till = {tv_sec = 1716977336, tv_nsec = 0}
        ret = 0
#2  0x00007fc96a13031b in syncenv_processor (thdata=0x281e028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:797
        proc = 0x281e028
        task = 0x0
#3  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#4  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 3 (Thread 0x7fc95fd83700 (LWP 21125)):
#0  0x00007fc9688f63c1 in sigwait () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x000000000040b4f5 in ?? ()
No symbol table info available.
#2  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 2 (Thread 0x7fc96a6005c0 (LWP 21123)):
#0  0x00007fc9688f0017 in pthread_join () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a157551 in event_dispatch_epoll (event_pool=0x27d2028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:809
        i = 2
        t_id = 140502802642688
        pollercount = 2
        ret = 0
        ev_data = 0x2802e28
        __FUNCTION__ = "event_dispatch_epoll"
#2  0x00007fc96a117705 in gf_event_dispatch (event_pool=0x27d2028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event.c:115
        ret = -1
        __FUNCTION__ = "gf_event_dispatch"
#3  0x00007fc96a18242c in gf_io_legacy_wait () at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io-legacy.c:35
No locals.
#4  0x00007fc96a17c970 in gf_io_main (workers=0, handlers=0x7ffd0d0ad4c0, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io.c:431
        signals = {11, 7, 4, 31, 8, 6, 18, 0}
        pool = {mutex = {__data = {__lock = 4213136, __count = 0, __owner = 218813904, __nusers = 32765, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0xd0ac3f0}}, __size = "\220I@\000\000\000\000\000\320\325\n\r\375\177", '\000' <repeats 18 times>, "\360\303\n\r\000\000\000", __align = 4213136}, threads = {next = 0x7fc96a3e87e0 <gf_io_engine_legacy>, prev = 0x0}}
        cfg = {name = 0x0, pool = 0x0, cpus = 0x0, setup = 0x0, main = 0x0, signals = 0x7fc95f303000, num_threads = 0, stack_size = 0, priority = 0, first_id = 0, index = 0, timeout = 0, retries = 0}
        res = 0
#5  0x00007fc96a17ccc2 in gf_io_run (name=0x0, handlers=0x7ffd0d0ad4c0, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io.c:516
        engine = 0x7fc96a3e87e0 <gf_io_engine_legacy>
        i = 0
        res = 0
        __FUNCTION__ = "gf_io_run"
#6  0x000000000040c50f in ?? ()
No symbol table info available.
#7  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 1 (Thread 0x7fc95c244700 (LWP 21129)):
#0  _gf_log (domain=0x7fc96a1bb236 "logging-infra", file=0x7fc96a1babf0 "/home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c", function=0x7fc96a1bb5d0 <__FUNCTION__.12655> "gf_log_flush_extra_msgs", line=1656, level=GF_LOG_DEBUG, fmt=0x7fc96a1bb1f8 "Log buffer size reduced. About to flush %d extra log messages") at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:2070
        basename = 0x7fc96a1bac34 "logging.c"
        new_logfile = 0x0
        ap = {{gp_offset = 48, fp_offset = 48, overflow_arg_area = 0x7fc95c2413b8, reg_save_area = 0x7fc95c2412e0}}
        timestr = "2024-05-29 09:58:57.503978 +0000", '\000' <repeats 223 times>
        tv = {tv_sec = 1716976737, tv_usec = 503978}
        logline = 0x0
        msg = 0x27f6d20 "Log buffer size reduced. About to flush 5 extra log messages"
        ret = 0
        fd = -1
        this = 0x28c0428
        ctx = 0x28c0428
        __PRETTY_FUNCTION__ = "_gf_log"
        __FUNCTION__ = "_gf_log"
#1  0x00007fc96a0e58b5 in gf_log_flush_extra_msgs (log=0x27842e0, new=0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:1653
        count = 5
        i = 5
        iter = 0x27842f0
        tmp = 0x27842f0
        copy = {next = 0x27d0128, prev = 0x27d0628}
        __FUNCTION__ = "gf_log_flush_extra_msgs"
#2  0x00007fc96a0e244c in gf_log_set_log_buf_size (ctx=0x2784000, buf_size=0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:258
        old = 5
#3  0x00007fc96a0e2896 in gf_log_disable_suppression_before_exit (ctx=0x2784000) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:402
No locals.
#4  0x00007fc96a0eca33 in gf_print_trace (signum=11, ctx=0x2784000) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/common-utils.c:650
        msg = '\000' <repeats 1023 times>
        timestr = '\000' <repeats 255 times>
        stack = 0x0
#5  0x000000000040b594 in ?? ()
No symbol table info available.
#6  0x0000000002782000 in ?? ()
No symbol table info available.
#7  0x0000000b67d30881 in ?? ()
No symbol table info available.
#8  0x00007fc95c242060 in ?? ()
No symbol table info available.
#9  <signal handler called>
No symbol table info available.
#10 0x0000000cdeadc0de in ?? ()
No symbol table info available.
#11 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28c0428, event=6, data=0x28bf228) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28bf228
        ret = 0
#12 0x00007fc96a1b8da0 in default_notify (this=0x28bf228, event=6, data=0x28b6828) at defaults.c:3387
        parent = 0x2b02528
        ret = 0
        victim = 0x28b6828
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#13 0x00007fc95ab15d03 in notify (this=0x28bf228, event=6, data=0x28b6828) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/debug/io-stats/src/io-stats.c:4332
        ret = 0
        args = {type = IOS_DUMP_TYPE_NONE, u = {logfp = 0x0, dict = 0x0}}
        output = 0x7fc95ab154de <notify>
        dict = 0x28b6828
        op = 0
        list_cnt = 0
        throughput = 0
        time = 0
        is_peek = false
        ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0x7fc95c242330, reg_save_area = 0x7fc95c242270}}
        up_data = 0x0
        up_ci = 0x0
        __FUNCTION__ = "notify"
#14 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28bf228, event=6, data=0x28b6828) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28b6828
        ret = 0
#15 0x00007fc96a1b8da0 in default_notify (this=0x28b6828, event=6, data=0x28b3228) at defaults.c:3387
        parent = 0x2803928
        ret = 0
        victim = 0x28b3228
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#16 0x00007fc95adb988a in afr_notify (this=0x28b6828, event=6, data=0x28b3228, data2=0x7fc95adbea0e <notify>) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/cluster/afr/src/afr-common.c:6424
        priv = 0x27cce28
        child_xlator = 0x28b3228
        i = 2
        propagate = 1
        had_heard_from_all = 0
        have_heard_from_all = 1
        idx = 1
        ret = 0
        call_psh = 0
        up_child = -1
        input = 0x0
        output = 0x0
        had_quorum = false
        has_quorum = false
        halo_max_latency_msec = 0
        child_latency_msec = -1
        __FUNCTION__ = "afr_notify"
#17 0x00007fc95adbeb18 in notify (this=0x28b6828, event=6, data=0x28b3228) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/cluster/afr/src/afr.c:42
        ret = -1
        ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0x7fc95c242730, reg_save_area = 0x7fc95c242670}}
        data2 = 0x7fc95adbea0e <notify>
#18 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28b6828, event=6, data=0x28b3228) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28b3228
        ret = 0
#19 0x00007fc96a1b8da0 in default_notify (this=0x28b3228, event=6, data=0x2858848) at defaults.c:3387
        parent = 0x2803828
        ret = 0
        victim = 0x2858848
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#20 0x00007fc95afee5d6 in client_notify_dispatch (this=0x28b3228, event=6, data=0x2858848) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:146
        ret = -1
        ctx = 0x2784000
        conf = 0x27ce9c8
#21 0x00007fc95afee4a9 in client_notify_dispatch_uniq (this=0x28b3228, event=6, data=0x2858848) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:118
        conf = 0x27ce9c8
        ctx = 0x2784000
        graph = 0x28144a8
#22 0x00007fc95affd048 in client_rpc_notify (rpc=0x2858848, mydata=0x28b3228, event=RPC_CLNT_DISCONNECT, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:2275
        this = 0x28b3228
        conf = 0x27ce9c8
        is_parent_down = false
        ret = 0
        __FUNCTION__ = "client_rpc_notify"
#23 0x00007fc969e8596c in rpc_clnt_handle_disconnect (clnt=0x2858848, conn=0x2858878) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-clnt.c:785
        ts = {tv_sec = 0, tv_nsec = 0}
        unref_clnt = false
        pre_notify_gen = 0
        post_notify_gen = 0
        __FUNCTION__ = "rpc_clnt_handle_disconnect"
#24 0x00007fc969e85c2b in rpc_clnt_notify (trans=0x2851ba8, mydata=0x2858878, event=RPC_TRANSPORT_DISCONNECT, data=0x2851ba8) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-clnt.c:846
        conn = 0x2858878
        clnt = 0x2858848
        ret = -1
        req_info = 0x0
        pollin = 0x0
        clnt_mydata = 0x0
        old_THIS = 0x28b3228
        __FUNCTION__ = "rpc_clnt_notify"
#25 0x00007fc969e822ba in rpc_transport_notify (this=0x2851ba8, event=RPC_TRANSPORT_DISCONNECT, data=0x2851ba8) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-transport.c:504
        ret = -1
        __FUNCTION__ = "rpc_transport_notify"
#26 0x00007fc95d8e422c in socket_event_poll_err (this=0x2851ba8, gen=1, idx=3) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-transport/socket/src/socket.c:1366
        priv = 0x27836a8
        socket_closed = true
#27 0x00007fc95d8e9124 in socket_event_handler (fd=12, idx=3, gen=1, data=0x2851ba8, poll_in=1, poll_out=0, poll_err=16, event_thread_died=0) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-transport/socket/src/socket.c:2821
        sa = 0x2851c68
        this = 0x2851ba8
        priv = 0x27836a8
        ret = -1
        ctx = 0x2784000
        socket_closed = false
        notify_handled = false
        __FUNCTION__ = "socket_event_handler"
#28 0x00007fc96a156d94 in event_dispatch_epoll_handler (event_pool=0x27d2028, event=0x7fc95c243050) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:614
        ev_data = 0x7fc95c243054
        slot = 0x27d6238
        handler = 0x7fc95d8e8ac8 <socket_event_handler>
        data = 0x2851ba8
        idx = 3
        gen = 1
        ret = 0
        fd = 12
        handled_error_previously = false
        __FUNCTION__ = "event_dispatch_epoll_handler"
#29 0x00007fc96a1572aa in event_dispatch_epoll_worker (data=0x2802de8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:725
        event = {events = 17, data = {ptr = 0x100000003, fd = 3, u32 = 3, u64 = 4294967299}}
        ret = 1
        ev_data = 0x2802de8
        event_pool = 0x27d2028
        myindex = 1
        timetodie = 0
        gen = 0
        poller_death_notify = {next = 0x0, prev = 0x0}
        slot = 0x0
        tmp = 0x0
        __FUNCTION__ = "event_dispatch_epoll_worker"
#30 0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#31 0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

mgmt-handshake-and-volume-sync-post-glusterd-restart-iteration-1.tar.gz

mykaul avatar May 30 '24 05:05 mykaul