grpc-rs icon indicating copy to clipboard operation
grpc-rs copied to clipboard

gRPC looks like hanging.

Open Percivalll opened this issue 4 years ago • 3 comments

Describe the bug In some unknown circumstances (more often the network environment is unstable), the grpc client seems to be deadlocked. To Reproduce Steps to reproduce the behavior: I am currently unable to reproduce this problem stably.But I may give some useful information. Strace the process:

pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=23833744} <unfinished ...>
[pid 1406645] <... nanosleep resumed> 0x7eff9557ce80) = 0
[pid 1406645] read(17, "Sep 20 10:37:01 n63-070-024 CRON"..., 8192) = 199
[pid 1406645] lseek(17, 4490814, SEEK_SET) = 4490814
[pid 1406645] read(17, "Sep 20 10:37:01 n63-070-024 CRON"..., 8192) = 94
[pid 1406645] lseek(17, 4490908, SEEK_SET) = 4490908
[pid 1406645] read(17, "", 8192)        = 0
[pid 1406645] open("/var/log/auth.log", O_RDONLY|O_CLOEXEC) = 23
[pid 1406645] fcntl(23, F_SETFD, FD_CLOEXEC) = 0
[pid 1406645] fstat(23, {st_mode=S_IFREG|0640, st_size=4490908, ...}) = 0
[pid 1406645] close(23)                 = 0
[pid 1406645] nanosleep({tv_sec=1, tv_nsec=0},  <unfinished ...>
[pid 1404722] <... futex resumed> )     = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=20861971}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=72882151}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=94858896}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12843522}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=70883962}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6857263}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=120898002}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11840025}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11861492}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12879115}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=1887330}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=17889917}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=18891225}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=69881358}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=16867428}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=55887305}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=8875376}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=2903761}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6908340}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=43913209}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=36861745}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14870291}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=54882728}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14870553}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=10885869}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=33904903}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=21884679}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=45882872}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=57875568} <unfinished ...>
[pid 1406777] <... nanosleep resumed> 0x7eff84a42400) = 0
[pid 1406777] nanosleep({tv_sec=1, tv_nsec=0},  <unfinished ...>
[pid 1404722] <... futex resumed> )     = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=24856687} <unfinished ...>
[pid 1406645] <... nanosleep resumed> 0x7eff9557ce80) = 0
[pid 1406645] read(17, "", 8192)        = 0
[pid 1406645] open("/var/log/auth.log", O_RDONLY|O_CLOEXEC) = 23
[pid 1406645] fcntl(23, F_SETFD, FD_CLOEXEC) = 0
[pid 1406645] fstat(23, {st_mode=S_IFREG|0640, st_size=4490908, ...}) = 0
[pid 1406645] close(23)                 = 0
[pid 1406645] nanosleep({tv_sec=1, tv_nsec=0},  <unfinished ...>
[pid 1404722] <... futex resumed> )     = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=20868529}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=72886230}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=94864432}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12851355}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=70895179}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6857331}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=120887438}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11846056}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11857190}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12928711}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=1878889}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=17899797}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=18889984}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=69889745}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=16869166}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=55881447}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=8865246}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=2887460}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6906162}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=43906956}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=36880049}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14870471}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=54868615}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14869292}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=10880533}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=33890803}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=21881601}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=45882705}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=56881574} <unfinished ...>
[pid 1406777] <... nanosleep resumed> 0x7eff84a42400) = 0
[pid 1406777] nanosleep({tv_sec=1, tv_nsec=0},  <unfinished ...>
[pid 1404722] <... futex resumed> )     = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=25907823} <unfinished ...>
[pid 1406645] <... nanosleep resumed> 0x7eff9557ce80) = 0
[pid 1406645] read(17, "", 8192)        = 0
[pid 1406645] open("/var/log/auth.log", O_RDONLY|O_CLOEXEC) = 23
[pid 1406645] fcntl(23, F_SETFD, FD_CLOEXEC) = 0
[pid 1406645] fstat(23, {st_mode=S_IFREG|0640, st_size=4490908, ...}) = 0
[pid 1406645] close(23)                 = 0
[pid 1406645] nanosleep({tv_sec=1, tv_nsec=0},  <unfinished ...>
[pid 1404722] <... futex resumed> )     = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=20879075}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=72889231}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=94869069}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12852949}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=70892541}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6867709}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=120903519}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11848193}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=11845513}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=12898211}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=1888127}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=17911249}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=18890798}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=69890334}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=16878145}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=55884624}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=8868782}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=2877992}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=6907969}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=43893704}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=36877702}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14863362}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=54855569}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=14826917}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=10871913}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=33891450}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=21884848}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=45868536}) = -1 ETIMEDOUT (Connection timed out)
[pid 1404722] futex(0x7eff967d6504, FUTEX_WAIT_PRIVATE, 2, {tv_sec=0, tv_nsec=55853343} <unfinished ...>

Perf the process: image The comm of process 1404722 is "grpc_global_tim" which I don't know anything about..

System information

  • CPU architecture:x86_64 unknown musl
  • Distribution and kernel version: 5.4.19-amd64 --

Percivalll avatar Sep 20 '20 06:09 Percivalll

grpc_global_tim is the thread that drives timer inside grpc. Can you get a dump of stacktrace when deadlock happens?

BusyJay avatar Sep 21 '20 06:09 BusyJay

[] futex_wait_queue_me+0xc1/0x120 [] futex_wait+0xf6/0x250 [] do_futex+0x2fb/0xb20 [] SyS_futex+0x7a/0x170 [] do_syscall_64+0x68/0x100 [] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [] 0xffffffffffffffff From /proc/$pid/stack,might be helpful?

Percivalll avatar Sep 22 '20 06:09 Percivalll

Better a full stack of all threads. You can use command gdb path/to/binary pid -batch -ex "info threads" -ex "thread apply all bt".

BusyJay avatar Sep 22 '20 13:09 BusyJay