resource-agents
resource-agents copied to clipboard
storage_mon: child processes remain when stuck in read() etc
When an I/O unresponsive failure occurs, the child process of storage_mon remains.
# ./storage_mon --device /dev/mapper/mpathc --score 1 --verbose
Testing device /dev/mapper/mpathc
/dev/mapper/mpathc: opened with O_DIRECT, size=536870912000
/dev/mapper/mpathc: reading from pos 727385088
Thread for device /dev/mapper/mpathc did not complete in time
Final score is 1
# ps -ef | grep storage_mon
root 710237 697460 0 13:17 pts/1 00:00:00 ./storage_mon --device /dev/mapper/mpathc --score 1 --verbose
root 710238 710237 0 13:17 pts/1 00:00:00 ./storage_mon --device /dev/mapper/mpathc --score 1 --verbose
# ps -ef | grep storage_mon
root 710238 1 0 13:17 pts/1 00:00:00 ./storage_mon --device /dev/mapper/mpathc --score 1 --verbose
I got strace -ff -tt -T -o storage_mon ./storage_mon --device /dev/mapper/mpathc --score 1.
- parent process exits,
# cat storage_mon.629992 14:45:20.173791 execve("./storage_mon", ["./storage_mon", "--device", "/dev/mapper/mpathc", "--score", "1", "--verbose"], 0x7fff94c9e6a0 /* 26 vars */) = 0 <0.000605> (snip) 14:45:20.193591 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fba97913250) = 629993 <0.000246> 14:45:20.194002 wait4(629993, 0x7fff320ea9a8, WNOHANG|WSTOPPED|WCONTINUED, NULL) = 0 <0.000023> 14:45:20.194104 nanosleep({tv_sec=0, tv_nsec=100000000}, NULL) = 0 <0.100166> (snip) 14:45:29.962911 wait4(629993, 0x7fff320ea9a8, WNOHANG|WSTOPPED|WCONTINUED, NULL) = 0 <0.000127> 14:45:29.963242 nanosleep({tv_sec=0, tv_nsec=100000000}, NULL) = 0 <0.100199> 14:45:30.063761 openat(AT_FDCWD, "/etc/localtime", O_RDONLY|O_CLOEXEC) = 3 <0.000131> 14:45:30.064143 fstat(3, {st_mode=S_IFREG|0644, st_size=318, ...}) = 0 <0.000112> 14:45:30.064481 fstat(3, {st_mode=S_IFREG|0644, st_size=318, ...}) = 0 <0.000116> 14:45:30.064734 read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 4096) = 318 <0.000023> 14:45:30.064838 lseek(3, -185, SEEK_CUR) = 133 <0.000015> 14:45:30.064904 read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 4096) = 185 <0.000079> 14:45:30.065075 close(3) = 0 <0.000023> 14:45:30.065195 socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 3 <0.000030> 14:45:30.065276 connect(3, {sa_family=AF_UNIX, sun_path="/dev/log"}, 110) = 0 <0.000032> 14:45:30.065366 sendto(3, "<27>Sep 21 14:45:30 storage_mon:"..., 110, MSG_NOSIGNAL, NULL, 0) = 110 <0.000080> 14:45:30.065705 write(2, "Thread for device /dev/mapper/mp"..., 62) = 62 <0.000039> 14:45:30.065860 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0 <0.000018> 14:45:30.065974 write(1, "Final score is 1\n", 17) = 17 <0.000071> 14:45:30.066644 exit_group(1) = ? 14:45:30.067288 +++ exited with 1 +++ - but child process is waiting for read() to return.
# tail -F storage_mon.629993 14:45:20.194158 set_robust_list(0x7fba97913260, 24) = 0 <0.000063> 14:45:20.194448 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0 <0.000065> 14:45:20.194634 write(1, "Testing device /dev/mapper/mpath"..., 34) = 34 <0.000207> 14:45:20.194958 openat(AT_FDCWD, "/dev/mapper/mpathc", O_RDONLY|O_DIRECT) = 3 <0.000152> 14:45:20.195227 ioctl(3, BLKGETSIZE64, [536870912000]) = 0 <0.000018> 14:45:20.195314 write(1, "/dev/mapper/mpathc: opened with "..., 60) = 60 <0.000026> 14:45:20.195423 getpid() = 629993 <0.000016> 14:45:20.195511 lseek(3, 277049344, SEEK_SET) = 277049344 <0.000016> 14:45:20.195578 write(1, "/dev/mapper/mpathc: reading from"..., 47) = 47 <0.000022> 14:45:20.195654 ioctl(3, BLKSSZGET, [512]) = 0 <0.000017> 14:45:20.195754 read(3,
As a result, if allow-unhealthy-nodes=true is set,
pcs property set node-health-strategy=only-green
pcs resource create storage-mon ocf:heartbeat:storage-mon \
drives="/dev/mapper/mpathc" \
monitor timeout=60s interval=10s on-fail=restart
pcs resource clone storage-mon storage-mon-clone
pcs resource meta storage-mon-clone allow-unhealthy-nodes=true
each time the monitor is run, there will be more child processes that do not exit.
# ps -ef | grep storage_mon
UID PID PPID C STIME TTY TIME CMD
root 627631 1 0 14:20 ? 00:00:00 /usr/libexec/heartbeat/storage_mon --device /dev/mapper/mpathc --score 1 --timeout 10
root 627757 1 0 14:20 ? 00:00:00 /usr/libexec/heartbeat/storage_mon --device /dev/mapper/mpathc --score 1 --timeout 10
root 627892 1 0 14:21 ? 00:00:00 /usr/libexec/heartbeat/storage_mon --device /dev/mapper/mpathc --score 1 --timeout 10
root 628005 1 0 14:21 ? 00:00:00 /usr/libexec/heartbeat/storage_mon --device /dev/mapper/mpathc --score 1 --timeout 10
root 628030 1 0 14:21 ? 00:00:00 /usr/libexec/heartbeat/storage_mon --device /dev/mapper/mpathc --score 1 --timeout 10
(snip)
The kernel doesnt allow us to kill stuck processes, so this is not something we can fix in storage_mon.