nginx-amplify-agent
nginx-amplify-agent copied to clipboard
Excessive CPU usage by nginx-amplify-agent
I am running several instances of nginx inside containers on a RHEL 8.3 host.
As recommended, I am running a separate instance of nginx-amplify-agent in each container.
However, each instance seems to be stuck in a busy loop calling epoll_wait, which returns immediately every time because the fd it is waiting on with EPOLLOUT is a datagram socket and is therefore always ready to write.
Dockerfile
# Base image: the unprivileged NGINX image, pinned to tag 1.20.0.
FROM docker.io/nginxinc/nginx-unprivileged:1.20.0
# Switch to root for the package installation and ownership changes below.
USER root
# Install the NGINX Amplify Agent from the packages.amplify.nginx.com apt
# repository ("stretch" suite), then purge the install-time helper packages
# and remove the repository entry and the apt package lists.
# The last step deletes /etc/amplify-agent and /etc/nginx; the COPY that
# follows presumably supplies replacements from the build context (the later
# chown expects both directories to exist) -- confirm against etc/ contents.
# NOTE(review): stdout and stderr of `apt-key add` are sent to /dev/null, so
# a key-import problem would not be visible in the build log.
RUN apt-get update \
&& apt-get install -qqy curl python apt-transport-https apt-utils gnupg1 procps \
&& echo 'deb https://packages.amplify.nginx.com/debian/ stretch amplify-agent' > /etc/apt/sources.list.d/nginx-amplify.list \
&& curl -fs https://nginx.org/keys/nginx_signing.key | apt-key add - > /dev/null 2>&1 \
&& apt-get update \
&& apt-get install -qqy nginx-amplify-agent \
&& apt-get purge -qqy curl apt-transport-https apt-utils gnupg1 \
&& rm -rf /etc/apt/sources.list.d/nginx-amplify.list \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /etc/amplify-agent /etc/nginx
# Copy the build context's etc/ tree into the image's /etc/.
COPY etc/ /etc/
# Give the nginx user (group root) ownership of both configuration trees.
RUN chown -R nginx:root /etc/amplify-agent /etc/nginx
# Drop back to the nginx user for the rest of the build and at runtime.
USER nginx
# Add the entrypoint script at the image root.
# NOTE(review): no ENTRYPOINT/CMD is visible in this excerpt -- confirm how
# the script is invoked.
COPY docker-entrypoint.sh /
strace output
# strace -r -p 77269
strace: Process 77269 attached
0.000000 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=661927298}) = 0
0.000081 getpid() = 85
0.000076 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=662078238}) = 0
0.000066 epoll_wait(3, [], 64, 89) = 0
0.089308 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751462807}) = 0
0.000213 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751672312}) = 0
0.000100 getpid() = 85
0.000078 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53626961657866}}) = 0
0.000085 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751934606}) = 0
0.000079 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3232) = 1
0.000095 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752109774}) = 0
0.000088 getpid() = 85
0.000133 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752337006}) = 0
0.000090 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000146 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752571876}) = 0
0.000077 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3232) = 1
0.000087 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752732963}) = 0
0.000073 getpid() = 85
0.000150 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752979938}) = 0
0.000112 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000073 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753142936}) = 0
0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3231) = 1
0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753274964}) = 0
0.000072 getpid() = 85
0.000105 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753454622}) = 0
0.000084 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753608025}) = 0
0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3231) = 1
0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753742367}) = 0
0.000071 getpid() = 85
0.000133 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753977549}) = 0
0.000117 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754136673}) = 0
0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3230) = 1
0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754268796}) = 0
0.000072 getpid() = 85
0.000131 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754507688}) = 0
0.000113 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754658447}) = 0
0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3230) = 1
0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754792677}) = 0
0.000073 getpid() = 85
0.000104 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754971654}) = 0
0.000081 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000072 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755123842}) = 0
0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3229) = 1
0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755255059}) = 0
0.000071 getpid() = 85
0.000129 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755491612}) = 0
0.000114 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755643024}) = 0
0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3229) = 1
0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755777152}) = 0
0.000072 getpid() = 85
0.000134 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756019775}) = 0
0.000113 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000069 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756168069}) = 0
0.000064 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3228) = 1
0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756298871}) = 0
0.000070 getpid() = 85
0.000180 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756554572}) = 0
0.000084 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756705977}) = 0
0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3228) = 1
0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756837464}) = 0
0.000071 getpid() = 85
0.000087 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756997574}) = 0
0.000092 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757090311}) = 0
0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 99) = 1
0.000066 epoll_ctl(3, EPOLL_CTL_DEL, 10, 0x5634213c2ec0) = 0
0.000062 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757282743}) = 0
0.000061 getpid() = 85
0.000058 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757402387}) = 0
0.000061 epoll_wait(3, ^Cstrace: Process 77269 detached
<detached ...>
N.B. file descriptor 10 is (according to lsof):
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
amplify-a 77269 362244 10u sock 0,9 0t0 582467 protocol: UDP