nginx-amplify-agent

Excessive CPU usage by nginx-amplify-agent

Open dtmdl opened this issue 3 years ago • 0 comments

I am running several instances of nginx inside containers on a RHEL 8.3 host.

As recommended, I am running a separate instance of nginx-amplify-agent in each container.

However, each instance appears to be stuck in a busy loop calling epoll_wait, which returns immediately every time: the fd it is waiting on with EPOLLOUT is a datagram socket, so it is always ready to write.

Dockerfile
# Image that adds the NGINX Amplify Agent on top of the unprivileged
# NGINX 1.20.0 base image.
FROM docker.io/nginxinc/nginx-unprivileged:1.20.0

# Switch to root for the package installation and ownership changes below.
USER root

# Install the NGINX Amplify Agent in a single layer:
#   1. install the tools needed to add the repository (curl, gnupg1, ...)
#      plus python and procps;
#   2. register the Amplify apt repository (Debian "stretch" suite) and
#      import the nginx signing key -- note that all apt-key output,
#      including errors, is discarded by the redirect to /dev/null;
#   3. install nginx-amplify-agent;
#   4. purge the tools only needed for the installation, then remove the
#      repository entry, the apt package lists, and the packaged
#      /etc/amplify-agent and /etc/nginx directories.
RUN apt-get update \
    && apt-get install -qqy curl python apt-transport-https apt-utils gnupg1 procps \
    && echo 'deb https://packages.amplify.nginx.com/debian/ stretch amplify-agent' > /etc/apt/sources.list.d/nginx-amplify.list \
    && curl -fs https://nginx.org/keys/nginx_signing.key | apt-key add - > /dev/null 2>&1 \
    && apt-get update \
    && apt-get install -qqy nginx-amplify-agent \
    && apt-get purge -qqy curl apt-transport-https apt-utils gnupg1 \
    && rm -rf /etc/apt/sources.list.d/nginx-amplify.list \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /etc/amplify-agent /etc/nginx 

# Copy the local etc/ tree into /etc/.
# NOTE(review): presumably this supplies the replacement /etc/amplify-agent
# and /etc/nginx configuration removed above -- the contents of etc/ are not
# shown here.
COPY etc/ /etc/

# Give the nginx user (group root) ownership of both configuration trees.
RUN chown -R nginx:root /etc/amplify-agent /etc/nginx

# Drop back to the unprivileged nginx user for everything that follows.
USER nginx
# Copy the entrypoint script to the image root.
# NOTE(review): no ENTRYPOINT/CMD instruction is visible in this excerpt.
COPY docker-entrypoint.sh /
strace output
# strace -r -p 77269
strace: Process 77269 attached
     0.000000 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=661927298}) = 0
     0.000081 getpid()                  = 85
     0.000076 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=662078238}) = 0
     0.000066 epoll_wait(3, [], 64, 89) = 0
     0.089308 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751462807}) = 0
     0.000213 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751672312}) = 0
     0.000100 getpid()                  = 85
     0.000078 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53626961657866}}) = 0
     0.000085 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=751934606}) = 0
     0.000079 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3232) = 1
     0.000095 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752109774}) = 0
     0.000088 getpid()                  = 85
     0.000133 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752337006}) = 0
     0.000090 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000146 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752571876}) = 0
     0.000077 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3232) = 1
     0.000087 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752732963}) = 0
     0.000073 getpid()                  = 85
     0.000150 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=752979938}) = 0
     0.000112 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000073 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753142936}) = 0
     0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3231) = 1
     0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753274964}) = 0
     0.000072 getpid()                  = 85
     0.000105 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753454622}) = 0
     0.000084 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753608025}) = 0
     0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3231) = 1
     0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753742367}) = 0
     0.000071 getpid()                  = 85
     0.000133 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=753977549}) = 0
     0.000117 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754136673}) = 0
     0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3230) = 1
     0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754268796}) = 0
     0.000072 getpid()                  = 85
     0.000131 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754507688}) = 0
     0.000113 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754658447}) = 0
     0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3230) = 1
     0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754792677}) = 0
     0.000073 getpid()                  = 85
     0.000104 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=754971654}) = 0
     0.000081 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000072 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755123842}) = 0
     0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3229) = 1
     0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755255059}) = 0
     0.000071 getpid()                  = 85
     0.000129 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755491612}) = 0
     0.000114 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755643024}) = 0
     0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3229) = 1
     0.000067 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=755777152}) = 0
     0.000072 getpid()                  = 85
     0.000134 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756019775}) = 0
     0.000113 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000069 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756168069}) = 0
     0.000064 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3228) = 1
     0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756298871}) = 0
     0.000070 getpid()                  = 85
     0.000180 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756554572}) = 0
     0.000084 epoll_ctl(3, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT, {u32=10, u64=53631256625162}}) = -1 EEXIST (File exists)
     0.000071 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756705977}) = 0
     0.000065 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 3228) = 1
     0.000066 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756837464}) = 0
     0.000071 getpid()                  = 85
     0.000087 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=756997574}) = 0
     0.000092 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757090311}) = 0
     0.000066 epoll_wait(3, [{EPOLLOUT, {u32=10, u64=53626961657866}}], 64, 99) = 1
     0.000066 epoll_ctl(3, EPOLL_CTL_DEL, 10, 0x5634213c2ec0) = 0
     0.000062 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757282743}) = 0
     0.000061 getpid()                  = 85
     0.000058 clock_gettime(CLOCK_MONOTONIC, {tv_sec=11355, tv_nsec=757402387}) = 0
     0.000061 epoll_wait(3, ^Cstrace: Process 77269 detached
 <detached ...>

N.B. According to lsof, file descriptor 10 is a UDP socket:

COMMAND     PID     USER   FD      TYPE DEVICE SIZE/OFF      NODE NAME
amplify-a 77269   362244   10u     sock    0,9      0t0    582467 protocol: UDP

dtmdl avatar May 13 '21 16:05 dtmdl