nchan messages intermittent with Redis (AWS ElastiCache for Redis)
Hello, I think there might be a bug in the code, although I can't confirm it. We are looking to horizontally scale our nginx servers, so we tried attaching Redis. Unfortunately, it seems that messages are either lost or intermittently not understood. When using in-memory storage there are no issues. I'm not sure if this is because of ElastiCache or something else. I'm willing to help troubleshoot.
My Config files look like this: Server Include:
# Shared server-level settings (this snippet is meant to be pulled into a
# server block with `include`).

# Plain HTTP and HTTPS listeners; the IPv6 listener is left disabled.
listen 80;
#listen [::]:80 ;
listen 443 ssl;

# NOTE(review): TLSv1 and TLSv1.1 are deprecated (RFC 8996); kept here so
# existing clients keep working - consider dropping them when possible.
ssl_protocols TLSv1 TLSv1.1 TLSv1.2;

# OpenSSL cipher strings are separated by ':'. The previous value ended in
# "!MD5!SHA1" with no separator between the two exclusions.
ssl_ciphers DEFAULT:+MEDIUM:!RC4:!3DES:!MD5:!SHA1;

# Static content: document root, default index files, no directory listings.
root /var/www/html;
autoindex off;
index index.html index.htm;

# Client certificates are requested but optional, and need not be signed by
# a trusted CA; the verification result is exposed in $ssl_client_verify.
ssl_trusted_certificate /etc/nginx/iftapi_client_ca.crt;
ssl_verify_client optional_no_ca;
# /a/<function>: forward the request to the FastCGI backend. The single
# regex capture is handed over as CGI_FUNCTION.
location ~ ^/a/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # Which function was requested, and how.
    fastcgi_param CGI_FUNCTION            $1;
    fastcgi_param REQUEST_METHOD          $request_method;
    fastcgi_param QUERY_STRING            $query_string;

    # Caller identity.
    fastcgi_param CGI_HTTP_AUTHORIZATION  $http_authorization;
    fastcgi_param IP_ADDR                 $remote_addr;
    fastcgi_param USERAGENT               $http_user_agent;

    # Outcome of the optional TLS client-certificate check.
    fastcgi_param CLIENT_CERT_VERIFY      $ssl_client_verify;
    fastcgi_param CLIENT_CERT_FINGERPRINT $ssl_client_fingerprint;
}
# /a/<id>/applongpoll: nchan subscriber endpoint for channel "ch_s_<id>".
# Each request is authorized through /a/<id>/appauth before subscribing.
location ~ ^/a/([A-Z0-9]*)/applongpoll$ {
    nchan_subscriber;
    nchan_channel_id        "ch_s_$1";
    nchan_authorize_request "/a/$1/appauth";

    # Channel storage goes through the Redis upstream.
    nchan_redis_pass primary_redis_cluster;

    # Message retention for this channel.
    nchan_message_buffer_length 1;
    nchan_message_timeout       30s;

    # Subscriber behaviour.
    nchan_subscriber_timeout                    57;
    nchan_subscriber_compound_etag_message_id   on;
}
# /a/<serial>/<function>: FastCGI call without an auth segment.
# Capture 1 becomes CGI_SERIAL, capture 2 becomes CGI_FUNCTION, and CGI_AUTH
# is fixed to the literal "none".
location ~ ^/a/([A-Za-f0-9]*)/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # Values taken from the URI.
    fastcgi_param CGI_SERIAL             $1;
    fastcgi_param CGI_FUNCTION           $2;
    fastcgi_param CGI_AUTH               none;

    # Request basics.
    fastcgi_param REQUEST_METHOD         $request_method;
    fastcgi_param QUERY_STRING           $query_string;

    # Caller identity.
    fastcgi_param CGI_HTTP_AUTHORIZATION $http_authorization;
    fastcgi_param IP_ADDR                $remote_addr;
    fastcgi_param USERAGENT              $http_user_agent;
}
# /a/<id>/<auth>/longpoll: nchan subscriber endpoint for channel "ch_<id>".
# Each request is authorized through /a/<id>/<auth>/auth before subscribing.
location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpoll$ {
    nchan_subscriber;
    nchan_channel_id        "ch_$1";
    nchan_authorize_request "/a/$1/$2/auth";

    # Channel storage goes through the Redis upstream.
    nchan_redis_pass primary_redis_cluster;

    # Message retention for this channel.
    nchan_message_buffer_length 10;
    nchan_message_timeout       60s;

    # Subscriber behaviour.
    nchan_subscriber_timeout                    295;
    nchan_subscriber_compound_etag_message_id   on;
}
# /a/<id>/<auth>/longpoll2: nchan subscriber endpoint for channel "ch_<id>",
# using a 25 subscriber timeout. Each request is authorized through
# /a/<id>/<auth>/auth before subscribing.
location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpoll2$ {
    nchan_subscriber;
    nchan_channel_id        "ch_$1";
    nchan_authorize_request "/a/$1/$2/auth";

    # Channel storage goes through the Redis upstream.
    nchan_redis_pass primary_redis_cluster;

    # Message retention for this channel.
    nchan_message_buffer_length 10;
    nchan_message_timeout       60s;

    # Subscriber behaviour.
    nchan_subscriber_timeout                    25;
    nchan_subscriber_compound_etag_message_id   on;
}
# /a/<serial>/<auth>/<function>: FastCGI call carrying an auth segment.
# Captures map to CGI_SERIAL ($1), CGI_AUTH ($2) and CGI_FUNCTION ($3).
location ~ ^/a/([A-Za-f0-9]*)/([A-Za-f0-9]*)/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # Values taken from the URI.
    fastcgi_param CGI_SERIAL     $1;
    fastcgi_param CGI_AUTH       $2;
    fastcgi_param CGI_FUNCTION   $3;

    # Request basics.
    fastcgi_param REQUEST_METHOD $request_method;
    fastcgi_param QUERY_STRING   $query_string;

    # Caller identity.
    fastcgi_param IP_ADDR        $remote_addr;
    fastcgi_param USERAGENT      $http_user_agent;
}
# Catch-all location: serve the file or directory matching the URI,
# otherwise answer 404. Responses are marked as already expired.
location / {
    charset   us-ascii;
    expires   epoch;
    try_files $uri $uri/ =404;

    # Directory listings are switched on beneath /fw/.
    location /fw/ {
        autoindex on;
    }
}
Publisher:
# Publisher-side virtual host. It listens only on a unix domain socket and
# exposes the nchan publisher endpoints for the "ch_s_<id>" and "ch_<id>"
# channels, all stored through the primary_redis_cluster upstream.
server {
    server_name pub.iftapi.net;
    listen      unix:/var/www/sockets/pubsocket;
    root        /doesnotexist;

    # Server-wide message defaults; each location below sets its own values.
    nchan_message_buffer_length 10;
    nchan_message_timeout       10s;

    # Publish to the app channel "ch_s_<id>".
    location ~ ^/a/([A-Z0-9]*)/applongpost$ {
        nchan_publisher;
        nchan_channel_id            "ch_s_$1";
        nchan_redis_pass            primary_redis_cluster;
        nchan_message_buffer_length 1;
        nchan_message_timeout       30s;
    }

    #deprecated - delete this one
    location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpost$ {
        nchan_publisher;
        nchan_channel_id            "ch_$1";
        nchan_redis_pass            primary_redis_cluster;
        nchan_message_buffer_length 10;
        nchan_message_timeout       60s;
    }

    # Publish to the channel "ch_<id>".
    location ~ ^/a/([A-Z0-9]*)/longpost$ {
        nchan_publisher;
        nchan_channel_id            "ch_$1";
        nchan_redis_pass            primary_redis_cluster;
        nchan_message_buffer_length 10;
        nchan_message_timeout       60s;
    }
}
nginx.conf:
# Main context: process identity, worker count and dynamically loaded modules.
pid              /run/nginx.pid;
user             www-data;
worker_processes auto;

# Module load snippets must be read before any block that uses them.
include /etc/nginx/modules-enabled/*.conf;

# Per-worker connection limit.
events {
    worker_connections 768;
}
# HTTP context: global defaults, the Redis upstream used by nchan, and the
# includes that pull in the individual virtual hosts.
http {
    # --- Basic settings ---------------------------------------------------
    sendfile             on;
    tcp_nodelay          on;
    tcp_nopush           on;
    keepalive_timeout    65;
    types_hash_max_size  2048;

    include      /etc/nginx/mime.types;
    default_type application/octet-stream;

    # --- SSL settings -----------------------------------------------------
    # SSLv3 is left out on purpose (ref: POODLE).
    ssl_protocols             TLSv1 TLSv1.1 TLSv1.2;
    ssl_prefer_server_ciphers on;

    # --- Logging ----------------------------------------------------------
    access_log /var/log/nginx/access.log;
    error_log  /var/log/nginx/error.log;

    # --- nchan Redis settings ---------------------------------------------
    # Upstream referenced by nchan_redis_pass in the virtual hosts.
    upstream primary_redis_cluster {
        nchan_redis_server REDISURL;
    }

    # --- Gzip -------------------------------------------------------------
    gzip on;

    # --- Virtual hosts ----------------------------------------------------
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/sites-enabled/*;
}
Each website that allows subscribers:
# Per-site virtual host: only the name is site-specific, everything else
# comes from the shared include file.
server {
    server_name SERVERNAME;

    include /etc/nginx/server_include.conf;
}
Hello, we are experiencing a similar issue. We initially used nchan on a single EC2 machine with in-memory storage. For high availability, we are testing nchan running on multiple pods in EKS, with Redis (AWS ElastiCache v5.0.6) as the backend and an NLB load balancer. Most of the time everything runs smoothly. Sometimes, however, the SSE messages are not received by the subscriber, and we get a 202 response for the publisher's POST request. The SSE connection is created in the checkout process. The problem goes away after the subscriber connects again. I'm willing to help troubleshoot. We are currently using nchan version 1.2.8 and nginx version 1.18.0.
config:
# Redis backend for nchan; the server URL is filled in from the REDIS_URL
# template placeholder.
upstream redis_cluster {
    nchan_redis_server ${REDIS_URL};
}
# Pub/sub front end: static files at /, nchan subscriber and publisher
# endpoints stored through the redis_cluster upstream, and the nchan
# stub-status page.
server {
    listen 80;

    # Listen on the server_name as specified via the ENV
    server_name ${SERVER_NAME};

    root  /var/www/phalanx/html;
    index index.html;

    # Static content; anything that is not a file or directory is a 404.
    location / {
        try_files $uri $uri/ =404;
    }

    #pubsub
    # The channel id is the trailing run of word characters and hyphens.
    # A single character class replaces the former "((\w+|\-)+)": it accepts
    # exactly the same ids and yields the same $1, but has no nested
    # quantifier, so a URI that fails to match cannot trigger exponential
    # regex backtracking.
    location ~ /sub/([\w-]+)$ {
        nchan_subscriber;
        nchan_channel_id $1;
        nchan_redis_pass redis_cluster;
    }

    # Publisher endpoint; message retention is configured here.
    location ~ /pub/([\w-]+)$ {
        nchan_publisher;
        nchan_channel_id $1;
        nchan_message_timeout 10s;
        nchan_message_buffer_length 20;
        nchan_redis_pass redis_cluster;
    }

    # NOTE(review): this status page has no access restriction in this
    # block - confirm it is not reachable from untrusted networks.
    location /nchan_stub_status {
        nchan_stub_status;
    }
}
Please try version 1.3.5; many Redis-related issues have been fixed since 1.2.8.