nginx flask uwsgi nginx-reverse-proxy nginx-config

NGINX/uWSGI reverse proxy 502 Bad Gateway under load

I'm using NGINX as a reverse proxy for a uWSGI server running a Flask app. Most requests are handled successfully, but under load testing at about 500 concurrent connections, some requests (but not all!) are dropped with a 502 Bad Gateway error.

According to the error log, it seems as though NGINX has trouble sending the connection through to uWSGI. I don't know why this fails, however, as I am easily running 100 uWSGI processes, each of which has a listen backlog of 300.

I've adjusted my net.core.somaxconn to 4096. My nginx.conf looks like this:

    # Main (global) context: worker identity, worker count and per-worker limits.
    user www-data;
    # "auto" lets nginx choose the number of worker processes itself.
    worker_processes auto;
    pid /run/nginx.pid;
    include /etc/nginx/modules-enabled/*.conf;
    # Raises the open-file-descriptor limit (RLIMIT_NOFILE) for each worker process.
    worker_rlimit_nofile 65535;

    # Connection-processing settings that apply to every worker process.
    events {
        # Maximum simultaneous connections per worker; this count includes
        # connections to upstream servers, not only client connections.
        worker_connections 65535;
        # Let a worker accept all pending new connections at once instead of one at a time.
        multi_accept on;
    }

    # HTTP-level defaults shared by every virtual host included at the bottom of this block.
    http {

    ##
    # Basic Settings
    ##

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    # Idle keep-alive client connections are kept open for up to 65 seconds.
    keepalive_timeout 65;
    types_hash_max_size 2048;
    # server_tokens off;

    # server_names_hash_bucket_size 64;
    # server_name_in_redirect off;

    include /etc/nginx/mime.types;
    # MIME type used when a response's type is not covered by mime.types.
    default_type application/octet-stream;

    ##
    # SSL Settings
    ##

    # NOTE(review): TLSv1 and TLSv1.1 are deprecated (RFC 8996) — consider restricting to TLSv1.2 and newer.
    ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
    ssl_prefer_server_ciphers on;

    ##
    # Logging Settings
    ##

    access_log /var/log/nginx/access.log;
    # No severity is given here, so nginx uses its default log level.
    error_log /var/log/nginx/error.log;

    ##
    # Gzip Settings
    ##

    gzip on;

    # gzip_vary on;
    # gzip_proxied any;
    # gzip_comp_level 6;
    # gzip_buffers 16 8k;
    # gzip_http_version 1.1;
    # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    ##
    # Virtual Host Configs
    ##

    # Per-site server blocks are pulled in from these two locations.
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/sites-enabled/*;

    }

My sites-available config looks like this (with the real url replaced with myservice.com):

# HTTPS virtual host: hands every request to the uWSGI application over a Unix domain socket.
server {
server_name myservice.com www.myservice.com;
# Allow unusually large request headers (64k initial buffer, up to 8 x 64k for oversized ones).
client_header_buffer_size 64k;
large_client_header_buffers 8 64k;

location / {
    include uwsgi_params;
    # Upstream is a Unix socket owned by uWSGI; the queue of pending connections on
    # this socket is sized by uWSGI's own listen backlog, not by anything in this file.
    uwsgi_pass unix:/home/ubuntu/api_master/api.sock;
    # Accept request bodies of up to 500 MB.
    client_max_body_size 500M;
}

# backlog= only sizes the accept queue of nginx's own port-443 listening socket
# (the kernel additionally caps it at net.core.somaxconn).
listen 443 ssl backlog=65535; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/myservice.com/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/myservice.com/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot




}
# Plain-HTTP virtual host: redirects the two known hostnames to HTTPS and
# answers 404 for any other Host value.
server {
if ($host = www.myservice.com) {
    return 301 https://$host$request_uri;
} # managed by Certbot


if ($host = myservice.com) {
    return 301 https://$host$request_uri;
} # managed by Certbot


listen 80;
server_name myservice.com www.myservice.com;
# Reached only when neither redirect above matched.
return 404; # managed by Certbot




}

Help would be greatly appreciated! It would be great if I could get to 4,096+ concurrent connections on this server; I'm sure the hardware can handle it.

Solution

Found the solution! Turns out this was an issue with uWSGI, not NGINX. I had incorrectly assumed that the listen backlog applied per process, when it is actually shared in aggregate across all processes; increasing that limit resolved the issue.