Search code examples
csockets

How to gracefully handle accept() giving EMFILE and close the connection?


When a process runs out of file descriptors, accept() will fail and set errno to EMFILE. However, the underlying connection that would have been accepted is not closed, so there appears to be no way to inform the client that the application code could not handle the connection.

The question is what is the proper action to take regarding accepting TCP connections when running out of file descriptors.

The following code demonstrates the issue that I want to learn how to best deal with (note this is just example code for demonstrating the issue/question, not production code).

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>


/*
 * Report a fatal error and terminate.
 *
 * Prints `str` followed by the message for the current errno value
 * (via perror), then exits the process with status 1. Never returns.
 */
static void err(const char *str)
{
    perror(str);

    exit(1);
}


/*
 * Minimal TCP server used to reproduce the problem.
 *
 * Listens on port 6543 (all IPv4 interfaces) and accepts connections in an
 * endless loop. Accepted descriptors are never closed, so every client
 * consumes one file descriptor. When accept() fails the loop simply retries
 * without looking at errno.
 */
int main(int argc,char *argv[])
{
    /* Create the listening socket. */
    int serversocket = socket(AF_INET, SOCK_STREAM, 0);
    if (serversocket < 0) {
        err("socket()");
    }

    /* Address to bind to: any local IPv4 address, port 6543. */
    struct sockaddr_in serv_addr;
    memset(&serv_addr, 0, sizeof serv_addr);
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = INADDR_ANY;
    serv_addr.sin_port = htons(6543);

    if (bind(serversocket, (struct sockaddr *)&serv_addr, sizeof serv_addr) < 0) {
        err("bind()");
    }

    if (listen(serversocket, 10) < 0) {
        err("listen()");
    }

    while (1) {
        struct sockaddr_storage client_addr;
        socklen_t client_len = sizeof client_addr;

        int clientfd = accept(serversocket,
                              (struct sockaddr *)&client_addr,
                              &client_len);
        if (clientfd >= 0) {
            /* Connection accepted; the descriptor is kept open and unused. */
            continue;
        }

        /* accept() failed: fall through and try again right away. */
    }

    return 0;
}

Compile and run this code with a limited number of file descriptors available:

gcc srv.c
ulimit -n 10
strace -t ./a.out 2>&1 |less

And in another console, I run

 telnet localhost 6543 &

As many times as needed until accept() fails:

The output from strace shows this to happen:

13:21:12 socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
13:21:12 bind(3, {sa_family=AF_INET, sin_port=htons(6543), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
13:21:12 listen(3, 10)                  = 0
13:21:12 accept(3, {sa_family=AF_INET, sin_port=htons(43630), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 4
13:21:19 accept(3, {sa_family=AF_INET, sin_port=htons(43634), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 5
13:21:22 accept(3, {sa_family=AF_INET, sin_port=htons(43638), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 6
13:21:23 accept(3, {sa_family=AF_INET, sin_port=htons(43642), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 7
13:21:24 accept(3, {sa_family=AF_INET, sin_port=htons(43646), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 8
13:21:26 accept(3, {sa_family=AF_INET, sin_port=htons(43650), sin_addr=inet_addr("127.0.0.1")}, [128->16]) = 9
13:21:27 accept(3, 0xbfe718f4, [128])   = -1 EMFILE (Too many open files)
13:21:27 accept(3, 0xbfe718f4, [128])   = -1 EMFILE (Too many open files)
13:21:27 accept(3, 0xbfe718f4, [128])   = -1 EMFILE (Too many open files)
13:21:27 accept(3, 0xbfe718f4, [128])   = -1 EMFILE (Too many open files)
 ... and thousands upon thousands of more accept() failures.

Basically at this point:

  • the code will call accept() as fast as possible failing to accept the same TCP connection over and over again, churning CPU.
  • the client will stay connected, (as the TCP handshake completes before the application accepts the connection) and the client gets no information that there is an issue.

So,

  1. Is there a way to force the TCP connection that caused accept() to fail to be closed (so that, e.g., the client can be quickly informed and perhaps try another server)?

  2. What is the best practice to prevent the server code from going into an infinite loop when this situation arises (or to prevent the situation altogether)?


Solution

  • You can set aside an extra fd at the beginning of your program and keep track of the EMFILE condition:

    int reserve_fd;
    _Bool out_of_fd = 0;
    
    if(0>(reserve_fd = dup(1)))
        err("dup()");
    

    Then, if you hit the EMFILE condition, you can close the reserve_fd and use its slot to accept the new connection (which you'll then immediately close):

    clientfd = accept(serversocket,(struct sockaddr*)&client_addr,&client_len);
    if (out_of_fd){
        /* The previous iteration gave up the reserve fd, so this accept()
           had a free slot: drop the connection we cannot serve. Guard the
           close in case the retry failed for some other reason. */
        if(clientfd >= 0)
            close(clientfd);
        /* Re-arm the reserve for the next EMFILE. */
        if(0>(reserve_fd = dup(1)))
            err("dup()");
        out_of_fd=0;

        continue; /*doing other stuff that'll hopefully free the fd*/
    }

    if(clientfd < 0)  {
        if(errno == EMFILE) {
            /* Only descriptor exhaustion justifies giving up the reserve;
               doing it for EINTR, ECONNABORTED, ... would make the next
               perfectly good connection get dropped. */
            close(reserve_fd);
            out_of_fd=1;
        } else {
            perror("accept()");
        }
        continue;
    }
    

    Complete example:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <errno.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    
    
    /*
     * Fatal-error helper: print `str` plus the text for the current errno
     * (via perror) and exit the process with status 1. Does not return.
     */
    static void err(const char *str)
    {
        perror(str);

        exit(1);
    }
    
    
    /*
     * Demo server that sheds connections when it runs out of descriptors.
     *
     * A spare descriptor (a dup of stdout) is held in reserve. When accept()
     * fails with EMFILE the spare is closed, the pending connection is
     * accepted into the freed slot and closed immediately so the client sees
     * the rejection, and the spare is re-created.
     *
     * Accepted connections are otherwise left open and unused, as in the
     * question's example.
     */
    int main(int argc,char *argv[])
    {
        int serversocket;
        struct sockaddr_in serv_addr;
        serversocket = socket(AF_INET,SOCK_STREAM,0);
        if(serversocket < 0)
            err("socket()");

        /* SO_REUSEADDR is a boolean option: it must be given an initialized,
           non-zero int to be enabled. Failure here is not fatal. */
        int yes = 1;
        if ( -1 == setsockopt(serversocket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) )
            perror("setsockopt");


        memset(&serv_addr,0,sizeof serv_addr);

        serv_addr.sin_family = AF_INET;
        serv_addr.sin_addr.s_addr= INADDR_ANY;
        serv_addr.sin_port = htons(6543);
        if(bind(serversocket,(struct sockaddr*)&serv_addr,sizeof serv_addr) < 0)
            err("bind()");

        if(listen(serversocket,10) < 0)
            err("listen()");

        int reserve_fd;     /* spare descriptor, released on EMFILE */
        int out_of_fd = 0;  /* non-zero while the spare has been given up */

        if(0>(reserve_fd = dup(1)))
            err("dup()");


        for(;;) {
            struct sockaddr_storage client_addr;
            socklen_t client_len = sizeof client_addr;
            int clientfd;


            clientfd = accept(serversocket,(struct sockaddr*)&client_addr,&client_len);
            if (out_of_fd){
                /* The previous iteration gave up the reserve fd, so this
                   accept() had a free slot: drop the connection we cannot
                   serve. Guard the close in case the retry itself failed. */
                if(clientfd >= 0)
                    close(clientfd);
                /* Re-arm the reserve for the next EMFILE. */
                if(0>(reserve_fd = dup(1)))
                    err("dup()");
                out_of_fd=0;

                continue; /*doing other stuff that'll hopefully free the fd*/
            }

            if(clientfd < 0)  {
                if(errno == EMFILE) {
                    /* Only per-process descriptor exhaustion justifies giving
                       up the reserve; doing it for EINTR, ECONNABORTED, ...
                       would make the next good connection get dropped. */
                    close(reserve_fd);
                    out_of_fd=1;
                } else {
                    perror("accept()");
                }
                continue;
            }

        }

        return 0;
    }
    

    If you're multithreaded, then I imagine you'd need a lock around fd-producing functions and take it when you close the extra fd (while expecting to accept the final connection) in order to prevent having the spare slot filled by another thread.

    All this only makes sense if 1) the listening socket isn't shared with other processes (which might not have hit their EMFILE limit yet) and 2) the server deals with persistent connections (because if it doesn't, then you're bound to close some existing connection very soon, freeing up an fd slot for your next attempt at accept).