Search code examples
Tags: visual-studio-code, webserver, esp32, platformio, esp-idf

ESP32 webserver slows/hangs with more than 1 device accessing pages


I'm writing a webserver for an ESP32, written primarily in C and using platformio and ESP-IDF on VSCode. With a single client, the server works great, but as soon as I try to load a page on more than 1 device (doesn't matter if all the same page or different), everything slows to a crawl. I can see the page requests arriving on the server nice and snappily, but the pages won't load/be sent for up to a minute.

When this is happening, the server seems to favour an arbitrary device; my android phone, for example, will load pages at normal speeds while my PC and iPhone will be stuck, or on another occasion the PC will be loading fast and the other two devices stuck. This makes me wonder if it's related to my server/socket handling task, but ESP-IDF is such a tangled web of dependencies I'm not sure where to begin looking. I've made a post on the Espressif forum, but haven't had any responses yet. It's not the wifi connection alone, as if I connect all 3 devices but only access the webpages from one, that device still loads everything fine.

Updates:

07/02 - I'm more and more convinced that the culprit is the socket task, as it doesn't seem to allow for multiple sockets simultaneously; after accepting one it handles everything for that one socket before looping back and accepting another one.

08/02 - I modified tcp_server_task to start a new task for each socket (below is now the modified function and additional task), but this if anything made things worse, leaving me more confused than before.

09/02 - I tried increasing the backlog argument of listen() from 1 to 32; this also made no difference.

Attempted (failed) solutions now include:

  • Setting keepAlive to 0

  • changing dest_addr_ip4 from INADDR_ANY to 192.168.4.1, the desired address for devices to connect to

  • pinning the socket handling task to a different core to the tcp_server_task

  • adding vTaskDelay(pdMS_TO_TICKS(x)) at various points with various values of x

Stuck devices are still consistently loading simultaneously after however long it takes (usually 30 to 60 seconds, sometimes longer)

10/02 - I forgot to mention, the ESP has bluetooth enabled as well, as we want users to be able to control stuff over both wireless methods.

socket handling task(s):

// wifi_functions.c
void tcp_server_task(void *pvParameters)
{
    char addr_str[128];
    int addr_family = (int)pvParameters;
    int ip_protocol = 0;
    int keepAlive = 0;
    int keepIdle = KEEPALIVE_IDLE;
    int keepInterval = KEEPALIVE_INTERVAL;
    int keepCount = KEEPALIVE_COUNT;
    struct sockaddr_storage dest_addr;

    if (addr_family == AF_INET) {
        struct sockaddr_in *dest_addr_ip4 = (struct sockaddr_in *)&dest_addr;
        dest_addr_ip4->sin_addr.s_addr = htonl(INADDR_ANY);
        dest_addr_ip4->sin_family = AF_INET;
        dest_addr_ip4->sin_port = htons(PORT);
        ip_protocol = IPPROTO_IP;
    }
#ifdef CONFIG_EXAMPLE_IPV6
    else if (addr_family == AF_INET6) {
        struct sockaddr_in6 *dest_addr_ip6 = (struct sockaddr_in6 *)&dest_addr;
        bzero(&dest_addr_ip6->sin6_addr.un, sizeof(dest_addr_ip6->sin6_addr.un));
        dest_addr_ip6->sin6_family = AF_INET6;
        dest_addr_ip6->sin6_port = htons(PORT);
        ip_protocol = IPPROTO_IPV6;
    }
#endif

    int listen_sock = socket(addr_family, SOCK_STREAM, ip_protocol);
    if (listen_sock < 0) {
        // ESP_LOGE(TAG, "Unable to create socket: errno %d", errno);
        vTaskDelete(NULL);
        return;
    }
    int opt = 1;
    setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
#if defined(CONFIG_EXAMPLE_IPV4) && defined(CONFIG_EXAMPLE_IPV6)
    // Note that by default IPV6 binds to both protocols, it is must be disabled
    // if both protocols used at the same time (used in CI)
    setsockopt(listen_sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt));
#endif

    // ESP_LOGI(TAG, "Socket created");

    int err = bind(listen_sock, (struct sockaddr *)&dest_addr, sizeof(dest_addr));
    if (err != 0) {
        // ESP_LOGE(TAG, "Socket unable to bind: errno %d", errno);
        // ESP_LOGE(TAG, "IPPROTO: %d", addr_family);
        goto CLEAN_UP;
    }
    // ESP_LOGI(TAG, "Socket bound, port %d", PORT);

    err = listen(listen_sock, 1);
    if (err != 0) {
        // ESP_LOGE(TAG, "Error occurred during listen: errno %d", errno);
        goto CLEAN_UP;
    }

    while (1) {

        // ESP_LOGI("socket task", "Socket listening");

        struct sockaddr_storage source_addr; // Large enough for both IPv4 or IPv6
        socklen_t addr_len = sizeof(source_addr);
        int sock = accept(listen_sock, (struct sockaddr *)&source_addr, &addr_len);
        if (sock < 0) {
            // ESP_LOGE(TAG, "Unable to accept connection: errno %d", errno);
            break;
        }

        set_sock(sock);

        // Set tcp keepalive option
        setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &keepAlive, sizeof(int));
        setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keepIdle, sizeof(int));
        setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keepInterval, sizeof(int));
        setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keepCount, sizeof(int));
        // Convert ip address to string
        if (source_addr.ss_family == PF_INET) {
            inet_ntoa_r(((struct sockaddr_in *)&source_addr)->sin_addr, addr_str, sizeof(addr_str) - 1);
        }
#ifdef CONFIG_EXAMPLE_IPV6
        else if (source_addr.ss_family == PF_INET6) {
            inet6_ntoa_r(((struct sockaddr_in6 *)&source_addr)->sin6_addr, addr_str, sizeof(addr_str) - 1);
        }
#endif
        // ESP_LOGI(TAG, "Socket accepted ip address: %s", addr_str);
        xTaskCreate(socket_handler_task, "socket_handler", 10*1024, &sock, 10, NULL);
    }

CLEAN_UP:
    close(listen_sock);
    vTaskDelete(NULL);
}

void socket_handler_task(void *params)
{
    int sock = *((int*)params);
    do_retransmit(sock);

    shutdown(sock, 0);
    close(sock);
    vTaskDelete(NULL);
}

...

// main.c
xTaskCreatePinnedToCore(tcp_server_task, "tcp_server1", 4096, (void*)AF_INET, 10, NULL, 1);

Changing priority or switching to core 0 doesn't appear to have any impact.

This function is part of the HTTP server that handles file/page requests:

// file_server.c
/* Handler to download a file kept on the server */
static esp_err_t download_get_handler(httpd_req_t *req)
{
    char filepath[FILE_PATH_MAX];
    FILE *fd = NULL;
    struct stat file_stat;

    const char *filename = get_path_from_uri(filepath, ((struct file_server_data *)req->user_ctx)->base_path,
                                             req->uri, sizeof(filepath));

    esp_log_buffer_char_internal("path", filename, strlen(filename), ESP_LOG_INFO);
    if (!filename) {
        // ESP_LOGE(TAG, "Filename is too long");
        /* Respond with 500 Internal Server Error */
        httpd_resp_send_err(req, HTTPD_500_INTERNAL_SERVER_ERROR, "Filename too long");
        return ESP_FAIL;
    }
    if(strlen(filename) == 1 || strcmp(filename, "/index.html") == 0){
        strcpy(filepath, "/spiffs/index.htm");
    }
    /* If name has trailing '/', respond with directory contents */
    if (filename[strlen(filename) - 1] == '/') {
        return http_resp_dir_html(req, filepath);
    }
    if(strcmp(filename, "/setup.txt") == 0){
        httpd_resp_send_err(req, HTTPD_403_FORBIDDEN, "This file is not accessible");
        return ESP_FAIL;
    }
    if (stat(filepath, &file_stat) == -1) {
        /* If file not present on SPIFFS check if URI
         * corresponds to one of the hardcoded paths */
        if (strcmp(filename, "/favicon.ico") == 0) {
            return favicon_get_handler(req);
        } else if (strcmp(filename, "/config") == 0) {
            return config_get_handler(req);
        } else if (strcmp(filename, "/rits") == 0) {
            return rits_get_handler(req);
        } else if (strcmp(filename, "/setup") == 0) {
            return setup_get_handler(req);
        } else if (strcmp(filename, "/access") == 0) {
            return access_get_handler(req);
        } else if (strcmp(filename, "/login") == 0) {
            return login_get_handler(req);
        } else if (strcmp(filename, "/check") == 0) {
            return debug_get_handler(req);
        } else if (strcmp(filename, "/funeral") == 0) {
            return debug_get_handler(req);
        } else if (strcmp(filename, "/imageEdit") == 0) {
            // No need to do anything
            return debug_get_handler(req);
        } else if (strcmp(filename, "/restart") == 0) {
            return restart_handler(req);
        }
        // ESP_LOGE(TAG, "Failed to stat file : %s", filepath);
        /* Respond with 404 Not Found */
        httpd_resp_send_err(req, HTTPD_404_NOT_FOUND, "File does not exist");
        return ESP_FAIL;
    }

    fd = fopen(filepath, "r");
    if (!fd) {
        // ESP_LOGE(TAG, "Failed to read existing file : %s", filepath);
        /* Respond with 500 Internal Server Error */
        httpd_resp_send_err(req, HTTPD_500_INTERNAL_SERVER_ERROR, "Failed to read existing file");
        return ESP_FAIL;
    }

    // ESP_LOGI(TAG, "Sending file : %s (%ld bytes)...", filename, file_stat.st_size);
    set_content_type_from_file(req, filename);

    /* Retrieve the pointer to scratch buffer for temporary storage */
    char *chunk = ((struct file_server_data *)req->user_ctx)->scratch;
    size_t chunksize;
    do {
        /* Read file in chunks into the scratch buffer */
        chunksize = fread(chunk, 1, SCRATCH_BUFSIZE, fd);

        if (chunksize > 0) {
            /* Send the buffer contents as HTTP response chunk */
            if (httpd_resp_send_chunk(req, chunk, chunksize) != ESP_OK) {
                fclose(fd);
                // ESP_LOGE(TAG, "File sending failed!");
                /* Abort sending file */
                httpd_resp_sendstr_chunk(req, NULL);
                /* Respond with 500 Internal Server Error */
                httpd_resp_send_err(req, HTTPD_500_INTERNAL_SERVER_ERROR, "Failed to send file");
               return ESP_FAIL;
           }
        }

        /* Keep looping till the whole file is sent */
    } while (chunksize != 0);

    /* Close file after sending complete */
    fclose(fd);
    // ESP_LOGI(TAG, "File sending complete");

    /* Respond with an empty chunk to signal HTTP response completion */
#ifdef CONFIG_EXAMPLE_HTTPD_CONN_CLOSE_HEADER
    httpd_resp_set_hdr(req, "Connection", "close");
#endif
    httpd_resp_send_chunk(req, NULL, 0);
    return ESP_OK;
}

Solution

  • Fixed it! The solution that worked in the end wasn't actually anything to do with sockets, all I did was set the lru_purge_enable of my httpd_config_t to true. All 3 devices are now very happily loading everything. I'd still be curious if anyone knows why the http (connections?) weren't being dropped and required purging.