Tags: c++, libcurl, ftp-client

How to apply libcurl's persistent connection option


I am using libcurl to download a file in chunked blocks from an FTP server, written against the easy interface. The pseudo-code looks like this:

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <unistd.h>
#include <curl/curl.h>

static size_t
WriteCallback(void *buffer, size_t size, size_t nmemb, void *user_param)
{
    // libcurl calls this for every chunk of body (or header) data it receives;
    // user_param is the std::ofstream passed via CURLOPT_WRITEDATA / CURLOPT_HEADERDATA.
    size_t realsize = size * nmemb;
    std::ostream *ftp_file_p = static_cast<std::ofstream*>(user_param);
    if (realsize && ftp_file_p)
    {
        ftp_file_p->write(static_cast<char *>(buffer), realsize);
    }
    return realsize;
}

std::string
get_file_name_from_url(const std::string& url)
{
    // Take everything after the last '/'; fall back to the whole URL if there is none.
    size_t pos = url.rfind('/');
    if (pos == std::string::npos)
    {
        return url;
    }
    return url.substr(pos + 1);
}

void easy_setup_constants(CURL *curl, std::ostream& results_file)
{
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
    curl_easy_setopt(curl, CURLOPT_HEADER, 1L);

    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);   // header-only; switched back off in download_file()
    curl_easy_setopt(curl, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_SINGLECWD);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, write_response);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, WriteCallback);
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, static_cast<void*>(&results_file));
}

CURLcode
download_file(CURL *curl, const std::string& url)
{
    const size_t BLOCK_SIZE = 0x10000;
    if (!curl)
    {
        return CURLE_FAILED_INIT;
    }

    const std::string file_name = get_file_name_from_url(url);
    std::ofstream ftp_file(file_name.c_str(), std::ofstream::binary);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void*>(&ftp_file));
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());


    double filesize = 0.0;
    // Note: CURLINFO_CONTENT_LENGTH_DOWNLOAD only yields a value after a transfer
    // has actually been performed, so the size is hard-coded for this test file.
    CURLcode res = curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &filesize);
    filesize = 205901924;
    size_t file_full_size = static_cast<size_t>(filesize);
    size_t remaining = file_full_size;

    curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);   // re-enable the body for the actual download

    // One 64 KiB range per curl_easy_perform(); 'res' stops the loop on the first error.
    for (size_t actual_offset = 0; CURLE_OK == res && actual_offset < file_full_size; actual_offset += BLOCK_SIZE, remaining -= BLOCK_SIZE)
    {
        size_t length = std::min(BLOCK_SIZE, remaining);
        size_t range_end = actual_offset + length - 1;
        std::string range_buf = std::to_string(static_cast<long long>(actual_offset)) + "-" + std::to_string(static_cast<long long>(range_end));
        std::clog << "range_buf: " << range_buf << std::endl;
        curl_easy_setopt(curl, CURLOPT_RANGE, range_buf.c_str());
        res = curl_easy_perform(curl);
    } 


    if (CURLE_OK != res)
    {
        std::cerr << __FUNCTION__ << "[" << __LINE__ << "]" << ":: curl failed " << res << " '" << curl_easy_strerror(res) << "'" << std::endl;
    }

    return res;
}

int main(void)
{
    std::ofstream log_file("persistent.log");
    std::ofstream results_file("persistent.results");
    std::cout.rdbuf(log_file.rdbuf());
    std::cerr.rdbuf(log_file.rdbuf());
    std::clog.rdbuf(log_file.rdbuf());
    std::vector<std::string> urls = {"ftp://user_name:user_pass@127.0.0.1/some/path/Very_Large.File"};
    curl_global_init(CURL_GLOBAL_ALL);

    CURL *curl = curl_easy_init();
    if(curl) {
        easy_setup_constants(curl, results_file);

        for (auto it = urls.begin(); it != urls.end(); ++it)
        {
            /* Perform the request, res will get the return code */
            CURLcode res = download_file(curl, *it);

            /* Check for errors */
            if(res != CURLE_OK)
                std::cerr << __FUNCTION__ << "[" << __LINE__ << "]" << "::curl_easy_perform() failed: " << curl_easy_strerror(res) << std::endl;
        }

        /* always cleanup */
        curl_easy_cleanup(curl);
    }

    curl_global_cleanup();
    return 0;
}

Compilation command line:

g++ -g -Wall -std=c++0x persistent.cpp -o persistent -lcurl -lstdc++

However, using tcpdump I noticed that every block I read involves a full login-read-logout cycle; for some reason the connection does not remain persistent. The libcurl handle is wrapped in a class: curl_easy_init is called in the constructor, curl_easy_cleanup in the destructor, and I never call curl_easy_reset.
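
The wrapper class itself is not included above; roughly, it follows this pattern (the class name CurlHandle is just illustrative, not the real one):

#include <curl/curl.h>

// Minimal sketch of the RAII wrapper described above. The easy handle lives for
// the lifetime of the object, so every transfer made through it should be able
// to reuse the same connection cache.
class CurlHandle
{
public:
    CurlHandle() : handle_(curl_easy_init()) {}
    ~CurlHandle() { if (handle_) curl_easy_cleanup(handle_); }

    // Non-copyable: the raw handle must have exactly one owner.
    CurlHandle(const CurlHandle&) = delete;
    CurlHandle& operator=(const CurlHandle&) = delete;

    CURL *get() const { return handle_; }

private:
    CURL *handle_;
};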

I switched verbose mode on and, for every chunk transferred, I see this:

* About to connect() to 127.0.0.1 port 21 (#0)
*   Trying 127.0.0.1... * connected
* Connected to 127.0.0.1 (127.0.0.1) port 21 (#0)
< 220 (vsFTPd 2.2.2)
> user_name
< 331 Please specify the password.
> user_pass
< 230 Login successful.
> PWD
< 257 "/data"
* Entry path is '/data'
> CWD some/path
< 250 Directory successfully changed.
> EPSV
* Connect data stream passively
< 229 Entering Extended Passive Mode (|||63137|).
*   Trying 127.0.0.1... * connected
* Connecting to 127.0.0.1 (127.0.0.1) port 63137
> TYPE I
< 200 Switching to Binary mode.
> SIZE Very_Large.File
< 213 205901924
> RETR Very_Large.File
< 150 Opening BINARY mode data connection for Very_Large.File (205901924 bytes).
* Maxdownload = 65536
* Getting file with size: 65536
* Remembering we are in dir "some/path/"
< 426 Failure writing network stream.
* Connection #0 to host 127.0.0.1 left intact
* Re-using existing connection! (#0) with host 127.0.0.1
* Connected to 127.0.0.1 (127.0.0.1) port 21 (#0)
* Request has same path as previous transfer
> EPSV
* Connect data stream passively
< 229 Entering Extended Passive Mode (|||36365|).
*   Trying 127.0.0.1... * connected
* Connecting to 127.0.0.1 (127.0.0.1) port 36365
> SIZE Very_Large.File
< 213 205901924
* Instructs server to resume from offset 65536
> REST 65536
< 350 Restart position accepted (65536).
> RETR Very_Large.File
< 150 Opening BINARY mode data connection for Very_Large.File (205901924 bytes).
* Maxdownload = 65536
* Getting file with size: 65536
* Remembering we are in dir "some/path/"
< 426 Failure writing network stream.
* Connection #0 to host 127.0.0.1 left intact
* Re-using existing connection! (#0) with host 127.0.0.1
* Connected to 127.0.0.1 (127.0.0.1) port 21 (#0)
* Request has same path as previous transfer
> EPSV
* Connect data stream passively
< 229 Entering Extended Passive Mode (|||49089|).
*   Trying 127.0.0.1... * connected
* Connecting to 127.0.0.1 (127.0.0.1) port 49089
> SIZE Very_Large.File
< 213 205901924
* Instructs server to resume from offset 131072
> REST 131072
< 350 Restart position accepted (131072).
> RETR Very_Large.File
< 150 Opening BINARY mode data connection for Very_Large.File (205901924 bytes).
* Maxdownload = 65536
* Getting file with size: 65536
* Remembering we are in dir "some/path/"
< 426 Failure writing network stream.
< 226 Transfer complete.
* Connection #0 to host 127.0.0.1 left intact
> QUIT
< 221 Goodbye.
* Closing connection #0

Solution

  • You probably get this many disconnects/reconnects because you use CURLOPT_RANGE with FTP to fetch only a part of the remote file at a time. After a partial transfer like that, libcurl can't safely reuse the control connection, due to how some FTP servers handle (or rather, don't handle) the premature end of a transfer.
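
As one possible way around it (just a sketch, not part of the original answer): drop CURLOPT_RANGE entirely and let the write callback receive the file in whatever chunks libcurl delivers during a single transfer. The whole download then happens over one RETR, so the control connection is never abandoned mid-file. The URL and credentials below are the placeholders from the question.

#include <fstream>
#include <curl/curl.h>

// Write each chunk libcurl hands us straight to the output file.
static size_t write_chunk(void *buffer, size_t size, size_t nmemb, void *userp)
{
    std::ofstream *out = static_cast<std::ofstream*>(userp);
    out->write(static_cast<char *>(buffer), size * nmemb);
    return size * nmemb;
}

int main()
{
    curl_global_init(CURL_GLOBAL_ALL);
    CURL *curl = curl_easy_init();
    if (curl)
    {
        std::ofstream out("Very_Large.File", std::ofstream::binary);
        curl_easy_setopt(curl, CURLOPT_URL,
                         "ftp://user_name:user_pass@127.0.0.1/some/path/Very_Large.File");
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_chunk);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void*>(&out));
        // No CURLOPT_RANGE: a single RETR for the whole file, so the control
        // connection is never torn down mid-transfer and can stay persistent.
        CURLcode res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
        curl_global_cleanup();
        return res == CURLE_OK ? 0 : 1;
    }
    curl_global_cleanup();
    return 1;
}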