EDIT - Short question: Does httr
have a finalizer that closes the FTP connection?
I'm downloading climate projections files from the ftp server of the NASA NEX project using the httr
package.
My script is:
library(httr)

# Combinations of variable / scenario / model / year to fetch from the
# NASA NEX FTP server.
var <- c("pr", "tasmin", "tasmax")
rcp <- c("rcp45", "rcp85")
mod <- c("inmcm4", "GFDL-CM3")
year <- seq(2040, 2080, 1)

for (v in var) {
  for (r in rcp) {
    # Remote directory holding every file for this variable and scenario.
    url <- paste0("ftp://ftp.nccs.nasa.gov/BCSD/", r, "/day/atmos/", v,
                  "/r1i1p1/v1.0/")
    for (m in mod) {
      for (y in year) {
        # One file per variable / scenario / model / year.
        nfile <- paste0(v, "_day_BCSD_", r, "_r1i1p1_", m, "_", y, ".nc")
        url1 <- paste0(url, nfile)
        destfile <- paste0("mypath", r, "/", v, "/", nfile)
        # Stream the response straight to disk; overwrite = FALSE makes
        # write_disk() refuse to replace an existing file.
        GET(
          url = url1,
          authenticate(user = "NEXGDDP", password = "", type = "basic"),
          write_disk(path = destfile, overwrite = FALSE)
        )
        # Short pause between consecutive requests.
        Sys.sleep(0.5)
      }
    }
  }
}
After a while, the server stops my connection with the following error: "421 There are too many connections from your internet address".
I read here that this is due to the number of open connections and that I should close them at each iteration (I'm not sure this really makes sense, though!).
Is there a way to close the FTP connection with the httr
package?
Proposed solution - set the maximum number of connections to the ftp server for httr
> config(CURLOPT_MAXCONNECTS=5)
<request>
Options:
* CURLOPT_MAXCONNECTS: 5
The httr
package is a wrapper for curl
. This is important as it abstracts the curl interface. In this case, we wish to modify the curl
behaviour by modifying curl's configuration via the httr
abstraction.
httr
by default handles automatic connection sharing across requests to the same website (by default, curl handles are managed automatically), cookies are maintained across requests, and an up-to-date root-level SSL certificate store is used. In this context we do not control the FTP server, only the client requests to the server. Hence, we can modify curl's default behaviour via httr::config
to reduce the number of simultaneous FTP requests.
To retrieve current options we can execute the following command:
>httr_options("ftp")
httr libcurl type
49 ftp_account CURLOPT_FTP_ACCOUNT string
50 ftp_alternative_to_user CURLOPT_FTP_ALTERNATIVE_TO_USER string
51 ftp_create_missing_dirs CURLOPT_FTP_CREATE_MISSING_DIRS integer
52 ftp_filemethod CURLOPT_FTP_FILEMETHOD integer
53 ftp_response_timeout CURLOPT_FTP_RESPONSE_TIMEOUT integer
54 ftp_skip_pasv_ip CURLOPT_FTP_SKIP_PASV_IP integer
55 ftp_ssl_ccc CURLOPT_FTP_SSL_CCC integer
56 ftp_use_eprt CURLOPT_FTP_USE_EPRT integer
57 ftp_use_epsv CURLOPT_FTP_USE_EPSV integer
58 ftp_use_pret CURLOPT_FTP_USE_PRET integer
59 ftpport CURLOPT_FTPPORT string
60 ftpsslauth CURLOPT_FTPSSLAUTH integer
196 tftp_blksize CURLOPT_TFTP_BLKSIZE integer
to access the libcurl documentation we can call curl_docs("CURLOPT_FTP_ACCOUNT")
.
httr
configuration of requests
You can either modify the httr
global curl configuration using set_config()
or simply wrap your request using with_config()
. In this instance we wish to limit the maximum number of connections to the ftp server.
thus:
httr_options("max")
httr libcurl type
95 max_recv_speed_large CURLOPT_MAX_RECV_SPEED_LARGE number
96 max_send_speed_large CURLOPT_MAX_SEND_SPEED_LARGE number
97 maxconnects CURLOPT_MAXCONNECTS integer
98 maxfilesize CURLOPT_MAXFILESIZE integer
99 maxfilesize_large CURLOPT_MAXFILESIZE_LARGE number
100 maxredirs CURLOPT_MAXREDIRS integer
we can now look up curl_docs("CURLOPT_MAXCONNECTS")
- ok this is what we want.
Now we have to set it.
> config(CURLOPT_MAXCONNECTS=5)
<request>
Options:
* CURLOPT_MAXCONNECTS: 5
ref: https://cran.r-project.org/web/packages/httr/httr.pdf
I know this is slightly superfluous, but I included it to provide an alternate approach. Why? There is a subtle issue here due to network bandwidth: running multiple simultaneous FTP sessions may be slower than running them in series. My alternate approach would be to run the R script below, or to use curl directly from the Unix shell command line.
library(RCurl)
library(stringr)

# Log in as user "NEXGDDP" with an empty password.
opts <- curlOptions(userpwd = "NEXGDDP:", netrc = TRUE)
rcpDir <- c("rcp45", "rcp85")
varDir <- c("pr", "tasmin", "tasmax")

# Downloads are written under data/; make sure the directory exists so
# writeBin() does not fail on a fresh working directory.
dir.create("data", showWarnings = FALSE)

for (rcp in rcpDir) {
  for (var in varDir) {
    url <- paste0("ftp://ftp.nccs.nasa.gov/BCSD/", rcp, "/day/atmos/", var,
                  "/r1i1p1/v1.0/")
    print(url)

    # Fetch the directory listing (file names only) as a single string.
    filenames <- getURL(url, ftp.use.epsv = FALSE, dirlistonly = TRUE,
                        .opts = opts)
    # Listings may be CRLF- or LF-terminated; accept both so that no
    # file name keeps a trailing carriage return.
    filelist <- unlist(str_split(filenames, "\r?\n"))
    filelist <- filelist[filelist != ""]

    # Keep the two models of interest for the years 2040, 2050, ..., 2080.
    wanted <- str_detect(filelist, "inmcm4_20[4-8]0|GFDL-CM3_20[4-8]0")
    filesavg <- filelist[wanted]
    urlsavg <- str_c(url, filesavg)

    for (i in seq_along(urlsavg)) {
      fname <- str_c("data/", filesavg[i])
      # Skip files that are already on disk so the script can be re-run.
      if (!file.exists(fname)) {
        print(urlsavg[i])
        bin <- getBinaryURL(urlsavg[i], .opts = opts)
        writeBin(bin, fname)
        # Pause after each download before issuing the next request.
        Sys.sleep(1)
      }
    }
  }
}
> require(RCurl)
> require(stringr)
> opts = curlOptions(userpwd = "NEXGDDP:", netrc = TRUE)
> rcpDir = c("rcp45", "rcp85")
> varDir = c("pr", "tasmin", "tasmax")
> for (rcp in rcpDir ) {
+ for (var in varDir ) {
+ url <- paste0( 'ftp://ftp.nccs.nasa.gov/BCSD/', rcp, '/day/atmos/', var, '/r1i1p1/v1.0/', sep = '')
+ print(url)
+ filenames = getURL(url, ftp.use.epsv = FALSE, dirlistonly = TRUE, .opts = opts)
+ filelist <- unlist(str_split(filenames, "\n"))
+ filelist <- filelist[!filelist == ""]
+ filesavg <- str_detect(filelist,
+ "inmcm4_20[4-8]0|GFDL-CM3_20[4-8]0")
+ filesavg <- filelist[filesavg]
+ filesavg
+ urlsavg <- str_c(url, filesavg)
+
+ for (file in seq_along(urlsavg)) {
+ fname <- str_c("data/", filesavg[file])
+ if (!file.exists(fname)) {
+ print(urlsavg[file])
+ bin <- getBinaryURL(urlsavg[file], .opts = opts)
+ writeBin(bin, fname)
+ Sys.sleep(1)
+ }
+ }
+ }
+ }
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp45_r1i1p1_inmcm4_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_inmcm4_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp45_r1i1p1_inmcm4_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_inmcm4_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp45/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp45_r1i1p1_inmcm4_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_inmcm4_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/pr/r1i1p1/v1.0/pr_day_BCSD_rcp85_r1i1p1_inmcm4_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_inmcm4_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmin/r1i1p1/v1.0/tasmin_day_BCSD_rcp85_r1i1p1_inmcm4_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_2080.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_inmcm4_2040.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_inmcm4_2050.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_inmcm4_2060.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_inmcm4_2070.nc"
[1] "ftp://ftp.nccs.nasa.gov/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_rcp85_r1i1p1_inmcm4_2080.nc"