Pardon me as the code is messy. I'm still learning.
I need to download the images whose URLs are listed in a CSV file. However, I have 2000+ URLs on the same domain, and I don't think the server will let me pull everything in one go, so I always get an error after some images. The problems I need to solve:
1) How to make sure each image is downloaded completely before the code moves on to the next URL
2) How to write better code
Your help is appreciated. Thank You
var csv = require('fast-csv');
var Promise = require('bluebird');
var fs = require('fs');
var request = require('request');
// CSV file to read; the code below uses column 0 of each row as the image URL
// and column 1 as the file name to save under img/.
var path = "test.csv";
/**
 * Read a CSV file and collect every parsed row into an array.
 *
 * @param {string} path - Path of the CSV file, handed to csv.fromPath.
 * @param {Object} [options] - Parser options, passed through to csv.fromPath.
 * @returns {Promise<Array>} Resolves with all rows once the parser emits
 *   'end'; rejects with the parser's error if it emits 'error'.
 */
var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    var records = [];
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        records.push(record);
      })
      // Without this handler a missing or unreadable file would leave the
      // promise pending forever.
      .on('error', reject)
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
});
/**
 * Download `uri` and write the response body to `filename`.
 *
 * @param {string} uri - URL to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {function(Error=)} callback - Called exactly once: with no arguments
 *   after the file has been closed, or with the error if the request or the
 *   file write fails.
 */
var download = function(uri, filename, callback){
  var finished = false;

  // The request and the file stream can fail independently; make sure the
  // caller is notified exactly once whichever happens first.
  function done(err){
    if (finished) return;
    finished = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  }

  var file = fs.createWriteStream(filename);
  // A single GET is enough: the result of the former HEAD request was never
  // used and only doubled the number of requests sent to the server.
  var req = request(uri);

  file.on('error', function(err){
    req.abort(); // nothing left to write to, so stop downloading
    done(err);
  });
  file.on('close', function(){
    done();
  });

  req.on('error', function(err){
    // pipe() does not close the destination when the source fails.
    file.destroy();
    done(err);
  });

  req.pipe(file);
};
// Start a download for every CSV row (column 0 = URL, column 1 = file name).
// NOTE: the loop does not wait for a download to finish before starting the
// next one, so all requests are issued at once.
promiseCSV(path).then(function (records) {
  // Declare the index locally instead of leaking an implicit global `i`.
  for (var i = 0; i < records.length; i++)
  {
    download(records[i][0], 'img/' + records[i][1], function(){
    });
  }
}).catch(function (err) {
  // Surface CSV read failures instead of leaving an unhandled rejection.
  console.error(err);
});
The code below throttles your requests to one at a time. Another option is to use the throttled-request package to limit the number of requests per unit of time.
// Download the rows one at a time: the next download starts only from the
// completion callback of the previous one.
promiseCSV(path).then(function (records) {
  // Keep the cursor local so nothing else can modify it mid-run.
  var i = 0;

  function next(){
    // Also covers an empty CSV, where records[0] would be undefined.
    if (i >= records.length) return;
    download(records[i][0], 'img/' + records[i][1], function(){
      i++;
      next();
    });
  }

  next();
}).catch(function (err) {
  // Surface CSV read failures instead of leaving an unhandled rejection.
  console.error(err);
});
Also, your records variable is local to the promise callback; if you need to access it from elsewhere, move it out to an outer scope:
var records = []; // kept at outer scope so it can be accessed from elsewhere

/**
 * Read a CSV file and collect every parsed row into an array. The outer
 * `records` variable is pointed at the array of the most recent call.
 *
 * @param {string} path - Path of the CSV file, handed to csv.fromPath.
 * @param {Object} [options] - Parser options, passed through to csv.fromPath.
 * @returns {Promise<Array>} Resolves with all rows once the parser emits
 *   'end'; rejects with the parser's error if it emits 'error'.
 */
var promiseCSV = Promise.method(function(path, options) {
  return new Promise(function(resolve, reject) {
    // Use a fresh array per call so rows from an earlier call are not mixed
    // into this result, then publish it through the outer variable.
    var rows = [];
    records = rows;
    csv
      .fromPath(path, options)
      .on('data', function(record) {
        rows.push(record);
      })
      // Without this handler a missing or unreadable file would leave the
      // promise pending forever.
      .on('error', reject)
      .on('end', function() {
        resolve(rows);
        console.log('done');
      });
  });
});