Search code examples
node.js · csv · web-crawler · npm-request

Download Image with Node-Request with URL scan from CSV


Pardon the messy code; I'm still learning. I need to download images whose URLs are read from a CSV file. However, I have 2000+ URLs on the same domain, and I don't think the server will let me pull everything in one go, so I always get an error after some images. The problems I need to solve:
1) How to make sure each image has downloaded completely before the code moves on to the next URL
2) How to write better code
Your help is appreciated. Thank you.

var csv = require('fast-csv');
var Promise = require('bluebird');
var fs = require('fs'); 
var request = require('request');
// CSV file handed to promiseCSV; each parsed row is used as [image URL, output file name].
var path = "test.csv";

/**
 * Read a CSV file and collect every parsed row into an array.
 *
 * @param {string} path - Location of the CSV file, passed to csv.fromPath.
 * @param {Object} [options] - Parser options forwarded unchanged to csv.fromPath.
 * @returns {Promise<Array>} Resolves with all rows once the stream emits 'end';
 *   rejects with the error if the stream emits 'error'.
 */
var promiseCSV = function(path, options) {
  // `new Promise` already converts a synchronous throw inside the executor
  // into a rejection, so no additional Promise.method wrapper is needed.
  return new Promise(function(resolve, reject) {
    var records = [];
    csv
      .fromPath(path, options)
      // An 'error' event with no listener is thrown by the emitter instead of
      // reaching the caller, so route it into the promise.
      .on('error', reject)
      .on('data', function(record) {
        records.push(record);
      })
      .on('end', function() {
        resolve(records);
        console.log('done');
      });
  });
};



/**
 * Download `uri` into the local file `filename`.
 *
 * `callback` is invoked exactly once: with `null` after the file stream has
 * closed (all data flushed), or with the error if either the request stream
 * or the file stream fails. Reporting failures is what lets a caller that
 * chains downloads keep going instead of waiting forever.
 *
 * @param {string} uri - Address of the resource to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {function(?Error)} callback - Completion handler.
 */
var download = function(uri, filename, callback){
  var finished = false;
  var done = function(err){
    // Both an 'error' and the following 'close' may fire; report only the first.
    if (finished) { return; }
    finished = true;
    if (typeof callback === 'function') { callback(err || null); }
  };

  var file = fs.createWriteStream(filename);
  file.on('error', done);
  file.on('close', function(){ done(); });

  // The original issued a HEAD request first and ignored its result; it only
  // doubled the number of requests sent to the server, so it is dropped.
  request(uri)
    .on('error', function(err){
      done(err);
      // pipe() does not close the destination when the source fails.
      file.destroy();
    })
    .pipe(file);
};


// Start a download for every CSV row ([url, file name]). NOTE: every request
// is started immediately, without waiting for earlier downloads to finish.
promiseCSV(path).then(function (records) {
  // forEach gives each row its own scope and avoids the implicit global loop
  // counter, which throws a ReferenceError in strict mode.
  records.forEach(function (record) {
    download(record[0], 'img/' + record[1], function (err) {
      if (err) {
        console.error('Download failed for ' + record[0] + ':', err);
      }
    });
  });
}).catch(function (err) {
  // Surface CSV read/parse failures instead of leaving the rejection unhandled.
  console.error('Could not read ' + path + ':', err);
});

Solution

  • This will throttle your requests to one at a time. Another option is to use the throttled-request package to limit the number of requests per unit of time.

    /**
     * Download every [url, file name] row one at a time: the next download is
     * started only from the completion callback of the previous one.
     *
     * @param {Array<Array<string>>} records - Rows as [url, file name].
     * @param {function()} [done] - Called once after the last row was processed
     *   (immediately when `records` is empty).
     */
    function downloadSequentially(records, done) {
      // Keep the position local so repeated runs do not share a counter.
      var i = 0;
      function next() {
        // Also covers an empty CSV, where records[0] would be undefined.
        if (i >= records.length) {
          if (typeof done === 'function') { done(); }
          return;
        }
        var record = records[i];
        i++;
        download(record[0], 'img/' + record[1], function (err) {
          if (err) {
            // Log and continue so one bad URL does not stop the remaining rows.
            console.error('Download failed for ' + record[0] + ':', err);
          }
          next();
        });
      }
      next();
    }

    promiseCSV(path).then(function (records) {
      downloadSequentially(records);
    }).catch(function (err) {
      // Surface CSV read/parse failures instead of leaving the rejection unhandled.
      console.error('Could not read ' + path + ':', err);
    });
    

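    Also, your records variable is declared inside the promise executor, so outside of it the rows are only reachable through the resolved value passed to .then(); if you need to access the array from elsewhere, move the declaration out to the enclosing scope: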

    // Kept at module scope so code outside the promise can read the rows.
    // NOTE: the array is shared, so rows from repeated calls accumulate in it.
    var records = [];

    /**
     * Read a CSV file and append every parsed row to the shared `records` array.
     *
     * @param {string} path - Location of the CSV file, passed to csv.fromPath.
     * @param {Object} [options] - Parser options forwarded unchanged to csv.fromPath.
     * @returns {Promise<Array>} Resolves with `records` once the stream emits
     *   'end'; rejects with the error if the stream emits 'error'.
     */
    var promiseCSV = function(path, options) {
      // `new Promise` already converts a synchronous throw inside the executor
      // into a rejection, so no additional Promise.method wrapper is needed.
      return new Promise(function(resolve, reject) {
        csv
          .fromPath(path, options)
          // An 'error' event with no listener is thrown by the emitter instead
          // of reaching the caller, so route it into the promise.
          .on('error', reject)
          .on('data', function(record) {
            records.push(record);
          })
          .on('end', function() {
            resolve(records);
            console.log('done');
          });
      });
    };