I followed this tutorial and the async documentation, and came up with this code.
// ASYNC JOBS
var async = require('async'),
    spawn = require('child_process').spawn,
    // Maximum number of jobs the queue is allowed to run at the same time.
    // (The comma after the spawn declarator keeps maxjobs inside this var
    // statement; without it, maxjobs would be assigned as an implicit global.)
    maxjobs = 4;
/**
 * Queue worker: launches one phantomjs process for a queued job.
 *
 * The last entry of opts.params is removed and logged as the job label; the
 * remaining entries become the phantomjs arguments. The params key is deleted
 * from opts, and opts itself is then handed to spawn as its options object.
 *
 * NOTE: done is accepted but never invoked in this version.
 *
 * @param {Object} opts - job description; must carry a params array.
 * @param {Function} done - completion callback supplied by the queue (unused here).
 * @returns {null} always null.
 */
function setup_R_job(opts,done)
{
    var args = opts.params;
    var label = args.pop();
    log('starting '+label);

    // Strip the argument list so that only spawn options remain on opts.
    delete opts.params;

    function onExit(code)
    {
        log('got exit code: '+code);
        return null;
    }

    spawn('phantomjs', args, opts).on('exit', onExit);
    return null;
}
// Job queue: setup_R_job is the worker function and maxjobs is the
// concurrency limit handed to async.queue.
var course_queue=async.queue(setup_R_job, maxjobs);
I instantiate the jobs using this function.
// Instantiate jobs.
/**
 * Queues one phantomjs job per URL on course_queue.
 *
 * URLs are visited from the last element of the array down to the first.
 * Each job gets its own options object (cwd, env) and its own params array
 * ending with the URL; the full argument line is logged before queueing.
 *
 * @param {Array} urls - course URLs to queue.
 */
var jobForCourses = function(urls)
{
    var idx = urls.length;
    while (idx-- > 0) {
        var task = {
            cwd: __dirname,
            env: process.env,
            params: ['../getCourseLinks.js', '--course', urls[idx]]
        };
        log("queueing: " + task.params.join(' '));
        course_queue.push(task);
    }
};
jobForCourses
is called as follows.
// Example invocation. url1..url6 are placeholders that are not defined in this
// snippet — presumably course paths such as '/courses/url1' (see the logged
// output); confirm against the real data.
var urls = [url1, url2, url3, url4, url5, url6];
// Queue one job per URL.
jobForCourses(urls);
It should create a job for each of the URLs and execute 4 jobs at a time. When a job is done, it should start the next one, and so on until there are no more jobs in the queue. At least, that is what the async documentation says about it:
queue(worker, concurrency)
Creates a queue object with the specified concurrency. Tasks added to the queue are processed in parallel (up to the concurrency limit). If all workers are in progress, the task is queued until one becomes available. Once a worker completes a task, that task's callback is called.
However, it starts the first 4 jobs, finishes them, and does not start the others.
Of course if I set maxjobs
to 50
, it will execute all of them, but this is not what I want.
If it helps, I get the following output:
queueing: ../getCourseLinks.js --course /courses/url1
queueing: ../getCourseLinks.js --course /courses/url2
queueing: ../getCourseLinks.js --course /courses/url3
queueing: ../getCourseLinks.js --course /courses/url4
queueing: ../getCourseLinks.js --course /courses/url5
queueing: ../getCourseLinks.js --course /courses/url6
queueing: ../getCourseLinks.js --course /courses/url7
queueing: ../getCourseLinks.js --course /courses/url8
queueing: ../getCourseLinks.js --course /courses/url9
...
starting /courses/url1
starting /courses/url2
starting /courses/url3
starting /courses/url4
got exit code: 0
got exit code: 0
got exit code: 0
got exit code: 0
You'll need to invoke done
after a process has finished. Try this:
/**
 * Queue worker: launches one phantomjs process for a queued job and tells the
 * queue when that process is finished, so the next queued job can start.
 *
 * The last entry of opts.params is logged as the job label. The complete
 * params list (label included) is passed to phantomjs, and every other key of
 * opts is forwarded to spawn as an option. opts itself is left untouched.
 *
 * @param {Object} opts - job description; must carry a params array.
 * @param {Function} done - queue callback. Called exactly once: with no
 *   argument when the process exits, or with the error when the process
 *   could not be spawned.
 */
function setup_R_job(opts,done) {
    // Work on a copy: popping the label off opts.params would also strip the
    // URL from the arguments phantomjs receives.
    var params = opts.params.slice();
    log('starting '+params[params.length - 1]);

    // Everything except params is a spawn option (cwd, env, ...).
    var spawnOpts = {};
    Object.keys(opts).forEach(function(key)
    {
        if (key !== 'params') {
            spawnOpts[key] = opts[key];
        }
    });

    // 'exit' may or may not follow 'error', so guard against reporting the
    // same job to the queue twice.
    var finished = false;
    function finish(err)
    {
        if (finished) {
            return;
        }
        finished = true;
        done(err);
    }

    var R = spawn('phantomjs', params, spawnOpts);
    // Without an 'error' listener a failed spawn (e.g. phantomjs missing)
    // would throw and the worker slot would never be released.
    R.on('error',function(err)
    {
        log('failed to run phantomjs: '+err.message);
        finish(err);
    });
    R.on('exit',function(code)
    {
        log('got exit code: '+code);
        finish();
    });
}
You could also add some error handling there, but this might serve as a starting point.