In the second function of the async waterfall, the eachSeries callback (urlCallback) in my code executes after the waterfall callback (waterfallCallback), for reasons I cannot suss out.
// Two-step waterfall: step 1 fetches `website` and extracts page URLs from
// its HTML; step 2 requests each page URL in series and collects the hrefs
// found under `#div` into `itemUrls`. The final callback logs the result.
// NOTE(review): `pageUrls`, `itemUrl` and `itemUrls` are assigned without a
// declaration in this snippet — presumably declared in an outer scope; confirm.
async.waterfall([
function(callback) {
request(website, function (error, response, html) {
// NOTE(review): there is no else branch — on an error or a non-200 status
// `callback` is never invoked, so the waterfall never advances or finishes.
if (!error && response.statusCode == 200) {
pageUrls = getPageUrls(html)
callback(null, pageUrls)
}
})
},
function (pageUrls, waterfallCallback) {
async.eachSeries(pageUrls, function (url, urlCallback) {
console.log('SET ' + url)
// NOTE(review): `err` is not checked; `body` is handed to cheerio.load even
// when the request failed.
request(url, function (err, response, body) {
var $ = cheerio.load(body)
// Collect the href of the direct-child <a> of every child of `#div`.
$('#div').children().each(function(){
console.log($(this).children("a").attr("href"));
itemUrl = $(this).children("a").attr("href")
itemUrls.push(itemUrl)
})
// Signals that this URL is done so eachSeries can move to the next one.
urlCallback(null,itemUrls)
})
},
// NOTE(review): this is a call expression, not a function reference.
// JavaScript evaluates it while building the argument list for
// async.eachSeries, so waterfallCallback fires before any request has been
// issued, and its return value (not a function) is what eachSeries receives
// as its completion callback.
waterfallCallback(null, itemUrls))
}
],
// Final waterfall callback: receives the error (if any) and the value passed
// to waterfallCallback.
function(err, results) {
console.log("results: " + results)
})
AFAIK, async.eachSeries takes three arguments (array, functionToBeExecutedOnEachItem, callback) and executes them in that order. Somehow not here.
The iteratee and final-callback parameters of async.eachSeries must be functions: either a function reference such as waterfallCallback or a function definition such as function(err,result){}.
When you write waterfallCallback(null, itemUrls), that is not passing a function; it runs the function immediately and passes its return value instead!
Changing it to simply waterfallCallback should do the trick.
Update: Also, .eachSeries does not return the values as an array; its final callback is just function(err). Check out .mapSeries instead, which will return the resulting array in the final callback function(err,finalArray). (Be aware that each value returned from a .map iteration will be one element in the array, so if you return an array each time, you'll get data structures like [ [], [], [] ].)