I have written a function that collects a list of hyperlink anchors through web scraping.
I want to push all of these anchors onto an array of objects, which will later be serialized to a JSON string.
The `Api.GetCourseSubmenuUrl` and `Api.FilterSubmenuContentList` methods both return promises.
However, the following code keeps running without waiting for the array to be filled inside cheerio's `.each()` callback. Why does this happen?
Please note that the each method in cheerio is synchronous.
My code uses the packages request, cheerio, and Bluebird (referenced as `BPromise`).
Code:
/**
 * Question version (contains the bug being asked about): requests a course's
 * submenu page, collects the text and href of every anchor on it, and hands
 * the collected list to Api.FilterSubmenuContentList.
 *
 * @param {Object} course - only `course.id` is read; it is appended to the request URL.
 * @returns {BPromise} a promise derived from the BPromise constructed below. It is
 *   settled by the resolve()/reject() calls inside the request callback, not by
 *   the inner promise chain.
 */
Connection.prototype.FillCourseWithSubmenuContent = function(course){
var self = this; // keeps the Connection instance reachable inside the nested callbacks
var submenuItems = []; // shared accumulator, filled by the .each() callback below
// NOTE(review): wrapping an existing promise chain in `new BPromise` is the
// "Promise constructor antipattern". The executor's return value is ignored,
// so nothing returned from the inner chain reaches the caller.
return new BPromise(function(resolve, reject){
return Api.GetCourseSubmenuUrl(ApiConnection.authToken).then(function(response){
// NOTE(review): request.get is callback-based; what it returns here is not a
// promise for the response, so the next .then() does not wait for the
// callback and runs while submenuItems is still empty.
return request.get({url: self.url + response.url + course.id, followRedirect: false, jar: cookiejar}, function(err,httpResponse,body){
if(err){
// NOTE(review): no `return` here, so execution continues into the parsing
// code below even after the promise has been rejected.
reject(err);
}
var cheerio = require('cheerio');
var dashboardhtml = cheerio.load(body, {
normalizeWhitespace: true,
decodeEntities: true
}
);
// Collect the text and href of every anchor on the page
dashboardhtml('a').each(function(i, elem) {
console.log("Object:");
console.log({"text":dashboardhtml(elem).text(), "url":dashboardhtml(elem).attr('href')});
submenuItems.push({"text":dashboardhtml(elem).text().trim(), "url":dashboardhtml(elem).attr('href')});
});
// Resolves the outer promise with no value; the collected items are only
// available through the shared submenuItems array.
resolve();
});
}).then(function(){
// Runs as soon as the previous .then() callback returns (see note above),
// i.e. typically before the request callback has pushed anything.
console.log(submenuItems);
return Api.FilterSubmenuContentList(ApiConnection.authToken, submenuItems);
});
}).catch(function(error){
// NOTE(review): `reject` is a parameter of the executor function above and is
// not in scope here; unless a global `reject` exists this line throws a
// ReferenceError instead of forwarding the error.
return reject(error);
});
};
The problem was fixed by avoiding the Promise constructor antipattern, as pointed out by @Bergi.
Since the Request library doesn't have promise support, I still had to wrap the request call itself inside a (Bluebird) promise.
Please note that it is also possible to promisify libraries, which makes life a lot easier. For the purpose of demonstrating the solution, however, I went the promise-wrapping route.
The solution:
Connection.prototype.FillCourseWithSubmenuContent = function(course){
var self = this; //This class
var submenuItems = [];
return Api.GetCourseSubmenuUrl(ApiConnection.authToken).then(function(response){
console.log(self.url + response.url + course.id);
return new BPromise(function(resolve, reject){
request.get({url: self.url + response.url + course.id, followRedirect: false, jar: cookiejar}, function(err,httpResponse,body){
if(err){
reject(err);
}
var cheerio = require('cheerio');
var dashboardhtml = cheerio.load(body, {
normalizeWhitespace: true,
decodeEntities: true
}
);
//Find all the links on the page
dashboardhtml('a').each(function(i, elem) {
// console.log("Object");
// console.log({"text":dashboardhtml(elem).text(), "url":dashboardhtml(elem).attr('href')});
submenuItems.push({"text":dashboardhtml(elem).text().trim(), "url":dashboardhtml(elem).attr('href')});
});
return resolve();
});
});
}).then(function(){
console.log(submenuItems);
return Api.FilterSubmenuContentList(ApiConnection.authToken, submenuItems);
});
};