Search code examples
javascript, promise, bluebird, cheerio

Promise returns empty list


I have written a function that collects a list of hyperlink anchors by web scraping.

I want to push all of these anchors onto an array of objects, which will later be serialized to a JSON string.

The Api.GetCourseSubmenuUrl and Api.FilterSubmenuContentList methods both return promises.

However, the following code keeps running without waiting for the array to be filled inside cheerio's .each() callback, so the list is still empty when it is used. Why does this happen?

Please note that the each method in cheerio is synchronous.

My code uses the packages bluebird (as BPromise), request, and cheerio.

Code:

/**
 * Question version (exhibits the bug being asked about).
 *
 * Intended behavior: look up the course submenu URL, download that page,
 * collect the text and href of every <a> element into `submenuItems`, and
 * pass the list to Api.FilterSubmenuContentList.
 *
 * @param {Object} course - object whose `id` is appended to the submenu URL.
 * @returns {BPromise} the promise built by `new BPromise(...)` with a
 *   `.catch()` attached; see the NOTE(review) comments for why it does not
 *   carry the filtered list.
 */
Connection.prototype.FillCourseWithSubmenuContent = function(course){
    var self = this; // keep the instance reachable inside the nested callbacks
    var submenuItems = []; // filled by the request callback below
    // NOTE(review): GetCourseSubmenuUrl already returns a promise, so wrapping the
    // whole chain in `new BPromise` is the "Promise constructor antipattern".
    // The executor's return value is ignored; only resolve()/reject() matter.
    return new BPromise(function(resolve, reject){
      return Api.GetCourseSubmenuUrl(ApiConnection.authToken).then(function(response){
        // NOTE(review): request.get() is callback-based and does not return a
        // promise, so the next .then() does not wait for the callback below.
        return request.get({url: self.url + response.url + course.id, followRedirect: false, jar: cookiejar}, function(err,httpResponse,body){
          if(err){
            // NOTE(review): no `return` here, so parsing still runs after a failure.
            reject(err);
          }
          var cheerio = require('cheerio');
          var dashboardhtml = cheerio.load(body, {
                  normalizeWhitespace: true,
                  decodeEntities: true
              }
          );
          //Find all the links on the page
          dashboardhtml('a').each(function(i, elem) {
              console.log("Object:");
              console.log({"text":dashboardhtml(elem).text(), "url":dashboardhtml(elem).attr('href')});
              // Store the trimmed link text together with its href attribute.
              submenuItems.push({"text":dashboardhtml(elem).text().trim(), "url":dashboardhtml(elem).attr('href')});
          });
          // Fulfils the outer BPromise with no value once the links are collected.
          resolve();
        });
      }).then(function(){
        // NOTE(review): this runs before the request callback has fired, so
        // `submenuItems` is still empty here -- the symptom described in the question.
        console.log(submenuItems);
        return Api.FilterSubmenuContentList(ApiConnection.authToken, submenuItems);
      });
    }).catch(function(error){
      // NOTE(review): `reject` is a parameter of the executor above and is not
      // in scope here; unless a global `reject` exists this throws a ReferenceError.
      return reject(error);
    });
};

Solution

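  • The problem was fixed by avoiding the Promise constructor antipattern, as pointed out by @Bergi. In the original code the .then() callback returned the result of request.get(), which is not a promise, so the chain moved on to the next .then() before the request callback had filled the array.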

    Since the Request library doesn't have promise support, I still had to wrap it inside a (Bluebird) promise.

    Please note that it is also possible to promisify libraries, which makes life a lot easier; for the purpose of demonstrating the solution, however, I went the promise-wrapping route.

    The solution:

    /**
     * Downloads the submenu page of a course, collects every hyperlink on it
     * and hands the list to Api.FilterSubmenuContentList.
     *
     * Only the callback-based request.get() call is wrapped in a promise; the
     * rest is an ordinary promise chain, so each step waits for the previous one.
     *
     * @param {Object} course - object whose `id` is appended to the submenu URL.
     * @returns {Promise} whatever Api.FilterSubmenuContentList returns for the
     *   collected `{text, url}` items. Rejects if the URL lookup, the HTTP
     *   request, the HTML parsing or the filter step fails.
     */
    Connection.prototype.FillCourseWithSubmenuContent = function(course){
        var self = this; // keep the instance reachable inside the nested callbacks
        return Api.GetCourseSubmenuUrl(ApiConnection.authToken).then(function(response){
          var pageUrl = self.url + response.url + course.id;
          console.log(pageUrl);
          return new BPromise(function(resolve, reject){
            request.get({url: pageUrl, followRedirect: false, jar: cookiejar}, function(err, httpResponse, body){
              if(err){
                // Stop here: without the return the code below would still try
                // to parse an undefined body after the promise was rejected.
                return reject(err);
              }
              // This callback runs outside the promise executor, so a throw here
              // would not be turned into a rejection automatically.
              try {
                var cheerio = require('cheerio');
                var dashboardhtml = cheerio.load(body, {
                  normalizeWhitespace: true,
                  decodeEntities: true
                });
                var submenuItems = [];
                //Find all the links on the page
                dashboardhtml('a').each(function(i, elem) {
                  var anchor = dashboardhtml(elem);
                  submenuItems.push({"text": anchor.text().trim(), "url": anchor.attr('href')});
                });
                // Pass the list through the promise instead of a shared outer variable.
                resolve(submenuItems);
              } catch (parseError) {
                reject(parseError);
              }
            });
          });
        }).then(function(submenuItems){
          console.log(submenuItems);
          return Api.FilterSubmenuContentList(ApiConnection.authToken, submenuItems);
        });
    };