Search code examples
node.jsweb-scrapingeventemitter

Inconsistent method result in node js


I have been trying to figure out the following for the last couple of days and just can't seem to find the answer. I am new to Node and JS (my only experience is online tutorials).

I am trying to create a class (function) to scrape the source code from websites. I want to read in a URL from the command line and return the HTML content. However, I get different results depending on how I run the code, even though I think the results should be the same.

I have been reading about events in Node, so I have used them a little in the code. One listener prompts me for the URL and, after setting it, emits a message that is picked up by another listener, which goes out and fetches the HTML content.

The problem I am having is that when I create an instance of the object, it seems like the request portion of the code does not execute. However, if I call the method from the instance I get the print out of the html content of the page.

Any help is appreciated. Thanks.

/**
 * Constructor for a small page scraper driven by an EventEmitter.
 *
 * On construction it registers two listeners on `this.eventEmitter`
 * ('setURL' and 'Get url html'), exposes the same handlers as
 * `this.setUrl` and `this.httpGet`, and then immediately emits 'setURL',
 * which prompts on stdin for the URL to fetch.
 *
 * Instance properties set here: `eventEmitter`, `url`, `setUrl`, `httpGet`.
 */
function test() {
  var events = require('events').EventEmitter;
  var request = require('request');
  var util = require('util');

  // Capture the instance so the nested callbacks can reach it.
  var that = this;
  that.eventEmitter = new events();
  // Default URL, used if httpGet runs before any input has been read.
  that.url = 'http://www.imdb.com/';

  // 'setURL' handler: read a line from stdin, store it in that.url, then
  // emit 'Get url html'. The handler is also exposed as that.setUrl.
  that.eventEmitter.on('setURL',that.setUrl = function(){
    console.log("Input the URL: ");
    process.stdin.resume();
    process.stdin.setEncoding('utf8');

    process.stdin.on('data', function (text) {
      // util.inspect wraps the text in quotes and escapes the trailing
      // newline; the substr call drops the leading quote and the last three
      // characters (presumably the escaped "\n" plus the closing quote --
      // NOTE(review): a "\r\n" line ending would leave an escaped "\r").
      that.url = util.inspect(text);
      that.url = that.url.substr(1, that.url.length - 4);
      that.eventEmitter.emit('Get url html');
      // NOTE(review): emit() returns once httpGet has *started* the request;
      // exiting here presumably ends the process before the request callback
      // can run, which matches the reported symptom (only "Fetching..." is
      // printed).
      process.exit();
    });
  });

  // 'Get url html' handler: log the target and request that.url, printing
  // the body on a 200 response. The handler is also exposed as that.httpGet.
  that.eventEmitter.on('Get url html',that.httpGet = function() {
    console.log("Fetching... " + that.url);

    request(that.url, function (error, response, body) {
      if (!error && response.statusCode == 200) {
        console.log(body) // Print the HTML returned for that.url.
      } else {
        console.log("Error Encountered");
      }
    });
  });

  // Kick off the flow as soon as the object is constructed.
  that.eventEmitter.emit('setURL');
}

// Constructing the object emits 'setURL', so this prompts for the URL. After the
// URL is entered, only the first line of that.httpGet ("Fetching...") is seen.
var scrapper = new test();

// This call runs immediately, before any URL has been typed, so it fetches the
// default that.url; its output is the "desired result" observed from that.httpGet.
scrapper.httpGet();

Solution

  • I solved this using the prompt library: https://www.npmjs.com/package/prompt

    /**
     * Constructor for a small page scraper driven by an EventEmitter.
     *
     * On construction it registers two listeners on `this.eventEmitter`
     * ('setURL' and 'Get url html'), exposes the same handlers as
     * `this.setUrl` and `this.httpGet`, and then immediately emits 'setURL',
     * which prompts the user for the URL and fetches it once entered.
     *
     * Instance properties set here: `eventEmitter`, `url`, `setUrl`, `httpGet`.
     */
    function test() {
      var events = require('events').EventEmitter;
      var prompt = require('prompt');
      var request = require('request');

      // Capture the instance so the nested callbacks can reach it.
      var that = this;
      that.eventEmitter = new events();
      // Default URL, used if httpGet is called before a URL has been entered.
      that.url = 'http://www.imdb.com/';

      // 'setURL' handler: ask for the URL, store it, then trigger the fetch.
      that.eventEmitter.on('setURL',that.setUrl = function(){
        prompt.start();
        process.stdin.setEncoding('utf8');

        prompt.get(['url'], function( err, result ) {
          // If the prompt fails or is cancelled, `result` is not usable;
          // report it instead of crashing on `result.url`.
          if (err || !result) {
            console.log("Error Encountered");
            return;
          }

          that.url = result.url;
          that.eventEmitter.emit('Get url html');
        } );
      });

      // 'Get url html' handler: log the target and request that.url, printing
      // the body on a 200 response.
      that.eventEmitter.on('Get url html',that.httpGet = function() {
        console.log("Fetching... " + that.url);

        request(that.url, function (error, response, body) {
          if (!error && response.statusCode === 200) {
            console.log(body); // Print the HTML returned for that.url.
          } else {
            console.log("Error Encountered");
          }
        });
      });

      // Kick off the flow as soon as the object is constructed.
      that.eventEmitter.emit('setURL');
    }
    
    // Constructing the object emits 'setURL', which prompts for the URL and
    // then triggers the fetch through the 'Get url html' event.
    var scrapper = new test();
    
    // An explicit call is no longer needed to see the page content:
    // scrapper.httpGet();
    

    I ran the script from the command line, entered http://www.google.com, and it retrieved the results without the additional call to scrapper.httpGet().