I have been trying to figure the following for the last couple of days and just can't seem to figure out the answer. I am new to node and JS (only experience is online tutorials).
I am trying to create a class (function) to scrape the source code from websites. I want to read in a url from the command line and return the html content. However, I seem to be getting different results when running the code different ways (which I think I should be getting the same results).
I have been reading about events in node and so I have used them a little in the code. One listener event prompts the me for the url and then after setting the url it (the listener function) emits a message, which is picked up by another listener which goes out and fetches the html content.
The problem I am having is that when I create an instance of the object, it seems like the request portion of the code does not execute. However, if I call the method from the instance I get the print out of the html content of the page.
Any help is appreciated. Thanks.
function test() {
var events = require('events').EventEmitter;
var request = require('request');
var util = require('util');
var that = this;
that.eventEmitter = new events();
that.url = 'http://www.imdb.com/';
that.eventEmitter.on('setURL',that.setUrl = function(){
console.log("Input the URL: ");
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', function (text) {
that.url = util.inspect(text);
that.url = that.url.substr(1, that.url.length - 4);
that.eventEmitter.emit('Get url html');
process.exit();
});
});
that.eventEmitter.on('Get url html',that.httpGet = function() {
console.log("Fetching... " + that.url);
request(that.url, function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body) // Show the HTML for the Google homepage.
} else {
console.log("Error Encountered");
}
});
});
that.eventEmitter.emit('setURL');
}
var scrapper = new test(); //This asks me for the url and then only executes to first line of that.httpGet.
scrapper.httpGet(); // This gives the desired results from that.httpGet
I solved using the Prompt library https://www.npmjs.com/package/prompt
function test() {
var events = require('events').EventEmitter;
var prompt = require('prompt');
var request = require('request');
var util = require('util');
var that = this;
that.eventEmitter = new events();
that.url = 'http://www.imdb.com/';
that.eventEmitter.on('setURL',that.setUrl = function(){
prompt.start();
process.stdin.setEncoding('utf8');
prompt.get(['url'], function( err, result ) {
that.url = result.url;
that.eventEmitter.emit('Get url html');
} );
});
that.eventEmitter.on('Get url html',that.httpGet = function() {
console.log("Fetching... " + that.url);
request(that.url, function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body); // Show the HTML for the Google homepage.
} else {
console.log("Error Encountered");
}
});
});
that.eventEmitter.emit('setURL');
}
var scrapper = new test(); //This asks me for the url and then only executes to first line of that.httpGet.
// scrapper.httpGet(); // This gives the desired results from that.httpGet
I ran the script from the commandline, input http://www.google.com and it retrieved the results without the additional call to scrapper.httpGet();