I am trying to scrape Google Dictionary and create an unofficial API. I am using the cheerio and request packages for Node.js to implement this functionality.
Here is my code:
var cheerio = require("cheerio");
var request = require('request');
// Callback for the search request: reports a request error if there is one,
// otherwise loads the returned HTML into cheerio and prints the text of the
// <span> elements found under ".vk_ans".
function printDefinition(error, response, html) {
  if (error) {
    console.error(error);
    return;
  }
  var $ = cheerio.load(html);
  console.log($(".vk_ans span").text());
}

// Request the Google results page for "define love".
request({
  url: 'https://www.google.co.in/search?q=define+love',
  method: 'GET'
}, printDefinition);
I initially tried to scrape this page: "https://www.google.co.in/search?q=define+love". I wanted to extract the word "love" shown in bold, which is in a span inside a div with the class vk_ans.
But when I console.log the answer, it prints an empty line. Everywhere else I do the same thing and Cheerio works fine. What am I missing?
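You need to send a User-Agent header so that the request is not identified as coming from a bot. Note that the snippet below also uses a different selector (.mw) from the one in the question. Try this: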
var cheerio = require("cheerio");
var request = require('request');
// Fetch the Google results page for "define love" and print the text of the
// elements matching ".mw". A browser-like User-Agent header is sent so that
// the request is not identified as coming from a bot.
request({
  method: 'GET',
  url: 'https://www.google.co.in/search?q=define+love',
  headers: {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
  }
}, function(err, response, body) {
  // Transport-level failure (DNS, connection, ...): no response to parse.
  if (err) {
    return console.error(err);
  }
  // An HTTP-level failure still arrives with err unset. Without this check a
  // non-200 page would be parsed as if it were the results page, and the
  // selector below would most likely match nothing.
  if (response.statusCode !== 200) {
    return console.error('Unexpected HTTP status: ' + response.statusCode);
  }
  var $ = cheerio.load(body);
  // .text() returns the combined text of all matched elements.
  var a = $(".mw").text();
  console.log(a);
});