javascript node.js web-scraping user-agent cheerio

Scraping Google Dictionary

I am trying to scrape Google Dictionary to create an unofficial API. I tried to implement this using the cheerio and request packages for Node.js.

Here is my code:

var cheerio = require("cheerio");
var request = require('request');

// Options for a plain GET of the Google results page for "define love".
var searchOptions = {
    method: 'GET',
    url: 'https://www.google.co.in/search?q=define+love'
};

/**
 * Callback for the request: logs the error if the request failed; otherwise
 * parses the returned HTML and logs the text of the ".vk_ans span" matches.
 *
 * @param {Error|null} err - Error reported by the request, if any.
 * @param {Object} response - Response object (not used here).
 * @param {string} body - Body of the response, parsed as HTML.
 */
function logAnswerText(err, response, body) {
    if (err) {
        console.error(err);
        return;
    }

    var $ = cheerio.load(body);
    console.log($(".vk_ans span").text());
}

request(searchOptions, logAnswerText);

I initially tried to scrape this page: "https://www.google.co.in/search?q=define+love". I tried to scrape the word "love" shown in bold, which is inside a span within a div with the class vk_ans.

But when I console.log the result, it prints an empty line. I am doing the same thing everywhere else, and Cheerio works fine there. What am I missing?

Solution

You need to send a User-Agent header so that the request is not identified as coming from a bot. Note that the example below also uses a different selector (`.mw`). Try this:

var cheerio = require("cheerio");
var request = require('request');

// Fetch the Google results page for "define love" and print the text of the
// elements matching ".mw".
request({
  method: 'GET',
  url: 'https://www.google.co.in/search?q=define+love',
  headers: {
    // Browser-like User-Agent so the request is not identified as a bot.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
  }
}, function(err, response, body) {

  // Transport-level failure (DNS, connection, etc.): nothing to parse.
  if (err) {
    return console.error(err);
  }

  // A non-2xx reply is presumably an error or block page rather than search
  // results; report it instead of silently printing an empty line.
  if (response.statusCode < 200 || response.statusCode >= 300) {
    return console.error('Unexpected HTTP status: ' + response.statusCode);
  }

  var $ = cheerio.load(body);

  // .text() returns the combined text of every matched element.
  var a = $(".mw").text();
  console.log(a);

});