Search code examples
nightmare

Nightmare.js: parameter is not defined


I want to scrape a page, get the HTML behind each href on it, and then output the result to a CSV file. This is my first time using Nightmare, and I have a problem with a parameter not being defined.

// Script from the question: load the listing page, collect the href of every
// a[target="_blank"] link, write the collected text to 8.csv, then try to open
// the first link and read its div.outputDate element.
var Nightmare = require('nightmare');
    var nightmare = Nightmare({ show: true });
    var fs  = require('fs');
    var result ;
    nightmare
      .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
      .wait(1000)
      .evaluate(function () {
        // Select every link that opens in a new tab and append each href to
        // `result`, one per line.
        // NOTE(review): this callback presumably executes inside the page, so
        // the `result` assigned here may not be the `result` declared above - confirm.
        var divs = document.querySelectorAll('a[target="_blank"]'),i;
        for (i = 0,result = ""; i < divs.length; ++i) {
            result += divs[i].href.toString()+"\n";
            }
        return divs;
      })
      .end()
      .then(function (divs) {
        // `divs` exists only as a parameter of this callback.
        console.log(divs)
        ///////////////////////////////////////////////////
        fs.writeFile('8.csv', result, function (err) {
      if (err) throw err;
      console.log('It\'s saved!');
    });
    ////////////////////////////////////////////////////
      })
      .then(function() {
        //since Nightmare has an internal `.then()`, return the instance returned by the final call in the chain
        return nightmare
          //navigate to the first collected link; `divs` is not in scope in this
          //callback, so this is the line that fails with "divs is not defined"
          .goto(divs[0].href)
          //read the div with class "outputDate" from that page
          .evaluate(function() {
            return document.querySelector('div[class="outputDate"]');
          })
      })
      .catch(function (error) {
        console.error('Search failed:', error);
      });

The second goto() throws an error: divs is not defined. Thanks a lot.


Solution

  • The error message says it all: divs is not defined. That means that there isn't a divs variable in the scope of the second then callback.

    You could "merge" the two then callbacks you have there into one, and have something like this:

    // Merged version: the follow-up navigation lives in the same `.then()`
    // callback that receives `divs`, so `divs` is in scope where it is used.
    var Nightmare = require('nightmare');
    var nightmare = Nightmare({ show: true });
    var fs  = require('fs');
    var result ;
    nightmare
      .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
      .wait(1000)
      .evaluate(function () {
        // Collect the href of every link that opens in a new tab.
        var divs = document.querySelectorAll('a[target="_blank"]'),i;
        for (i = 0,result = ""; i < divs.length; ++i) {
            result += divs[i].href.toString()+"\n";
        }
        // NOTE: `divs` is still returned as-is here; the text that follows this
        // snippet explains why that keeps the script from working.
        return divs;
      })
      .then(function (divs) {
        console.log(divs)
        ///////////////////////////////////////////////////
        fs.writeFile('8.csv', result, function (err) {
          if (err) throw err;
          console.log('It\'s saved!');
        });
        ////////////////////////////////////////////////////
        // Log before starting the chain: console.log() returns undefined, so it
        // cannot sit between `nightmare` and `.goto()`.
        console.log('this is the link I want to navigate to', divs[0].href)
        // Return the inner chain so a failure in it reaches the outer .catch().
        return nightmare
          //navigate to the first collected link
          .goto(divs[0].href)
          //read the div with class "outputDate" from that page
          .evaluate(function() {
            return document.querySelector('div[class="outputDate"]');
          })
          // End the instance as the last queued action, before the final
          // .then(), so it is not shut down while the navigation is pending.
          .end()
          .then(function(div) {
            console.log('I got the "div", now Ill do something with it', div)
          })
      })
      .catch(function (error) {
        console.error('Search failed:', error);
      });
    

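    But this code will also fail, because your divs list is full of empty objects: DOM nodes returned from evaluate() cannot be serialized back to the Node side, so each one arrives as {}. I have changed the code a bit to get it half working for you: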

    // Loads the listing page, collects the href of every a[target="_blank"]
    // link as a plain string, saves the list to 8.csv (one link per line), then
    // opens the first link and logs the markup of its div.outputDate element.
    var Nightmare = require('nightmare');
    var nightmare = Nightmare({ show: true });
    var fs  = require('fs');
    nightmare
      .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
      .wait(1000)
      .evaluate(function () {
        // Return plain strings rather than the elements themselves, so the
        // values survive the trip out of evaluate().
        var anchors = document.querySelectorAll('a[target="_blank"]'), i;
        var links = []
        for (i = 0; i < anchors.length; ++i) {
            links.push(anchors[i].href)
        }
        return links;
      })
      .then(function (links) {
        console.log(links)
        ///////////////////////////////////////////////////
        fs.writeFile('8.csv', links.join('\n'), function (err) {
          if (err) throw err;
          console.log('It\'s saved!');
        });
        ////////////////////////////////////////////////////
        console.log('this is the link I want to navigate to', links[0])
        // Return the inner chain so a failure in it reaches the outer .catch().
        return nightmare
          //navigate to the first collected link
          .goto(links[0])
          .wait(5000)
          //read the div with class "outputDate" from that page
          .evaluate(function() {
            // Hand back the element's markup as a string (or null when no
            // element matches) instead of the element object itself.
            var outputDate = document.querySelector('div[class="outputDate"]');
            return outputDate ? outputDate.outerHTML : null;
          })
          .end()
          .then(function(div) {
            console.log('I got the "div", now Ill do something with it', div)
          })
      })
      .catch(function (error) {
        console.error('Search failed:', error);
      });
    

    Now, you will have to iterate through the links list and navigate to each and every one of the links. This is a bit tricky to do with Nightmare; take a look at this resource and this answer. I believe you will find them quite helpful.