Search code examples
javascriptnode.jscronnightmare

Cron and nightmarejs


I can't manage to run my cron with nightmarejs.

The first iteration of the function get_data() works great but after that the cron restarts and the function will not be triggered again.

Also "crawl ended" is never logged.

Do you know what's wrong with my code?

Logs

1
cron
data fetched
2
cron
3
cron

-

var Nightmare = require('nightmare')
var nightmare = Nightmare({
  typeInterval: 300,
  show: true,
  executionTimeout: 120000,
  gotoTimeout: 120000
});
let data = ""

-

var get_data = function(){
  return new Promise(function(resolve, reject) {
    nightmare
    .goto('https://url.com')
    .type('[name=email]', '')
    .wait(1000)
    .type('[name=email]', 'myemail')
    .wait(1000)
    .type('[name=password]', '')
    .wait(1000)
    .type('[name=password]', 'mypassword')
    .click('[type=submit]')
    .wait(5000)
    .goto('https://url.com')
    .wait(25000)

    .evaluate(function (page, done) {

      return document.body.innerText
      done()
    })
    .end()
    .then(function (result) {
      data = result
    })
    .then(function(data){
      return fs.writeFile("./data.txt", data, function(err) {
        if(err) {
          console.log(err)
          reject(err)
        }
        resolve(data)
      });
    })
    .catch(function(error){
      reject(error)
    })
  })
}

-

var i = 0
var job = new CronJob('0 */20 * * * *', function() {
    ++i
    console.log(i)
    console.log("cron")
    get_data()
  }, function () {
    console.log("crawl ended")
  },
  true
);

job.start();

Solution

  • Couple of things that jump out right away.

    .evaluate(function (page, done) {
    
          return document.body.innerText
          done()
        })
    

    This won't do what you expect it to do, and will likely never return and cause a timeout error. You're not passing in an argument for page, which means done will be undefined. Change the above to:

    .evaluate(function (done) {
    
          return document.body.innerText
          done()
        })
    

    Second, this:

    .then(function(data){
          return fs.writeFile("./data.txt", data, function(err) {
            if(err) {
              console.log(err)
              reject(err)
            }
            resolve(data)
          });
        })
    

    ... redefines data. I don't think you're putting out the data variable set in the previous then, this should always output undefined, I'd think. Be careful with your closures.

    Third, and perhaps most importantly:

    .evaluate(function (page, done) {
    
          return document.body.innerText
          done()
        })
        .end() // <== this might be a problem
        .then(function (result) {
          data = result
        })
    

    Since nightmare is only defined once, you're ending the only instance you have. It will not be recreated, and will not work properly if you try to execute actions on the ended instance in the second iteration of your loop. Either take the .end() out and move it to the end of your scripts, or create a new Nightmare instance for every iteration.