Search code examples
node.js, web-scraping, nightmare

Web scraping with Nightmare in a loop: the output is not the same every time


// Scrape the team pages of jufa-kyusyu.jp: read every <option> value from the
// index page, then visit each value as a path and log the `src` of that page's
// iframe with class "autoHeight".
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
// `fs` is not used anywhere in this snippet.
var fs  = require('fs');
// Assigned without var/let/const, so this creates an implicit global; `vo` is
// not used anywhere in this snippet.
vo = require('vo');
// Never assigned in this snippet; the `result` callback parameter further down
// shadows it.
var result;
nightmare
    .goto('http://jufa-kyusyu.jp/teams/')
    .wait(1000)
    // Collect the `value` of every <option> element found via `document`.
    .evaluate(function () {
     var options = document.querySelectorAll('option'),i;
     var values =[]
     for (i = 0; i < options.length; ++i) {
        values.push(options[i].value)
        }
        return values;
    })
    .then(function (values) {
      // NOTE(review): each iteration starts a new goto/evaluate chain on the same
      // shared `nightmare` instance without waiting for the previous chain to
      // settle. Presumably this is why results repeat or differ between runs;
      // confirm against the Nightmare docs on running steps in series.
      for (var i = 0; i < values.length; i++) {
        // A placeholder value of "#" is replaced with a fixed page path.
        if(values[i] == "#") values[i] = "/teams/181.html";
    nightmare
      .goto("http://www.jufa-kyusyu.jp"+values[i])
      // Read the src of the iframe whose class attribute is exactly "autoHeight".
      .evaluate(function () {
      var abc =     document.querySelector('iframe[class="autoHeight"]').src.toString()
      return abc;
    }) 
      .then(function (result) {
console.log(result)
  })
    // Handles failures of this per-page chain only.
    .catch(function (error) {
    console.error('Search failed:', error);
  });} 
  })
  // Handles failures of the initial index-page chain.
  .catch(function (error) {
    console.error('Search failed:', error);
  });

I want to scrape information from this website with Nightmare.js in a loop. I don't know why two of the resulting links are the same, or why the results change on every run. Thank you.


Solution

  • You have to be careful when working with async calls inside a loop with Nightmare

    Check this answer and this detailed explanation about the concept.

    The main idea can be summarized by this sentence:

    Executing the operations in series requires arranging them to execute in sequential order

    The documentation shows how to achieve that using plain, vanilla JavaScript and also with vo.

    Here is a sneak peek on how to solve this loop issue with plain Javascript:

    var urls = ['http://example1.com', 'http://example2.com', 'http://example3.com'];

    // Visit one page, wait for its body, and append its title to the list
    // collected so far. Resolves with that same list.
    function collectTitle(titles, url) {
      return nightmare
        .goto(url)
        .wait('body')
        .title()
        .then(function (title) {
          titles.push(title);
          return titles;
        });
    }

    // Chain the visits one after another, starting from an already-resolved
    // promise holding an empty list, so each page is only requested after the
    // previous one has finished.
    var allTitles = urls.reduce(function (chain, url) {
      return chain.then(function (titles) {
        return collectTitle(titles, url);
      });
    }, Promise.resolve([]));

    // Print every collected title once the whole chain has completed.
    allTitles.then(function (titles) {
      console.dir(titles);
    });
    

    Basically, what you need to do is chain all your calls onto a single promise that starts from Promise.resolve([]), so that each page is visited only after the previous one has finished.