Search code examples
javascriptphantomjscasperjs

How to write a for loop in CasperJS


I am trying to click a 'next' button N times and grab the page source each time. I understand that I can run an arbitrary function on the remote website, so instead of click() I just use the remote function nextPage(). How do I run the following an arbitrary number of times?

// Create the Casper instance that drives the scripted browser session.
var casper = require('casper').create();

// Open the start page; the callback below is the first step run against it.
casper.start('http://www.example.com', function() {

    // Print the markup of the initial page, followed by a separator line.
    this.echo(this.getHTML());
    this.echo('-------------------------');

    // numTimes: how many times to advance to the next page.
    // count: page index, pre-incremented before each nextPage() call.
    var numTimes = 4, count = 2;

    // For each repetition, queue a "go to next page" step and a "print source" step.
    casper.repeat(numTimes, function() {
        // NOTE(review): the function given to thenEvaluate runs inside the remote
        // page, so the `count` declared above is presumably not visible there.
        // Confirm against the CasperJS docs; passing it as an extra argument to
        // thenEvaluate would avoid relying on the closure.
        this.thenEvaluate(function() {
            nextPage(++count);
        });

        // Print whatever page is current when this step runs.
        this.then(function() {
            this.echo(this.getHTML());
            this.echo('-------------------------');
        });
    });

});
// NOTE(review): no casper.run() call is visible in this snippet.

'i' here is an index I tried to use in a javascript for loop.

So tl;dr: I want to click 'next', print the page source, click 'next', print the page source, click 'next'... and continue that N times.


Solution

  • First, you can pass a value to the remote page context (i.e. to the function given to thenEvaluate) like this:

        // Values listed after the function are handed to it inside the page,
        // so the freshly incremented counter arrives there as `pageIndex`.
        this.thenEvaluate(function (pageIndex) {
            nextPage(pageIndex);
        }, ++count);
    

    However, Casper#repeat might not be a good function to use here as the loop would NOT wait for each page load and then capture the content.

    You may instead devise an event-based chain.

    The work-flow of the code would be:

    1. Have a global variable (or at least a variable accessible to the functions mentioned below) to store the count and the limit.

    2. Listen to the load.finished event, grab the HTML in its handler, and then call the next page.

    A simplified code can be:

    var casper = require('casper').create();

    // State shared across page loads: the last page index to visit and the
    // index of the page currently being handled.
    var limit = 5;
    var count = 1;

    // Runs whenever a page load finishes: print the page (or a failure notice),
    // then either stop or ask the page to advance. The intent is that the
    // resulting load fires this handler again.
    casper.on('load.finished', function (status) {
        if (status === 'success') {
            this.echo(this.getHTML());
            this.echo('-------------------------');
        } else {
            this.echo("Failed to load page.");
        }

        count += 1;
        if (count > limit) {
            this.echo("Finished!");
            return;
        }

        // The counter is passed explicitly as an argument to the in-page function.
        this.evaluate(function (pageIndex) {
            nextPage(pageIndex);
            // Logged from inside the page (this line was added in a later edit).
            console.log(pageIndex);
            return pageIndex;
        }, count);
    });

    casper.start('http://www.example.com');
    casper.run();
    

    NOTE: If your pages run a heavy load of JS processes etc., you may also want to add a wait before calling nextPage:

    // Pause for 1000 ms before advancing, so the page's own scripts have time
    // to finish before nextPage is invoked.
    this.wait(1000, function () {
        this.evaluate(function (pageIndex) {
            nextPage(pageIndex);
        }, count);
    });
    

    [EDIT ADDED] The following event listeners will help you debug.

    // Relay messages the page writes with console.log to this script's output.
    casper.on('remote.message', function (message) {
        console.log('[Remote Page] ' + message);
    });

    // Report error messages coming from the web page, followed by their trace.
    casper.on('page.error', function (message, trace) {
        casper.echo('[Remote Page Error] ' + message, 'ERROR');

        // Pretty-print the trace with a four-space indent.
        var formattedTrace = JSON.stringify(trace, null, 4);
        casper.echo('[Remote Error trace] ' + formattedTrace);
    });