Search code examples
javascriptcasperjs

casper.waitForSelector, timeout and error handling


I made a quick script to capture a screen shot from a list of Reddit posts. The script fetches the reddit urls from a json file(example shown below) and then visits each page to capture a screenshot.

The script works well for the most. However, every once in a while it will hang/continue to run if casper.waitForSelector criteria is not met. Eventually it will stall the whole server. I thought I was doing sufficient error handling. How can I modify the script to ignore any page that doesn't fit the casper.waitForSelector criteria and move on to the next item for screen capture?

JS

var casper = require('casper').create({
    verbose: true,
    viewportSize: {
        width: 1280,
        height: 720
    },
    logLevel: 'error',
    pageSettings: {
        ignoreSslErrors: true,
        loadImages: true, // load images
        loadPlugins: true, // do not load NPAPI plugins (Flash, Silverlight, ...)
        webSecurityEnabled: false, // ajax
        userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
    }
});

//Create random names for files
function randString(x) {
    var s = "";
    while (s.length < x && x > 0) {
        var r = Math.random();
        s += (r < 0.1 ? Math.floor(r * 100) : String.fromCharCode(Math.floor(r * 26) + (r > 0.5 ? 97 : 65)));
    }
    return s;
}


function getReddit(reddit, filename) {
    casper.thenOpen(reddit, function() {
        if (casper.exists("#thing_" + filename)) {
            casper.waitForSelector("#thing_" + filename, function() {
                casper.captureSelector(randString(10) + '.png', "#thing_" + filename, {
                    quality: 100
                });
            });
        } else {
            console.log("Reddit Error: " + filename);
        }
    });
}

var link = 'http://localhost/test.json';

casper.start(link);

casper.on("resource.received", function(resource) {
    var results = this.evaluate(function(url) {
        return __utils__.sendAJAX(url, "GET");
    }, resource.url);
    var x = JSON.parse(results);
    //this.echo(x.length);
    for (var i = 0; i < x.length; ++i) {
        var reddit = x[i].post_url;
        var filename = x[i].id;
        getReddit(reddit, filename);
    }
});

casper.run();

JSON

{
post_url: "https://www.reddit.com/r/todayilearned/comments/4marhg/til_that_in_the_16th_century_christians_called/",
bit_id: "l6KE0vzMmgQ",
id: "t3_4marhg"
},
{
post_url: "https://www.reddit.com/r/videos/comments/4mbbab/man_ignores_museum_rules_touches_priceless_clock/",
bit_id: "2GK22rGYWKx",
id: "t3_4mbbab"
},
{
post_url: "https://www.reddit.com/r/space/comments/4mc1av/an_alien_world_67p_as_seen_by_rosetta_two_days_ago/",
id: "t3_4mc1av"
},
{
post_url: "https://www.reddit.com/r/worldnews/comments/4mc8uv/young_fish_become_hooked_on_eating_plastic_in_the/",
id: "t3_4mc8uv"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mcda3/til_that_the_giant_tortoise_did_not_receive_a/",
id: "t3_4mcda3"
},
{
post_url: "https://www.reddit.com/r/science/comments/4mcl0y/a_new_study_has_shown_that_mothers_who_are/",
id: "t3_4mcl0y"
},
{
post_url: "https://www.reddit.com/r/news/comments/4mcveg/bp_agrees_to_pay_175_million_to_settle_claims_by/",
id: "t3_4mcveg"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mdddw/til_that_when_a_british_captive_officer/",
id: "t3_4mdddw"
}

Solution

  • If you provide the onTimeout callback function to waitForSelector (3rd argument), then it will be executed instead of the default behavior (stop script) on error.

    You can pass in an empty function or a function with some logging:

    casper.waitForSelector("#thing_" + filename, function _then() {
        this.captureSelector(randString(10) + '.png', "#thing_" + filename, {
            quality: 100
        });
    }, function _onTimeout(){
        this.echo("#thing_" + filename + " not found", "WARNING");
    });
    

    You can also change the option casper.options.silentErrors to true if you want this behavior for every function.