I wrote a quick script to capture a screenshot of each post in a list of Reddit posts. The script fetches the Reddit URLs from a JSON file (example shown below) and then visits each page to capture a screenshot.
The script works well for the most part. However, every once in a while it will hang and keep running if the casper.waitForSelector
criteria is not met. Eventually it stalls the whole server. I thought I was doing sufficient error handling. How can I modify the script to ignore any page that doesn't meet the casper.waitForSelector
criteria and move on to the next item for screen capture?
JS
// Create the shared CasperJS instance used by every step of this script.
var casper = require('casper').create({
verbose: true,
// Size of the virtual browser window the pages are rendered in.
viewportSize: {
width: 1280,
height: 720
},
// Only messages at 'error' level are logged.
logLevel: 'error',
pageSettings: {
ignoreSslErrors: true,
loadImages: true, // load images
// NOTE(review): the value is true, so NPAPI plugins (Flash, Silverlight, ...)
// are requested; set this to false if the intent was NOT to load them.
loadPlugins: true,
// presumably disabled so that cross-origin AJAX requests are allowed — confirm
webSecurityEnabled: false,
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
//Create random names for files
/**
 * Build a pseudo-random string used as a screenshot file name.
 *
 * One Math.random() draw is made per appended piece: draws below 0.1
 * contribute a digit, all other draws contribute a letter (lower case
 * for draws above 0.5, upper case otherwise).
 *
 * @param {number} x - Desired length; anything not greater than zero yields "".
 * @returns {string} The generated string.
 */
function randString(x) {
  var out = "";

  // Nothing to generate for zero, negative or non-numeric lengths.
  if (!(x > 0)) {
    return out;
  }

  while (out.length < x) {
    var roll = Math.random();

    if (roll < 0.1) {
      // Small draws become a decimal digit.
      out += Math.floor(roll * 100);
    } else {
      // Everything else becomes a letter; the draw also picks the case.
      var base = roll > 0.5 ? 97 : 65;
      out += String.fromCharCode(Math.floor(roll * 26) + base);
    }
  }

  return out;
}
/**
 * Queue a step that opens one Reddit post and captures a screenshot of
 * the post element to a randomly named PNG file.
 *
 * The element is awaited with waitForSelector and an explicit onTimeout
 * handler: without that third argument a timeout falls through to
 * CasperJS's default handling (the script is stopped), whereas here the
 * post is logged and skipped so the remaining posts are still processed.
 * The former immediate casper.exists() check was dropped because it made
 * the wait redundant and rejected pages whose element had not rendered yet.
 *
 * @param {string} reddit - URL of the Reddit post to open.
 * @param {string} filename - Post id; the element "#thing_<id>" is captured.
 */
function getReddit(reddit, filename) {
  var selector = "#thing_" + filename;

  casper.thenOpen(reddit, function() {
    casper.waitForSelector(selector, function onFound() {
      casper.captureSelector(randString(10) + '.png', selector, {
        quality: 100
      });
    }, function onTimeout() {
      // Element never appeared: report it and carry on with the next post.
      console.log("Reddit Error: " + filename);
    });
  });
}
// URL of the JSON feed that lists the Reddit posts to capture.
var link = 'http://localhost/test.json';

// Load the feed once and queue one screenshot step per listed post.
// This used to live in a "resource.received" handler, which fires for every
// resource of every page (including the Reddit pages opened later), so the
// list was re-requested and re-queued over and over and JSON.parse threw on
// non-JSON responses. Doing it in the start callback runs it exactly once.
casper.start(link, function() {
  var body = this.evaluate(function(url) {
    return __utils__.sendAJAX(url, "GET");
  }, link);

  var posts;
  try {
    posts = JSON.parse(body);
  } catch (err) {
    this.echo("Could not parse post list from " + link + ": " + err.message, "ERROR");
    return;
  }

  // A failed request or an unexpected payload leaves nothing to iterate.
  if (!Array.isArray(posts)) {
    this.echo("Post list from " + link + " is not an array", "ERROR");
    return;
  }

  for (var i = 0; i < posts.length; ++i) {
    getReddit(posts[i].post_url, posts[i].id);
  }
});

casper.run();
JSON
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4marhg/til_that_in_the_16th_century_christians_called/",
bit_id: "l6KE0vzMmgQ",
id: "t3_4marhg"
},
{
post_url: "https://www.reddit.com/r/videos/comments/4mbbab/man_ignores_museum_rules_touches_priceless_clock/",
bit_id: "2GK22rGYWKx",
id: "t3_4mbbab"
},
{
post_url: "https://www.reddit.com/r/space/comments/4mc1av/an_alien_world_67p_as_seen_by_rosetta_two_days_ago/",
id: "t3_4mc1av"
},
{
post_url: "https://www.reddit.com/r/worldnews/comments/4mc8uv/young_fish_become_hooked_on_eating_plastic_in_the/",
id: "t3_4mc8uv"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mcda3/til_that_the_giant_tortoise_did_not_receive_a/",
id: "t3_4mcda3"
},
{
post_url: "https://www.reddit.com/r/science/comments/4mcl0y/a_new_study_has_shown_that_mothers_who_are/",
id: "t3_4mcl0y"
},
{
post_url: "https://www.reddit.com/r/news/comments/4mcveg/bp_agrees_to_pay_175_million_to_settle_claims_by/",
id: "t3_4mcveg"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mdddw/til_that_when_a_british_captive_officer/",
id: "t3_4mdddw"
}
If you provide the onTimeout
callback function to waitForSelector
(3rd argument), then it will be executed instead of the default behavior (stop script) on error.
You can pass in an empty function or a function with some logging:
// Wait for the post element. The third argument is the onTimeout callback:
// when supplied, it runs instead of the default behavior (stopping the script).
casper.waitForSelector("#thing_" + filename, function _then() {
// Selector matched: capture only that element into a randomly named PNG.
this.captureSelector(randString(10) + '.png', "#thing_" + filename, {
quality: 100
});
}, function _onTimeout(){
// Selector never matched: print a warning so the run can continue.
this.echo("#thing_" + filename + " not found", "WARNING");
});
You can also change the option casper.options.silentErrors
to true
if you want this behavior for every function.