Using `casperjs`, I know that I'm able to execute JavaScript in the context of the visited page by using `this.evaluate`. It seems to work well, except that I can't find anything with `document.querySelector` or `document.querySelectorAll`, no matter what the starting URL is.
To investigate this, I created two files, `test.html` and `test.js`, and served them locally with `python3 -m http.server` on port `8000`. These are the files:
test.html
<!DOCTYPE html>
<!-- Minimal fixture page: a document title and four paragraph elements for the CasperJS script to read. -->
<html>
<head>
<meta charset="UTF-8"/>
<title>Page title</title>
</head>
<body>
<p>A test to see</p>
<p>if casperjs can grab elements</p>
<p>evaluating JS</p>
<p>in the context of the page</p>
</body>
</html>
test.js
/**
 * Reads the title of the current document.
 * Passed to casper.evaluate(), so it executes in the context of the page.
 *
 * @returns {string} The value of document.title.
 */
function grabTitle(){
  var pageTitle = document.title;
  return pageTitle;
}
// Walks over every <p> element of the document and pushes its text onto `texts`.
// Passed to casper.evaluate(), so it executes in the context of the page.
// NOTE: `texts` is not declared inside this function. It is a variable of the
// CasperJS script, which is not available in the context of the page, and this
// function returns nothing.
function grabParagraphs(){
var pars = document.querySelectorAll("p");
for (var i=0; i<pars.length; i++){
texts.push(pars[i].textContent);
}
}
// Driver script: open the test page, print its title and the collected paragraph texts.
var casper = require("casper").create();
var url = "http://localhost:8000/test.html";
// Declared in the CasperJS script itself; grabParagraphs refers to this name.
var texts = [];
casper.start(url,function(){
this.echo("Begin");
});
casper.then(function(){
var title = this.evaluate(grabTitle);
this.echo("The title is: " + title);
// The value returned by evaluate() is not used here; the step reads `texts` instead.
this.evaluate(grabParagraphs);
this.echo(texts.length + " paragraphs found:\n" + texts);
});
// Start the run and exit once every step has finished.
casper.run(function(){
this.echo("Done").exit();
});
Running `casperjs test.js` gave me this output:
Begin
The title is: Page title
0 paragraphs found:
Done
It finds the title, so `grabTitle` works correctly, executing in the context of the page, but no paragraphs are found. I thought that maybe I didn't wait long enough for the page to load, so I tried `casper.waitForSelector("p",function(){ ... });` and even `casper.wait(10000,function ... )`, waiting for 10 seconds to let the page load, with no result whatsoever.
Modifying `grabParagraphs` to use `document.getElementsByTagName` doesn't work either. I can't figure out what the problem is; every example that I can find showcases the usage of `querySelector`, so it should work.
I'm using `phantomjs 2.1.1` and `casperjs 1.1.4` on Linux Mint.
EDIT
Following the suggestion given by @Mario Nikolaus, I changed `test.js`: instead of pushing the results to the array `texts` defined in the global context of `test.js`, I now define `texts` in `grabParagraphs` and then return the result:
/**
 * Collects the text of every <p> element in the current document.
 * Passed to casper.evaluate(), so it executes in the context of the page;
 * the array it returns is what evaluate() hands back to the script.
 *
 * @returns {string[]} The textContent of each paragraph, in collection order.
 */
function grabParagraphs(){
  var paragraphs = document.getElementsByTagName("p");
  return Array.prototype.map.call(paragraphs, function(paragraph){
    return paragraph.textContent;
  });
}
// Step: read the title and the paragraph texts from the page, then print both.
casper.then(function(){
  var pageTitle = this.evaluate(grabTitle);
  this.echo("The title is: "+pageTitle);
  // evaluate() hands back the array that grabParagraphs returns.
  var texts = this.evaluate(grabParagraphs);
  var report = texts.length+" paragraphs found:\n"+texts;
  this.echo(report);
});
Originally I assumed that I could push the results to the variable `texts` because, since it was defined in the context of `test.js`, it was globally available. However, it isn't available in the context of the page, so that was the problem!
You are not returning a value from `evaluate` in your `grabParagraphs` function.
Create another array variable within the browser context, return that array to your casper context, and assign it to an array there.
/**
 * Returns the title of the page this function is evaluated in.
 *
 * @returns {string} The current value of document.title.
 */
function grabTitle(){
  var currentTitle = document.title;
  return currentTitle;
}
/**
 * Gathers the text of every element matching "p" in the current document.
 * The array is built and returned inside the page, so casper.evaluate()
 * can pass it back to the calling script.
 *
 * @returns {string[]} The textContent of each matched element, in match order.
 */
function grabParagraphs(){
  var paragraphTexts = [];
  Array.prototype.forEach.call(document.querySelectorAll("p"), function(node){
    paragraphTexts.push(node.textContent);
  });
  return paragraphTexts;
}
// Driver script: open the test page, then print its title and paragraph texts.
var casper = require("casper").create();
// Points at the page from the question (test.html served on port 8000).
var url = "http://localhost:8000/test.html";
casper.start(url,function(){
  this.echo("Begin");
});
casper.then(function(){
  var title = this.evaluate(grabTitle);
  this.echo("The title is: " + title);
  // Use the array returned from the page context instead of a script-level variable.
  var texts = this.evaluate(grabParagraphs);
  this.echo(texts.length + " paragraphs found:\n" + texts);
});
// Start the run and exit once every step has finished.
casper.run(function(){
  this.echo("Done").exit();
});
Hope that helps!