I'm using Casperjs to scrape a website, and am trying to extract a specific div that contains a list. A simplified version of the page I'm scraping would be:
<div id="col1" style="margin-top: 1px; ">
<div class="portlet_my_classes" id="my_classes">
<div class="eclass_list">
<ul>
<li>First item</li>
</ul>
</div>
</div>
</div>
My casper script is as follows:
var casper = require('casper').create()
var classes = []
function getClasses() {
var classes = document.querySelector(".eclass_list")
return Array.prototype.map.call(classes, function(e) {
return e.textContent
})
}
casper.start('https://redacted', function() {
this.echo(this.getTitle());
this.fill('form[action="/do/account/login"]', {login: "redacted", password: "redacted"}, true)
});
casper.then(function() {
this.echo(this.getHTML())
})
casper.thenOpen('https://redacted', function() {
this.echo(this.getTitle())
this.echo(this.getHTML())
this.echo(this.evaluate(getClasses))
this.echo(this.exists(".eclass_list"))
require('utils').dump(this.getElementInfo(".eclass_list"))
classes = this.evaluate(getClasses)
})
casper.run(function() {
this.echo(classes).exit()
});
this.exists(".eclass_list")
returns true, but classes is always null.
document.querySelector(".eclass_list")
in the getClasses
function will only return the first element that has this class, not as an array but as an element. So the subsequent Array.prototype.map
call on a DOM element will fail, because a DOM element doesn't have a length
property and cannot be interpreted as an array. Since there is an exception (SyntaxError: Illegal return statement
in Chrome) inside the page context, the return value will be null
.
What you want is document.querySelectorAll(".eclass_list")
.
If you would have added the remote.message
and page.error
events, you would have seen the problem.