Search code examples
javascript xpath phantomjs casperjs selectors-api

Printing elements from XPath results in more than the elements


All I want to do is return a string from an xpath I determined but I'm having trouble using the getElementsByXPath function in CasperJS.

// CasperJS script: opens a page, runs an XPath query inside it, and echoes
// the JSON-serialized result of that query before exiting.
var casper = require('casper').create({
verbose: false,
logLevel: 'debug'
});
// Present the script to the site with a desktop Chrome user-agent string.
casper.userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)     Chrome/28.0.1500.72 Safari/537.36");
casper.start();
casper.thenOpen('http://www.uky.edu', function(){
    // `content` is assigned without a declaration, so it is an implicit global;
    // the casper.run() callback below reads it.
    content = casper.evaluate(function () {
       // NOTE(review): getElementsByXPath hands back the matched elements themselves,
       // not their text, so evaluate() is asked to return DOM nodes rather than a string.
       return __utils__.getElementsByXPath('//div[@id=\'container\']/section[@id=\'content\']/aside[@class=\'socialTab clearfix\']/div[@id=\'usual2\']/div[@id=\'tabs1\']/figure[@class=\'youTube\']/h4/a');
   });
});

// Print whatever evaluate() produced as JSON, then a completion marker, and exit.
casper.run(function() {
    this.echo(JSON.stringify(content)); 
    this.echo('completed').exit();
});

The script doesn't stop returning data, and it is definitely not returning the particular string I'm looking for; it seems to be returning the data for the entire webpage instead.

I ended up trying this with a practically blank webpage containing just one div and got the same issue. I used //div for the XPath and received the following:

[{"align":"","attributes":{"length":0},"baseURI":"http://download2012.ad.uky.edu/caspertest.php","childElementCount":0,"childNodes":{"0":null,"length":1},"child
ren":{"length":0},"classList":{"length":0},"className":"","clientHeight":20,"cli
entLeft":0,"clientTop":0,"clientWidth":384,"contentEditable":"inherit","dataset"
:{},"dir":"","draggable":false,"firstChild":null,"firstElementChild":"","hidden"
:false,"id":"","innerHTML":"Test2","innerText":"Test2","isContentEditable":false
,"lang":"","lastChild":{"attributes":"","baseURI":"http://download2012.ad.uky.ed
u/caspertest.php","childNodes":{"length":0},"data":"Test2","firstChild":"","last
Child":"","length":5,"localName":"","namespaceURI":"","nextSibling":"","nodeName
":"#text","nodeType":3,"nodeValue":"Test2","ownerDocument":null,"parentElement":
null,"parentNode":null,"prefix":"","previousSibling":"","textContent":"Test2","w
holeText":"Test2"},"lastElementChild":"","localName":"div","namespaceURI":"http:
//www.w3.org/1999/xhtml","nextElementSibling":"","nextSibling":null,"nodeName":"
DIV","nodeType":1,"nodeValue":"","offsetHeight":20,"offsetLeft":8,"offsetParent"
:{"aLink":"","attributes":{"length":0},"background":"","baseURI":"http://downloa
d2012.ad.uky.edu/caspertest.php","bgColor":"","childElementCount":1,"childNodes"
:{"0":null,"1":null,"2":null,"length":3},"children":{"0":null,"length":1},"class
List":{"length":0},"className":"","clientHeight":300,"clientLeft":0,"clientTop":
0,"clientWidth":400,"contentEditable":"inherit","dataset":{},"dir":"","draggable
":false,"firstChild":{"attributes":"","baseURI":"http://download2012.ad.uky.edu/
caspertest.php","childNodes":{"length":0},"data":"Test\n","firstChild":"","lastC
hild":"","length":5,"localName":"","namespaceURI":"","nextSibling":null,"nodeNam
e":

And so on.


Solution

  • You cannot return DOM elements from the page context in CasperJS. If document.querySelectorAll is used instead of __utils__.getElementsByXPath (with an adjusted CSS selector), the result will be an array of undefined values. This is not the case for __utils__.getElementsByXPath: it returns snapshots of the DOM elements, which are actually partially serializable. The problem is that they contain circular references to the document, so the serialized output will grow continuously.

    From the docs:

    Note: The arguments and the return value to the evaluate function must be a simple primitive object. The rule of thumb: if it can be serialized via JSON, then it is fine.

    Closures, functions, DOM nodes, etc. will not work!

    You need to do everything that you want to do in the page context. For example, to return the element as a string:

    // The callback runs inside the page context, which cannot see variables from
    // the CasperJS script, so the XPath expression is passed in as an argument.
    content = casper.evaluate(function (xpath) {
        var node = __utils__.getElementByXPath(xpath);
        // Return a plain string so the result is JSON-serializable; yield null
        // instead of throwing when the expression matches nothing.
        return node ? node.outerHTML : null;
    }, someSelector);