Search code examples
javascriptcasperjsselectors-api

CasperJS querySelectorAll + map.call


html file

<table id="tbl_proxy_list">
...........
 <tr>
   ......
    <td align="left">
        <time class="icon icon-check">1 min</time>
    </td>
    <td align="left">
        <div class="progress-bar" data-value="75" title="4625"></div>
    </td>
</tr>
</table>

ip.js file

// Open the proxy list page, collect one entry per table row, and dump the result.
casper.start('http://www.proxynova.com/proxy-server-list/', function() {
    // The callback passed to evaluate() selects every <tr> inside the table
    // whose id is tbl_proxy_list and maps over the resulting node list.
    var info_text = this.evaluate(function() {
        var nodes = document.querySelectorAll('table[id="tbl_proxy_list"] tr');
        return [].map.call(nodes, function(node) { 
            //return node.innerText;
            // NOTE(review): this returns the DOM node itself. evaluate() return
            // values must be JSON-serializable, so confirm the nodes survive the
            // round trip before relying on them below.
            return node;
        });
    });

    // Turn each returned entry into an object with ip/port/lastcheck/speed fields.
    var tr_data = info_text.map(function(str) {
        var elements = str;
        var data = {
            ip        : elements,
            port      : elements[1],
            lastcheck : elements[2],
            speed     : elements[3], // <== value is 75..
        };
        return data;
    });

    // Print the collected row objects.
    utils.dump(tr_data);
});

// Run the CasperJS script.
casper.run();

Using return node.innerText only gives me the text of each row.

  • ip is a text value
  • port is a text value
  • lastcheck is a text value
  • speed is not a text value (data-value="75")

I want to extract the data-value="75" attribute (so that the speed value is 75).

I do not know what to do.

========================================

It works, good. Thank you, Artjom.

But echoing tr_data produces an error.

First, I modified your code:

// Object returned for one table row (tr): the first three cells are read as
// trimmed text; speed is the data-value attribute of the first child element
// of the fourth cell.
return {
    "ip":        tr.children[0].innerText.trim(),
    "port":      tr.children[1].innerText.trim(),
    "lastcheck": tr.children[2].innerText.trim(),
    "speed":     tr.children[3].children[0].getAttribute("data-value")
};

and then echoed the result:

//this.echo(tr_data.length);
// Echo the ip field of every row object in tr_data.
// Reading tr_data.length requires tr_data to be an array (not null).
for(var ii=0; ii<tr_data.length; ii++)
{
    this.echo(tr_data[ii]['ip']);
}

When I run it, it throws this error:

TypeError: 'null' is not an object (evaluating 'tr_data.length'); what is the problem?

I need your help. Thanks.


Solution

  • You cannot pass DOM elements out of the page context (that is, out of the evaluate callback).

    From the docs:

    Note: The arguments and the return value to the evaluate function must be a simple primitive object. The rule of thumb: if it can be serialized via JSON, then it is fine.

    Returning an array of DOM elements will result in an array of as many undefined values. That means you need to map everything inside the page context and then return the resulting array. You also need only one map.

    // Build the row data entirely inside the page context: evaluate() can only
    // return JSON-serializable values, so every DOM node is reduced to a plain
    // object of strings before it leaves the callback.
    var tr_data = this.evaluate(function() {
        var rows = document.querySelectorAll('table[id="tbl_proxy_list"] tbody tr');
        return Array.prototype.map.call(rows, function(tr) {
            if (tr.children.length !== 6) {
                return null; // skip ads
            }
            // Guard against a speed cell without a child element so that one
            // odd row does not throw inside the page context.
            var speedBar = tr.children[3].children[0];
            return {
                ip:        tr.children[0].innerText.trim(),
                port:      tr.children[1].innerText.trim(),
                lastcheck: tr.children[2].innerText.trim(),
                speed:     speedBar ? speedBar.getAttribute("data-value") : null
            };
        }).filter(function(data) {
            return data !== null; // filter the null out
        });
    });
    

    You also might want to trim the excess white space.