Search code examples
web-scrapingcasperjs

How do I scrape a table from the provided website using CasperJS?


The final goal is to retrieve stock data in table form from the provided broker website and save it to a text file. Here is the code that I have managed to put together so far by reading a few tutorials:

// CasperJS script: opens the quotes page, waits for the table container,
// then tries to print the date cell of each matched element.
var casper = require("casper").create();
var url = 'https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';

// Third argument to waitForSelector below: prints a message and exits the script.
var terminate = function() {
    this.echo("Exiting ...").exit();
};

// Second argument to waitForSelector below: looks up the table and echoes
// the '.quotes-table-result__date' element found inside each match.
// NOTE(review): this callback presumably runs in the CasperJS script context,
// not inside the loaded page, so `document` here is likely not the page's DOM —
// a probable cause of the 'undefined' output; confirm against the
// casper.evaluate() documentation.
var processPage = function() {

    var rows = document.querySelectorAll('#mCSB_3_container > table'); //get table from broker site (copy/paste via copy selector in chrome tools)
    //var nodes = document.getElementsByClassName('mCSB_container');

    this.echo(rows);
    this.echo(rows.length);
    // The selector above matches <table> elements, so each "row" here is a
    // whole table and querySelector returns only the first date cell in it.
    for (var i = 0; i < rows.length; i++)
    {
        var cell = rows[i].querySelector('.quotes-table-result__date');
        this.echo(cell); //print each cell
    }  

};

// Queue the page load, wait for the container element, then run all steps.
casper.start(url);
casper.waitForSelector('#mCSB_3_container', processPage, terminate);
casper.run();

This code should retrieve the stock price table and print out each cell. However, all I get is 'undefined', which likely means that the querySelector call returned no objects. Please assume that I don't know any web programming (HTML, CSS).


Solution

  • First of all, one problem is that the waitFor wasn't set up well: you have to wait for the rows/cells, not just the container.
    The nodes you get from this page are a bit weird; if anybody has a more abstract solution where child nodes are handled better than in my solution, I would be really interested:

    // CasperJS instance and the page to scrape.
    var casper = require('casper').create();
    var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
    // Number of table rows; assigned by a later step once the table is present.
    var length;

    // Open the page, then block the step queue until at least one table row
    // exists inside the scroll container.
    casper.start(url).then(function waitForRows() {
        this.waitForSelector('#mCSB_3_container table tbody tr');
    });
    
    /**
     * Read the trimmed text of a single cell of the quotes table.
     *
     * The DOM lookup runs inside the page via casper.evaluate(); only the
     * resulting string is passed back to the script.
     *
     * @param {number} row  Zero-based index into the `table tbody tr` matches.
     * @param {number} cell Zero-based index into that row's childNodes.
     * @returns {string} The cell's innerText with surrounding whitespace removed.
     */
    function getCellContent(row, cell) {
        // Return the result directly instead of storing it in an undeclared
        // variable, which would create an implicit global (and throw in strict mode).
        return casper.evaluate(function(row, cell) {
            return document.querySelectorAll('table tbody tr')[row].childNodes[cell].innerText.trim();
        }, row, cell);
    }
    
    // Count the table rows inside the page and store the count in `length`
    // for the step that prints the rows.
    casper.then(function() {
        // evaluate() can only pass JSON-serializable values back to the script,
        // so return the numeric count rather than the NodeList of DOM elements.
        length = this.evaluate(function() {
            return document.querySelectorAll('table tbody tr').length;
        });
        this.echo("table length: " + length);
    });
    
    // Print every row of the table, one labelled line per column.
    casper.then(function() {
        var self = this;
        // Label prefixes in column order; the array index doubles as the cell index.
        var labels = ["Date: ", "Bid: ", "Ask: ", "Quotes: "];
        for (var row = 0; row < length; row++) {
            labels.forEach(function(label, column) {
                self.echo(label + getCellContent(row, column));
            });
        }
    });

    // Execute the queued steps.
    casper.run();