Search code examples
javascripthtmliframecasperjs

Returning links inside multiple-levels iframe using a function in CasperJS


I am trying to get all links inside multiple levels of nested iframes in CasperJS. There is an existing solution for the case of a single level of iframes. I tried calling getLinksFromIframes from inside getLinksFromIframes to traverse the frames recursively, but it failed.

How should I adapt this code so that it works for multiple levels of iframes?

/**
 * Gathers the href of every <a> inside the iframes of the current document
 * (one level only) and passes the combined list to `callback`.
 *
 * Must be invoked with a Casper instance as `this`.
 *
 * @param {function(Array)} callback - invoked from a `then()` step registered
 *     after the `withFrame()` calls, with `this` set to the Casper instance
 *     and the gathered hrefs as its only argument.
 */
function getLinksFromIframes( callback ) {
    var casperInstance = this;
    var gathered = [];

    // Evaluated in the page: produces 0..n-1, one index per <iframe> element.
    var frameIndexes = casperInstance.evaluate( function() {
        var total = document.querySelectorAll("iframe").length;
        var indexes = [];
        for (var n = 0; n < total; n++) {
            indexes.push( n );
        }
        return indexes;
    });

    for (var k = 0; k < frameIndexes.length; k++) {
        casperInstance.withFrame(frameIndexes[k], function() {
            gathered = gathered.concat( this.getElementsAttribute( 'a', 'href' ) );
            console.log("works: " + gathered);
        });
    }

    casperInstance.then(function(){
        callback.call(this, gathered);
    });
}

// Declared explicitly so the assignment in the callback below does not create
// an implicit global, and so the later step prints an empty list instead of
// raising a ReferenceError if the callback never ran.
// NOTE(review): `casper` and `url` are not defined in this snippet; they are
// assumed to be set up earlier in the script.
var thelinks = [];

casper.start(url, function () {
    getLinksFromIframes.call(this, function(links){
        // Keep the result around for the steps that follow.
        thelinks = links;
        console.log("Links: " + thelinks);
    });
})
.then(function(){
    // Reads the value stored by the callback above.
    console.log("Links later: " + thelinks);
})
.run();

Solution

  • Maybe like this:

    // Debug options (`verbose: true`, `logLevel: "debug"`) can be added to the
    // object below when troubleshooting.
    // NOTE(review): web security is presumably disabled so the page script may
    // read `contentDocument` of the iframes - confirm for your setup.
    var casper = require("casper").create({ webSecurityEnabled : false });

    // Echo every 'remote.message' event so console.log calls made inside
    // evaluate() show up in the terminal.
    casper.on('remote.message', function (text) {
        this.echo(text);
    });

    casper.start("http://domu-test-2/node/1", function () {
        this.evaluate(function () {
            // Walk down two iframe levels by id, then print the anchors found there.
            var outerFrame = document.querySelector("iframe#test");
            var innerFrame = outerFrame.contentDocument.querySelector("iframe#test2");
            var anchors = innerFrame.contentDocument.querySelectorAll("a");
            Array.prototype.forEach.call(anchors, function (anchor) {
                console.log(anchor.href);
            });
        });
    }).wait(1000).run();
    

    It was hard, but I've created this script:

    // Debug options (`verbose: true`, `logLevel: "debug"`) can be added to the
    // object below when troubleshooting.
    var casper = require("casper").create({ webSecurityEnabled : false });

    // Shared accumulator: getLinksFromIframes appends to it and the final
    // callback prints its contents.
    var links = [];
    /**
     * Returns the `href` of every <a> element in the document that `obj`
     * currently evaluates in.
     *
     * @param {Object} obj - object exposing `evaluate(fn)`; here a Casper instance.
     * @returns {Array} the hrefs in document order, as handed back by `evaluate`.
     */
    function get_links(obj) {
        return obj.evaluate(function () {
            var anchors = document.querySelectorAll("a");
            return Array.prototype.map.call(anchors, function (anchor) {
                return anchor.href;
            });
        });
    }
    /**
     * Keeps only the http(s) URLs of a list and removes duplicates.
     *
     * The scheme check is anchored to the start of the string, so values that
     * merely contain "http" somewhere (for example a `mailto:` link whose
     * body mentions a URL) are dropped. Non-string entries are ignored.
     *
     * @param {Array|null|undefined} arr - candidate values; a missing list is
     *     treated as empty.
     * @returns {Array} the distinct http(s) URLs in first-seen order.
     */
    function unique(arr) {
        // Prototype-less object used as a set, so no inherited key can clash.
        var seen = Object.create(null);
        var list = arr || [];
        for (var i = 0; i < list.length; i++) {
            var candidate = list[i];
            if (typeof candidate === "string" && /^https?:/i.test(candidate)) {
                seen[candidate] = true;
            }
        }
        return Object.keys(seen);
    }
    
    /**
     * Visits every iframe below the current page, at any nesting depth, and
     * accumulates the http(s) links found in them in the shared `links` array.
     *
     * Must be invoked with a Casper instance as `this`. Relies on `links`,
     * `unique` and `get_links` from the enclosing script.
     *
     * @param {function(Array)} callback - invoked from a `then()` step
     *     registered after the traversal, with `this` set to the Casper
     *     instance and the accumulated `links` as its argument (callers that
     *     ignore the argument keep working).
     */
    function getLinksFromIframes(callback) {
        this.echo("Here we come: " + this.getCurrentUrl() + "\n");

        // Returns one index per <iframe> of the document `page` currently sees.
        function listFrameIndexes(page) {
            var indexes = page.evaluate(function () {
                var count = document.querySelectorAll("iframe").length;
                var found = [];
                for (var n = 0; n < count; n++) {
                    found.push(n);
                }
                return found;
            });
            // evaluate() may hand back null (e.g. when the page-side function
            // could not produce a result); treat that as "no frames".
            return indexes || [];
        }

        // Enters each child frame, records its links, then recurses into it.
        function visitFrames(page) {
            listFrameIndexes(page).forEach(function (index) {
                page.withFrame(index, function () {
                    this.echo("We are here: " + this.getCurrentUrl());
                    // `|| []` keeps a null result from get_links harmless.
                    var found = unique(get_links(this) || []);
                    found.forEach(function (href) {
                        console.log(href);
                        links.push(href);
                    });
                    links = unique(links);
                    console.log("");
                    visitFrames(this); // descend into iframes nested in this frame
                });
            });
        }

        visitFrames(this);
        this.then(function () {
            callback.call(this, links);
        });
    }
    
    casper.start("http://domu-test-2/node/1", function () {
        // Passed as the completion callback: prints everything collected in
        // the shared `links` array.
        var reportLinks = function () {
            console.log("Done!\n");
            links.forEach(function (link) {
                console.log(link);
            });
        };
        getLinksFromIframes.call(this, reportLinks);
    }).then(function () {}).run(); // the trailing then() registers an empty step
    

    Note:
    This now traverses iframes at every nesting level.

    ./casperjs test.js >>/dev/stdout
    Here we come: http://domu-test-2/node/1
    
    We are here: http://domu-test-2/node/2
    http://link_1_inside_iframe(1.1)_from_main_frame
    
    We are here: http://domu-test-2/node/3
    http://link_1_inside_iframe(2.1)_from_1.1
    
    We are here: http://domu-test-2/node/5
    http://link_1_inside_iframe(2.2)_from_1.1
    
    We are here: http://domu-test-2/node/4
    http://link_1_inside_iframe(1.2)_from_main_frame
    
    We are here: http://domu-test-2/node/6
    http://link_1_inside_iframe(2.1)_from_1.2