Search code examples
javascriptweb-scrapingcasperjs

CasperJS - Loop over popup links on Page A and return to Page A after scraping all popups?


I have a Page A that has 9 popup links, and I need to extract information from each popup. After that, I need to return to Page A and continue working on it.

I used waitForPopup and withPopup in a for loop, which I think is the right way to open and close each popup. The weird thing is that I got the info from the first popup repeated 9 times. I spent the whole night trying to get it right, but I am stuck. Please take a look at my code below. Thanks.

Update: I found a question that describes exactly the same problem: "Casperjs - Is ThenClick Or Any 'then Function' Synchronized Function?"

Inside my script there is a while loop, and I discovered that my script jumps to the end of the while loop just before the thenClick.

For your information:

1> The code is part of the var suite_1 = function() { ...my code... }

2> The regex /fulltext/ here is based on the popup links 
(only the Gid value varies), like url=http://www.***.com/fulltext_form.aspx&Gid=788...

3> I also have some debug info.

[debug] [phantom] Mouse event 'mousedown' on selector: xpath selector: (//a[@class="main-ljwenzi"])[9]
[debug] [phantom] Mouse event 'mouseup' on selector: xpath selector: (//a[@class="main-ljwenzi"])[9]
[debug] [phantom] Mouse event 'click' on selector: xpath selector: (//a[@class="main-ljwenzi"])[9]
[debug] [phantom] Navigation requested: url=http://www.***.com/fulltext_form.aspx&Gid=788, type=LinkClicked, willNavigate=true, isMainFrame=false

...And ...

[info] [phantom] Step anonymous 119/122 http://www.***.com/fulltext_form.aspx&Gid=252923 (HTTP 200)

The program is expected to open the link with Gid=788, but after some steps it still opens Gid=252923, which is the first popup.

Code

// Version from the question that shows the problem: as reported above, the
// info from the first popup comes back once per link. Only the comment syntax
// inside the loop has been repaired; the logic is unchanged.
this.then(function() {

    // Number of popup links found on Page A.
    var count = this.getElementsInfo('a.main').length;
    this.echo(count + ' fulltext links found:', 'INFO');

    // XPath positions start at 1, so the loop runs from 1 to count inclusive.
    for (var i = 1; i < count + 1; i++) {

        // According to the output, the program will run this.capture 9 times
        // before it runs the following lines of code 9 times.
        this.capture('before the click - ' + i + '.png');

        this.thenClick(x('(//a[@class="main"])[' + i + ']'));

        // Every popup URL matches /fulltext/; only the Gid value differs.
        this.waitForPopup(/fulltext/, function() {
            this.echo('Popup opened', 'INFO');
            this.echo(this.getTitle(), 'INFO');
        }, null, 10000);

        this.withPopup(/fulltext/, function() {

            this.echo(this.getTitle(), 'INFO');
            this.waitForSelector('#tbl_content_main', function() {
                // do something
            });
        });

        this.then(function() {
            this.echo('Back to the main page' + this.getTitle(), 'INFO_BAR');
        });
    }
});

Solution

  • I made a simple example to test your case:

    HTML(popup.html):

    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
    <!--
      Test fixture: three buttons, each opening a different site in a new window.
      The script below selects them with ".popup" and
      "body > div > button:nth-child(n)", and passes each button's text as the
      popup pattern, so keep the buttons as direct children of this div.
    -->
    <div>
        <button class="popup" id="casperjs" onclick="window.open('http://casperjs.org/')">casperjs</button>
        <button class="popup" id="phantomjs" onclick="window.open('http://phantomjs.org/')">phantomjs</button>
        <button class="popup" id="nodejs" onclick="window.open('https://nodejs.org/')">nodejs</button>
    </div>
    </body>
    </html>
    

    Code:

    // Minimal CasperJS script: clicks every ".popup" button on the test page in
    // turn and echoes the title of the window each one opens.
    var casper = require('casper').create();

    casper.start('http://localhost:63342/popup.html');

    casper.then(function () {
        var total = casper.getElementsInfo('.popup').length;
        // Shared position of the next button to click. It is read and advanced
        // when a queued step actually runs, not while the loop below executes.
        var counter = 1;
        var queued = 0;

        // Queue one step per button; each step gets its own function object.
        while (queued < total) {
            casper.then(function () {
                var selector = 'body > div > button:nth-child(' + counter + ')';
                // The button's text is passed as the popup pattern.
                var label = casper.getElementInfo(selector).text;

                casper.click(selector);
                casper.waitForPopup(label, undefined, undefined, 20000);
                casper.withPopup(label, function () {
                    casper.echo(casper.getTitle());
                });

                counter += 1;
            });
            queued += 1;
        }
    });

    casper.run();
    

    Output:

    $ casperjs popup.js 
    CasperJS, a navigation scripting and testing utility for PhantomJS and SlimerJS
    PhantomJS | PhantomJS
    Node.js
    

    It works as expected.


    So, your code has 2 problems:

    1. You should wrap all the code inside the for loop in a then.
    2. You should use a separate counter instead of i to build the selector (if you echo the value of i inside the step, you will see that it is always equal to count, because the steps only run after the loop has finished; in my case, that's 3).

    This has nothing to do with thenClick. You can replace click with thenClick in my example code and everything stays the same. If you take a look at the implementation of thenClick, you will see the reason behind my statement.

    Try this:

    // Corrected loop: each link is handled inside its own step, and the link
    // position comes from a separate counter that advances as the steps run.
    this.then(function() {

        var total = this.getElementsInfo('a.main').length;
        this.echo(total + ' fulltext links found:', 'INFO');

        var counter = 1; // added: 1-based position of the link to open next
        var queued = 0;

        while (queued < total) {
            this.then(function() { // added: wrap the per-link work in a step
                var linkXPath = '(//a[@class="main"])[' + counter + ']';

                this.capture('before the click - ' + counter + '.png'); // edited: counter instead of i

                this.thenClick(x(linkXPath)); // edited: counter instead of i

                this.waitForPopup(/fulltext/, function() {
                    this.echo('Popup opened', 'INFO');
                    this.echo(this.getTitle(), 'INFO');
                }, null, 10000);

                this.withPopup(/fulltext/, function() {

                    this.echo(this.getTitle(), 'INFO');
                    this.waitForSelector('#tbl_content_main', function() {
                        // do something
                    });
                });

                this.then(function() {
                    this.echo('Back to the main page' + this.getTitle(), 'INFO_BAR');
                });

                counter += 1; // added: move on to the next link
            }); // added
            queued += 1;
        }
    });
    

    If you are curious about why you should use another counter, you can read my answer on another SO post.