Search code examples
perl parsing webkit gtk3

Parsing a dynamic page with Perl and Gtk3::WebKit


I am using Gtk3::WebKit to parse a website that loads its content dynamically with JavaScript. Once the page has loaded, it adds multiple divs with content to the DOM:

<div class="product-card">...</div>

How do I get this content with Gtk3::WebKit? How do I get the content of the nested tags? Is there any decent documentation for Gtk3::WebKit? Everything I have seen so far is very poorly documented.


Solution

  • Here is an example using Gtk3::WebKit2:

    use feature qw(say);
    use strict;
    use warnings;
    use Gtk3 -init;
    use Gtk3::WebKit2;
    use Gtk3::JavaScriptCore;
    
    # Main script: show a window holding a WebKit view, load a page into it,
    # and evaluate a JavaScript snippet once the page has finished loading.
    {
        my $page_uri = 'https://metacpan.org/pod/Gtk3::WebKit2';

        # Top-level window; destroying it quits the GTK main loop.
        my $main_window = Gtk3::Window->new('toplevel');
        $main_window->set_default_size(800, 600);
        $main_window->signal_connect(destroy => sub { Gtk3->main_quit() });

        my $web_context = Gtk3::WebKit2::WebContext::get_default();
        my $web_view    = Gtk3::WebKit2::WebView->new_with_context($web_context);

        # 'load-changed' fires with several load events; only act on the
        # 'finished' one, when the snippet below is handed to run_javascript().
        my $on_load_changed = sub {
            my ($sender, $load_event) = @_;
            return unless $load_event eq 'finished';

            run_javascript(
                $sender,
                'document.getElementsByClassName("logged_out")[1].innerHTML;'
            );
            return;
        };

        # Connect the handler before starting the load.
        $web_view->signal_connect('load-changed', $on_load_changed);
        $web_view->load_uri($page_uri);

        # Nest the view inside a scrolled container inside the window.
        my $scroller = Gtk3::ScrolledWindow->new();
        $scroller->add($web_view);
        $main_window->add($scroller);
        $main_window->show_all();

        # Drain whatever is already queued, then hand control to GTK.
        Gtk3::main_iteration() while Gtk3::events_pending();
        Gtk3->main();
    }
    
    # Evaluate $javascript_string inside $view and print the result.
    #
    # Parameters:
    #   $view              - object providing run_javascript() and
    #                        run_javascript_finish(); the script passes the
    #                        Gtk3::WebKit2::WebView it created.
    #   $javascript_string - JavaScript source to evaluate.
    #
    # The evaluation is asynchronous: a completion callback prints the string
    # form of the script's value with say().  If finishing the call dies (for
    # example because the script itself raised an exception), the error is
    # reported with warn() instead of escaping from the callback.
    #
    # Returns nothing.
    sub run_javascript {
        my ($view, $javascript_string) = @_;

        my $done = 0;

        $view->run_javascript($javascript_string, undef, sub {
            my (undef, $result) = @_;

            # Finishing the call may die when the evaluation failed; trap it
            # so that $done is still set below.
            my $text;
            my $ok = eval {
                $text = $view->run_javascript_finish($result)
                             ->get_js_value
                             ->to_string;
                1;
            };

            if ($ok) {
                say $text;
            }
            else {
                my $error = $@ || 'unknown error';
                warn "run_javascript failed: $error\n";
            }

            $done = 1;
            return;
        }, undef);

        # Dispatch events that are already queued, stopping early once the
        # callback has run.  The loop ends as soon as nothing is pending, so
        # it does not wait for the callback.
        Gtk3::main_iteration() while Gtk3::events_pending() and not $done;

        return;
    }
    

    Output:

    <a href="" onclick="alert('Please sign in to add favorites'); return false" class="favorite highlight" title="Add to favorites">
    <span>2</span> ++</a>