Search code examples
pythonpyqtpyqt5qwebenginepage

Cannot use QUrl


I am trying to learn dynamic web scraping with PyQt5. The tutorials I found were written for PyQt4, so some of the libraries they use are different in Qt5.

import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import bs4 as bs
import urllib.request

class Client(QWebEnginePage):  # page subclass meant to load a URL and stop the event loop once loading finishes
    def _init_(self, url):  # NOTE(review): single underscores, so Python never runs this as the constructor
        self.app=QApplication(sys.argv)
        QWebPage._init_(self)  # NOTE(review): QWebPage is not imported in this snippet (only QWebEnginePage is)
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))  # NOTE(review): mainFrame() looks like the PyQt4 QWebPage API - confirm it exists on QWebEnginePage
        self.app.exec_()  # blocks until on_page_load() calls quit()
    def on_page_load(self):
        self.app.quit()  # ends the exec_() call started in _init_

url='https://pythonprogramming.net/parsememcparseface/'
client_response=Client(url)  # the str goes straight to QWebEnginePage's own constructor, because _init_ is not __init__
source=client_response.mainFrame().toHtml()

#sauce=urllib.request.urlopen('https://pythonprogramming.net/sitemap.xml').read()
soup=bs.BeautifulSoup(sauce,'xml')  # NOTE(review): `sauce` is only assigned in the commented-out line above; `source` is presumably intended
js_test=soup.find('p',class_='jstest')
print(js_test.text)

The following error is shown:

Traceback (most recent call last):
  File "jsp.py", line 19, in <module>
    client_response=Client(url)
TypeError: arguments did not match any overloaded call:
  QWebEnginePage(parent: QObject = None): argument 1 has unexpected type 'str'
  QWebEnginePage(QWebEngineProfile, parent: QObject = None): argument 1 has unexpected type 'str'

Someone help me!


Solution

  • Your code has several errors:

    • `__init__` must be written with two underscores before and after the name (`__init__`, not `_init_`).
    • QWebEnginePage has no mainFrame() method; call load() directly on the page instead.
    • toHtml() is no longer synchronous: it now takes a callback that receives the HTML. The get_html() method below wraps it in a local event loop so that it can be used synchronously again.

    Code:

    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtCore import QUrl, pyqtSignal, QEventLoop
    from PyQt5.QtWebEngineWidgets import QWebEnginePage
    
    class Client(QWebEnginePage):
        """Load a URL in an off-screen QWebEnginePage and expose its HTML.

        Constructing a Client blocks until the page's ``loadFinished`` signal
        fires. Afterwards ``get_html()`` returns the page's HTML as a string,
        and ``load_ok`` holds the success flag reported by ``loadFinished``.
        """

        # Emitted by store_html() once the toHtml() callback has delivered the HTML.
        toHtmlFinished = pyqtSignal()

        def __init__(self, url):
            """Start loading ``url`` and block until loading has finished.

            Args:
                url: Address of the page to load, as a string.
            """
            # Qt allows only one QApplication per process, so reuse an existing
            # one; this lets more than one Client be created in the same script.
            self.app = QApplication.instance() or QApplication(sys.argv)
            super().__init__()
            self.html = ""
            self.load_ok = False
            self.loadFinished.connect(self.on_page_load)
            self.load(QUrl(url))
            # Run the event loop until on_page_load() stops it.
            self.app.exec_()

        def on_page_load(self, ok=True):
            """Record the load result and stop the event loop started in __init__.

            Args:
                ok: Success flag passed along by the ``loadFinished`` signal.
            """
            self.load_ok = bool(ok)
            self.app.quit()

        def store_html(self, html):
            """Callback for toHtml(): keep the HTML and signal that it arrived."""
            self.html = html
            self.toHtmlFinished.emit()

        def get_html(self):
            """Return the page's HTML, waiting for the toHtml() callback.

            toHtml() hands its result to a callback instead of returning it, so
            a local event loop is run until store_html() has been called.
            """
            loop = QEventLoop()
            # Connect before requesting the HTML so the quit request cannot be
            # missed, whenever the callback ends up being invoked.
            self.toHtmlFinished.connect(loop.quit)
            self.toHtml(self.store_html)
            loop.exec_()
            return self.html
    
    # Load the page, then fetch its HTML through the synchronous wrapper and print it.
    url = 'https://pythonprogramming.net/parsememcparseface/'
    client_response = Client(url)
    source = client_response.get_html()

    print(source)
    

    References: