Search code examples
python pyqt4 pyside qtwebkit

PySide.QtWebKit.QWebFrame load method with incorrect parameters


I'm trying to experiment with a crawler for an online chess-playing site, available at this GitHub link:

https://github.com/Rseiji/ChessCommentaryGeneration (a fork I created from the original repo)

It uses Python 2 and PyQt4, whose QtWebKit module is no longer available.

So, I found this link:

Python 2.7.11 - ImportError: cannot import name QtWebKit - Kali Linux / Debian 8

I didn't fully understand it (what is sparta?), but I gathered that there is a library called PySide with a QtWebKit module that could be used instead.

So I tried to modify the crawler's code by simply changing the import lines:

import sys  
from PyQt4.QtGui import *  
from PyQt4.QtCore import *  
#from PyQt4.QtWebKit import *  
from PySide.QtWebKit import *
from lxml import html 
import pickle
import time
from PyQt4 import QtGui, QtCore
import functools
import sys


import argparse
def parseArguments():
    """Parse the crawler's command-line options.

    Recognised flags (both integers):
      -i    stored as ``i``
      -num  stored as ``num``

    Returns the argparse.Namespace built from sys.argv; a flag that is not
    supplied is left as None.
    """
    cli = argparse.ArgumentParser()
    for flag in ("i", "num"):
        cli.add_argument("-" + flag, type=int, dest=flag, help=flag)
    return cli.parse_args()
# Parsed once at import time; save_all() reads params.i and params.num.
params = parseArguments()
#typ = params.typ


#Take this class for granted.Just use result of rendering.
class Render(QWebPage):  
 """Load a URL in a QWebPage and block until loadFinished fires.

 NOTE: with the imports at the top of this listing, QWebPage comes from
 PySide.QtWebKit, while QApplication and QUrl resolve to the PyQt4 classes
 pulled in by the star imports.
 """
 def __init__(self, url):  
   """Run the Qt event loop and load `url`; returns once the loop quits."""
   self.app = QApplication(sys.argv)  
   QWebPage.__init__(self)  
   self.loadFinished.connect(self._loadFinished)  
   # This QUrl is the PyQt4 one; it is handed to PySide's QWebFrame.load below.
   qurl = QUrl(url)
   # The load is not started here: it is wrapped in a callable and fired by
   # the timer, i.e. from inside the event loop.
   func = functools.partial(self.mainFrame().load, qurl )  
   timer = QtCore.QTimer()
   timer.timeout.connect(func)
   # 10000 ms interval. The timer is not single-shot, so func is invoked
   # again every 10 s for as long as the event loop keeps running.
   timer.start(10000)
   # Blocks until _loadFinished() calls quit().
   self.app.exec_()  

 def _loadFinished(self, result):  
   """Slot for loadFinished: keep the main frame and stop the event loop.

   `result` is received from the signal but not inspected.
   """
   self.frame = self.mainFrame()  
   self.app.quit()  

def save_all():
   """Render the link selected by params.i / params.num and save its HTML.

   Reads the pickled link list from ./saved_files/saved_links.p, renders the
   chosen URL through Render, writes the page to ./saved_files/saved<i>.html
   (or saved<i>_<num>.html when num is non-zero) and then sleeps 10 seconds.
   Sets the module globals cur_url and html_doc as a side effect.
   """
   global cur_url
   global html_doc
   # The file handle opened here is never closed explicitly.
   all_links = pickle.load( open("./saved_files/saved_links.p", "r") )
   #extra_links = pickle.load( open("extra_pages.p", "r") )
   print "len(all_links) = ",len(all_links)
   # This value is overwritten by params.num below before it is ever used.
   num = sys.argv[1]

   i = params.i
   # Prints the type of i, not its value.
   print "i = ",type(i)
   num = params.num
   url = all_links[i]
   # A non-zero num is appended to the URL as the "pg" query parameter.
   if num!=0:
       url+="&pg="+str(num)
   print "i, url = ",i,url
   #This step is important.Converting QString to Ascii for lxml to process
   #archive_links = html.fromstring(str(result.toAscii()))

   cur_url = url
   # Local counter: it starts at 0 on every call, so it can only reach 1.
   error_count = 0
   try:
       # Constructing Render blocks until the page load has finished.
       r = Render(cur_url)
       result = r.frame.toHtml()
       # Assumes toHtml() returned a QString (see the comment above) - only
       # then does toAscii() exist.
       html_doc = result.toAscii()

       if num==0:
           fw = open("./saved_files/saved"+str(i)+".html", "w")
       else:
           fw = open("./saved_files/saved"+str(i)+"_" + str(num) + ".html", "w")
       fw.write(html_doc)
       fw.close()
       print "---- SLEEPING ---- "
       time.sleep(10)
   except:
       # Bare except: any failure above is reported only as "ERROR!!",
       # without the exception type or message.
       print "ERROR!!"
       error_count+=1
       print "error_count = ",error_count
   ##if i>4:
   ##  break

# Run the crawl step only when executed as a script, not when imported.
if __name__=="__main__":
   save_all()

Before, when executing the code with python run_all.py 0 11577 1, the error was that the QtWebKit module could not be imported; now I get:

TypeError: 'PySide.QtWebKit.QWebFrame.load' called with wrong argument types:
  PySide.QtWebKit.QWebFrame.load(QUrl)
Supported signatures:
  PySide.QtWebKit.QWebFrame.load(PySide.QtNetwork.QNetworkRequest, PySide.QtNetwork.QNetworkAccessManager.Operation = QNetworkAccessManager.GetOperation, PySide.QtCore.QByteArray = QByteArray())
  PySide.QtWebKit.QWebFrame.load(PySide.QtCore.QUrl)

It doesn't indicate any code line, and repeats this message continuously.

What can I do?

Thank you!


Solution

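  • Although PyQt4 and PySide are both Qt4 wrappers, they are not compatible with each other, and that is the reason for the error: the QUrl passed to load() is PyQt4's class, while the QWebFrame comes from PySide and only accepts PySide.QtCore.QUrl. The solution is to use either PyQt4 or PySide, not both. In this case the code for PySide is: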

    import argparse
    import functools
    from lxml import html
    import pickle
    import sys
    import time
    
    
    # from PyQt4 import QtCore, QtGui, QtWebKit
    from PySide import QtCore, QtGui, QtWebKit
    
    
    def parseArguments(argv=None):
        """Parse the crawler's command-line options.

        Args:
            argv: Optional list of argument strings to parse instead of
                ``sys.argv[1:]``. The default (None) keeps the original
                behaviour of reading the real command line.

        Returns:
            argparse.Namespace with two integer attributes: ``i`` (used by
            save_all() as an index into the saved link list) and ``num``
            (appended to the URL as ``&pg=<num>`` when non-zero).
        """
        parser = argparse.ArgumentParser()
        # Without -i there is no link to fetch, so exit here with a usage
        # message instead of failing later with an indexing TypeError.
        parser.add_argument("-i", type=int, dest="i", help="i", required=True)
        # save_all() compares num against 0. A missing flag used to yield
        # None, which ended up in the URL as "&pg=None".
        parser.add_argument("-num", type=int, dest="num", help="num", default=0)
        return parser.parse_args(argv)
    
    
    # Parsed once at import time; save_all() reads params.i and params.num.
    params = parseArguments()
    # typ = params.typ
    
    
    # Take this class for granted.Just use result of rendering.
    class Render(QtWebKit.QWebPage):
        """Load a URL in a QWebPage and block until the load has finished.

        Constructing an instance runs the Qt event loop; control returns to
        the caller once ``loadFinished`` has fired. The loaded frame is then
        available as ``self.frame``.
        """

        def __init__(self, url):
            """Run the event loop and load *url*.

            Args:
                url: URL of the page to load; it is wrapped in a QtCore.QUrl.
            """
            # Qt allows only one QApplication per process. Reuse a live one so
            # that Render can be instantiated more than once without raising.
            app = QtGui.QApplication.instance()
            if app is None:
                app = QtGui.QApplication(sys.argv)
            self.app = app
            QtWebKit.QWebPage.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            qurl = QtCore.QUrl(url)
            # The load is not started here: it is wrapped in a callable and
            # fired by the timer, i.e. from inside the event loop.
            func = functools.partial(self.mainFrame().load, qurl)
            timer = QtCore.QTimer()
            timer.timeout.connect(func)
            # 10000 ms interval. The timer is not single-shot, so load() is
            # issued again every 10 s until loadFinished stops the loop.
            # NOTE(review): the reason for the initial 10 s delay is not
            # visible here - presumably request throttling; confirm.
            timer.start(10000)
            # Blocks until _loadFinished() calls quit().
            self.app.exec_()

        def _loadFinished(self, result):
            """Slot for loadFinished: keep the main frame and stop the loop.

            Args:
                result: Value delivered by the loadFinished signal; it is
                    not inspected.
            """
            self.frame = self.mainFrame()
            self.app.quit()
    
    
    def save_all():
        """Render one crawled link and save its HTML under ./saved_files/.

        The link is ``all_links[params.i]`` from the pickled link list. When
        ``params.num`` is non-zero, ``&pg=<num>`` is appended to the URL and
        ``_<num>`` to the output file name. After a successful save the
        function sleeps for 10 seconds before returning.

        Side effects: sets the module globals ``cur_url`` and ``html_doc``,
        writes ``saved<i>.html`` or ``saved<i>_<num>.html``, and prints
        progress to stdout. Failures while rendering or writing are reported
        with a traceback instead of being raised.
        """
        global cur_url
        global html_doc
        import traceback  # needed on the error path only

        # NOTE(review): pickle can execute arbitrary code from the file it
        # loads; only use a saved_links.p that this crawler produced itself.
        # Binary mode is what pickle expects, and `with` closes the handle.
        with open("./saved_files/saved_links.p", "rb") as links_file:
            all_links = pickle.load(links_file)
        print("len(all_links) = ", len(all_links))

        i = params.i
        print("i = ", type(i))
        num = params.num
        url = all_links[i]
        if num != 0:
            url += "&pg=" + str(num)
        print("i, url = ", i, url)

        cur_url = url
        error_count = 0
        try:
            # Constructing Render blocks until the page load has finished.
            r = Render(cur_url)
            result = r.frame.toHtml()
            # PySide has no QString: toHtml() returns a Python unicode string,
            # so QString.toAscii() does not exist. Encode explicitly instead;
            # "replace" turns non-ASCII characters into "?", as toAscii() did.
            html_doc = result.encode("ascii", "replace")

            if num == 0:
                out_path = "./saved_files/saved" + str(i) + ".html"
            else:
                out_path = "./saved_files/saved" + str(i) + "_" + str(num) + ".html"
            # html_doc is bytes, hence binary mode; `with` closes the file
            # even if the write fails.
            with open(out_path, "wb") as fw:
                fw.write(html_doc)
            print("---- SLEEPING ---- ")
            time.sleep(10)
        except Exception:
            # Deliberately best-effort, but no longer silent: show the cause,
            # and let KeyboardInterrupt / SystemExit propagate.
            print("ERROR!!")
            error_count += 1
            print("error_count = ", error_count)
            traceback.print_exc()
    
    
    # Run the crawl step only when executed as a script, not when imported.
    if __name__ == "__main__":
        save_all()