Search code examples
python · loops · pyqt5 · qtimer

Use QTimer to run functions in an infinite loop


I'm new to Python and still trying to learn, but I couldn't handle this problem. I want to run some functions (which are in class) in an infinite loop. Because it is a QApplication, I've learned that I should do it with a QTimer. However, when exploring how to do it, I couldn't find a working alternative. A common solution is:

timer = QTimer()
timer.timeout.connect(function)
timer.start(60000)

But when I insert these lines into my code it makes no difference. I have tried placing them inside functions, at class level, etc., but couldn't get a result. The functions I want to loop are here:

__author__ = 'pc'
import requests
from bs4 import BeautifulSoup
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import sqlite3
import sys, getopt, time
from PyQt5.QtCore import QTimer

records = []  # global accumulator: filled by scrape_page, consumed by process_records
def scrape_page(url, html):
    """Parse one page's html and append the board rows to the global records list."""
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.find('div', class_='tablo_dual_board')
    try:
        datas = data.text.splitlines()
        datas1 = list(filter(None, datas))
        records.append(datas1)
    except:
        # NOTE(review): bare except — `data` is None when the div is missing
        # (AttributeError on .text); catching everything also hides real bugs.
        pass

def process_records():
    """Process all scraped records (currently only reports the count)."""
    # add record to database ...
    print('process records:', len(records))

def generate_urls():
    """Fetch the live-football index page and return the list of match urls."""
    onexurl = "https://1xbahis19.com/en/live/Football/"
    reply = requests.get(onexurl)
    soup = BeautifulSoup(reply.content, "html.parser")
    # NOTE(review): `income` is computed but never used — dead work
    income = soup.find_all("ul", {"id":"games_content"})
    links = soup.find_all("a", {"class": "c-events__name"})
    urls = []
    for matchlink in links:
        # build an absolute url from each event link's relative href
        urls.append("https://1xbahis19.com/en/"+(matchlink.get("href")))
    return urls

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    """Loads a sequence of urls one after another and scrapes each page.

    Each finished load triggers handleLoadFinished, which grabs the html
    and feeds it to processCurrentPage; that in turn advances to the next
    url until the iterator is exhausted.
    """
    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        # Keep an iterator over the urls and kick off the first load.
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        # Load the next url; return False once the iterator is exhausted.
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
        return True

    def processCurrentPage(self, html):
        # Scrape the page just loaded, then continue or finish the batch.
        scrape_page(self.url().toString(), html)
        if not self.fetchNext():
            process_records()
            print(records)
            # Quits the Qt event loop after the last url — so a QTimer
            # started afterwards never gets a chance to fire again.
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        # toHtml is asynchronous: it invokes the callback with the page html.
        self.toHtml(self.processCurrentPage)

app = QtWidgets.QApplication(sys.argv)
webpage = WebPage()
webpage.start(generate_urls())
timer = QTimer()
# BUG: this connects the WebPage *class* to the timeout signal, so every
# tick merely constructs a new, unused WebPage — it never re-runs the
# scraper. The slot must be a callable that restarts the job (e.g. one
# that calls webpage.start(generate_urls())) — see the solution below.
timer.timeout.connect(WebPage)
timer.start(60000)
app.exec_()

Can anyone help with this please?


Solution

  • I assume you want to run the scraper at regular intervals. The script below will scrape all the urls once every 60 seconds. The signal part is there to provide a way to terminate the infinite loop - just do Ctrl+C (i.e. KeyboardInterrupt), and it will stop immediately.

    import requests
    from bs4 import BeautifulSoup
    from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
    import sqlite3
    import sys, getopt, time
    from PyQt5.QtCore import QTimer
    
    import signal
    # Restore the default SIGINT handler so Ctrl+C (KeyboardInterrupt)
    # terminates the script immediately instead of being deferred while
    # the Qt event loop is running.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    
    records = []  # accumulates scraped rows; emptied again by process_records()
    def scrape_page(url, html):
        """Extract the dual-board table rows from one page's html into records.

        Pages without a 'tablo_dual_board' div are skipped, so a single odd
        page cannot abort the whole scraping run. (The original used a bare
        ``except: pass``, which also hid unrelated bugs.)
        """
        print('scraping page:', url)
        soup = BeautifulSoup(html, 'html.parser')
        data = soup.find('div', class_='tablo_dual_board')
        if data is None:
            # the div is missing on this page — nothing to record
            return
        rows = list(filter(None, data.text.splitlines()))
        records.append(rows)
    
    def process_records():
        """Store the finished batch of records, then schedule the next pass."""
        # add record to database ...
        print('processed records:', len(records))
        # start the next pass with an empty batch
        records.clear()
        # arm a one-shot timer: run() fires again one minute from now
        QTimer.singleShot(60000, run)
    
    def run():
        """Kick off one complete scraping pass over the current match urls."""
        print('running scraper...')
        match_urls = generate_urls()
        webpage.start(match_urls)
    
    def generate_urls():
        """Fetch the live-football index page and return all match urls.

        Returns a list of absolute urls, one per event link on the page.
        """
        print('generating urls...')
        onexurl = "https://1xbahis19.com/en/live/Football/"
        reply = requests.get(onexurl)
        soup = BeautifulSoup(reply.content, "html.parser")
        # (the original also parsed ul#games_content into an unused
        # variable; that dead work has been removed)
        links = soup.find_all("a", {"class": "c-events__name"})
        return ["https://1xbahis19.com/en/" + link.get("href") for link in links]
    
    class WebPage(QtWebEngineWidgets.QWebEnginePage):
        """Walks a list of urls, loading each in turn and scraping its html."""

        def __init__(self):
            super(WebPage, self).__init__()
            # every finished load triggers the html extraction below
            self.loadFinished.connect(self.handleLoadFinished)

        def start(self, urls):
            """Begin a fresh pass over the given urls."""
            self._urls = iter(urls)
            self.fetchNext()

        def fetchNext(self):
            """Load the next url; return False when none are left."""
            url = next(self._urls, None)
            if url is None:
                return False
            self.load(QtCore.QUrl(url))
            return True

        def processCurrentPage(self, html):
            # scrape what just loaded, then move on (or wrap up the batch)
            scrape_page(self.url().toString(), html)
            if not self.fetchNext():
                process_records()

        def handleLoadFinished(self):
            # toHtml is asynchronous — it hands the html to the callback
            self.toHtml(self.processCurrentPage)
    
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    # First pass starts immediately; every later pass is re-armed by the
    # QTimer.singleShot call inside process_records().
    run()
    app.exec_()