I'm new to Python and still learning, but I couldn't solve this problem. I want to run some functions (which are in a class) in an infinite loop. Because the program is a QApplication, I've learned that I should do it with a QTimer. However, while researching how to do it, I couldn't find a working approach. A commonly suggested solution is:
timer = QTimer()
timer.timeout.connect(function)
timer.start(60000)
But when I insert these lines into my code, it makes no difference. I have tried placing them inside functions, inside the class, and so on, but couldn't get it to work. The functions I want to loop are here:
__author__ = 'pc'

import requests
from bs4 import BeautifulSoup
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import sqlite3
import sys, getopt, time
from PyQt5.QtCore import QTimer

records = []

def scrape_page(url, html):
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.find('div', class_='tablo_dual_board')
    try:
        datas = data.text.splitlines()
        datas1 = list(filter(None, datas))
        records.append(datas1)
    except:
        pass

def process_records():
    # add record to database ...
    print('process records:', len(records))

def generate_urls():
    onexurl = "https://1xbahis19.com/en/live/Football/"
    reply = requests.get(onexurl)
    soup = BeautifulSoup(reply.content, "html.parser")
    income = soup.find_all("ul", {"id":"games_content"})
    links = soup.find_all("a", {"class": "c-events__name"})
    urls = []
    for matchlink in links:
        urls.append("https://1xbahis19.com/en/" + matchlink.get("href"))
    return urls

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
            return True

    def processCurrentPage(self, html):
        scrape_page(self.url().toString(), html)
        if not self.fetchNext():
            process_records()
            print(records)
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        self.toHtml(self.processCurrentPage)

app = QtWidgets.QApplication(sys.argv)
webpage = WebPage()
webpage.start(generate_urls())
timer = QTimer()
timer.timeout.connect(WebPage)
timer.start(60000)
app.exec_()
Can anyone help with this please?
I assume you want to run the scraper at regular intervals. The script below will scrape all the urls once every 60 seconds. The signal part is there to provide a way to terminate the infinite loop: just press Ctrl+C (i.e. KeyboardInterrupt), and it will stop immediately.
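The key detail is the use of QTimer.singleShot rather than a repeating timer: each cycle schedules the next one only after the current batch of pages has been fully processed, so cycles can never overlap. Here is a minimal sketch of that re-arming pattern in isolation (do_work is just a hypothetical stand-in for one scrape cycle):

import sys
from PyQt5 import QtWidgets
from PyQt5.QtCore import QTimer

def do_work():
    # stand-in for one full scrape cycle
    print('working...')
    # re-arm: run again 60 seconds after this cycle completes
    QTimer.singleShot(60000, do_work)

app = QtWidgets.QApplication(sys.argv)
do_work()
app.exec_()

The full script follows; there, the re-arming call lives in process_records, because that is the point at which an entire (asynchronous) scrape cycle has actually finished.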
import requests
from bs4 import BeautifulSoup
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import sqlite3
import sys, getopt, time
from PyQt5.QtCore import QTimer
import signal

# press Ctrl+C to stop the script
signal.signal(signal.SIGINT, signal.SIG_DFL)

records = []

def scrape_page(url, html):
    print('scraping page:', url)
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.find('div', class_='tablo_dual_board')
    try:
        datas = data.text.splitlines()
        datas1 = list(filter(None, datas))
        records.append(datas1)
    except AttributeError:
        # the div is missing on this page, so data is None
        pass

def process_records():
    # add record to database ...
    print('processed records:', len(records))
    # clear the current records
    del records[:]
    # re-run after a timeout
    QTimer.singleShot(60000, run)

def run():
    print('running scraper...')
    webpage.start(generate_urls())

def generate_urls():
    print('generating urls...')
    onexurl = "https://1xbahis19.com/en/live/Football/"
    reply = requests.get(onexurl)
    soup = BeautifulSoup(reply.content, "html.parser")
    income = soup.find_all("ul", {"id":"games_content"})
    links = soup.find_all("a", {"class": "c-events__name"})
    urls = []
    for matchlink in links:
        urls.append("https://1xbahis19.com/en/" + matchlink.get("href"))
    return urls

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        # begin loading the first url in the batch
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
            return True

    def processCurrentPage(self, html):
        scrape_page(self.url().toString(), html)
        # when the batch is exhausted, process the results
        # (which also schedules the next run)
        if not self.fetchNext():
            process_records()

    def handleLoadFinished(self):
        self.toHtml(self.processCurrentPage)

app = QtWidgets.QApplication(sys.argv)
webpage = WebPage()
run()
app.exec_()
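As an aside, this also shows why your original attempt made no difference: timer.timeout.connect(WebPage) connects the timeout signal to the WebPage class itself, so each timeout merely constructs a new, unused page object. The signal must be connected to a callable that restarts the work, such as the run function above. If you ever do want a plain repeating timer (it fires every 60 seconds no matter what, so it is only safe if a cycle is guaranteed to finish within the interval), a sketch reusing run would be:

timer = QTimer()
timer.timeout.connect(run)   # connect a callable that restarts the scrape
timer.start(60000)           # fires every 60 seconds, even mid-scrape
app.exec_()

Note that the timer object must be kept alive (e.g. as a module-level variable) for as long as the application runs, otherwise it is garbage-collected and never fires.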