Search code examples
pythonuser-interfaceweb-scrapingfreeze

Webscraping and GUI in python. App freezing


Hi I am getting trouble running my app. I am new to coding so any advice on my code could would be appreciated. I am trying to make a GUI for my webscraper. Without the GUI it works perfectly fine and scrapes all the info into a .csv file. Then when i created the GUI with buttons to make it run, it freezes. I tried changing the sleep to root.after o even deleting that row because I read in other threads that it is the main reason for the GUI to freeze, but still couldn't get it fixed.

Also, on the functions I defined, im not sure if it is ok to put any random word like "prices" because I am not using it or what would be the correct way.

Here is my code, thanks in advance.

import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint
import csv
import os
from tkinter import *
from tkinter import ttk

root = Tk()
root.title("Mercados App")
root.geometry("500x500")
app = Frame(root)


def updating(prices):

switch = True
if switch:
    f = open("ListaDePreciosTodos.csv", "wt", newline="")
    writer = csv.writer(f)

    file_lista_productosPV = open("ListaPV", "r")
    lista_productosPV = file_lista_productosPV.readlines()

    file_lista_productosT = open("ListaT", "r")
    lista_productosT = file_lista_productosT.readlines()

    file_lista_productosW = open("ListaW", "r")
    lista_productosW = file_lista_productosW.readlines()
    writer.writerow(["PV Producto", "PV Precio", "T Producto", "T Precio",
                     "W Producto", "W Precio"])

    os.system('say "Price updates starting now"')

    for searchPV, searchT, searchW in zip(lista_productosPV, lista_productosT, lista_productosW):
        urlPV = "https://www.pv.com/" + searchPV.strip() + "/p"
        rPV = requests.get(urlPV)
        soupPV = BeautifulSoup(rPV.content, "lxml")
        urlT = "http://www.T.com.pe/t/product/" + searchT.strip() + "?navAction=jump&navCount=2"
        rT = requests.get(urlT)
        soupT = BeautifulSoup(rT.content, "lxml")
        urlW = "https://www.w.com/" + searchW.strip() + "/p"
        rW = requests.get(urlW)
        soupW = BeautifulSoup(rW.content, "lxml")

        try:
            productoPV = soupPV.find(["div"], {"class": ["g-nombre-prod"]}).text
            precioPV = soupPV.find(["strong"], {"class": ["skuBestPrice"]}).text

            productoT = soupT.find(["div"], {"class": ["title"]}).h5.text
            precioTxKG = soupT.find(["div"], {"class": ["price-unit"]}).text
            precioTxUN = soupT.find(["span"], {"class": ["active-price"]}).span.text

            if precioTxKG.strip() is "":
                precio_final_t = precioTxUN.replace("S/ ", "").strip()

            else:
                precio_final_t = precioTxKG.replace("/KG)","").replace("(","").replace("S/ ","").strip()

            productoW = soupW.find(["div"], {"class": ["name"]}).text
            precioW = soupW.find(["strong"], {"class": ["skuBestPrice"]}).text


            writer.writerow([productoPV, precioPV.replace("S/", ""),
                             productoT.replace("VERDURAS" or "FRUTAS" or "T", "").strip(),
                             precio_final_t, productoW, precioW.replace("S/. ", "")])

        except AttributeError:

            try:
                writer.writerow([productoPV, "No Disp.",
                                productoT.replace("VERDURAS" or "FRUTAS" or "T", "").strip(),
                                precio_final_t, productoW, precioW.replace("S/. ", ""), ])

            except AttributeError:

                try:
                    writer.writerow([productoPV, precioPV.replace("S/", ""),
                                    productoT.replace("VERDURAS" or "FRUTAS" or "T", "").strip(),
                                    "No Disp.", productoW, precioW.replace("S/. ", ""), ])

                except:
                    writer.writerow([productoPV, precioPV.replace("S/", ""),
                                     productoT.replace("VERDURAS" or "FRUTAS" or "T", "").strip(),
                                     precio_final_t, productoW, "No Disp."])

    f.close()
    os.system('say "your file is ready"')


def stopupdating(prices):
    global switch
    switch = False


app.grid()

Label(root, text="Bienvenidos").grid(row=0, column=1, sticky=W, padx=8)

startButton = Button(root, text="Start updating prices")
startButton.bind("<Button-1>", updating)
startButton.grid(row=1, column=1, sticky=W, padx=10)

stopButton = Button(root, text="Stop updating prices")
stopButton.bind("<Button-1>", stopupdating)
stopButton.grid(row=2, column=1, sticky=W, padx=10)

root.mainloop()

Solution

  • In your code, create a new method:

    def start_update():
        threading.Thread(target=updating).start()
    

    You will need to add import threading also.

    And bind your button to this method instead of directly to updating. I see your updating method has an argument of prices, but I don't see how that is assigned in your code or where it is even used.