Search code examples
python-3.x selenium beautifulsoup smooth-scrolling

How can I stop an infinite `while True` loop in Python?


I want to parse the football odds from a website that uses JavaScript, so it doesn't load all the data at once; I have to scroll slowly to load the rest of the page and then parse it. I'm using a function I found on this website to scroll down the page, but the function creates an infinite loop and I don't know how to stop it and continue with my code. I'd like the scrolling to stop once the page has reached the part I'm interested in, and then go on to parse the data.

I have already tried adding an `if` statement ending with `break`, but it didn't work.

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
import time
import pandas as pd

class wait_for_more_than_n_elements(object):
    """Custom wait condition: true once enough elements match a locator.

    Instances are meant to be passed to ``WebDriverWait.until``, which calls
    them with the driver until a truthy value is returned.

    NOTE: despite the class name, the comparison is inclusive (``>=``): the
    condition holds as soon as *at least* ``count`` elements match. Passing
    the current number of matching elements therefore succeeds on the first
    check instead of waiting for additional elements.

    Args:
        locator: ``(by, value)`` tuple, e.g. ``(By.CLASS_NAME, "box-row-event")``.
        count: minimum number of matching elements required.
    """

    def __init__(self, locator, count):
        self.locator = locator
        self.count = count

    def __call__(self, driver):
        """Return True if at least ``self.count`` elements currently match."""
        try:
            # Use the public WebDriver API instead of the private
            # ``expected_conditions._find_elements`` helper, which is an
            # implementation detail of Selenium and not guaranteed to exist.
            found = driver.find_elements(*self.locator)
        except StaleElementReferenceException:
            # The DOM changed mid-lookup; report "not ready" so the wait polls again.
            return False
        return len(found) >= self.count

# Open the page in a Firefox session driven by the geckodriver at the given path
driver = webdriver.Firefox(executable_path='/Applications/Python 3.7/geckodriver')
driver.get('https://www.eurobet.it/it/scommesse/?splash=false#!/calcio/it-serie-a/')
# Fixed pause after navigation (presumably to let the JavaScript-rendered content appear)
time.sleep(5)

# Click the "doppia chance" (double chance) link, then pause again
dc_button = driver.find_element_by_link_text('doppia chance')
dc_button.click()
time.sleep(5)

# Page source after switching view, parsed with BeautifulSoup (lxml parser).
# NOTE: this snapshot is taken before any scrolling below, so it only contains
# whatever the page has rendered up to this point.
source_dc = driver.page_source
soup_dc = BeautifulSoup(source_dc, 'lxml')

# Scrolling down the page
# Explicit wait shared by the scrolling code; each until() call may block up to 60 seconds.
wait = WebDriverWait(driver, 60)
# Block until the "div.box-row-event:nth-child(7)" element is invisible or not in the DOM
wait.until(ec.invisibility_of_element_located((By.CSS_SELECTOR, "div.box-row-event:nth-child(7)")))

# NOTE: this loop has no exit condition. The custom wait at the bottom checks
# for ">= len(results)" matching elements, which the elements just counted
# already satisfy, so it normally passes on its first check and the loop
# simply repeats with the same count.
while True:
    results = driver.find_elements_by_class_name("box-row-event")
    print("Results count: %d" % len(results))

    # scroll to the last element
    driver.execute_script("arguments[0].scrollIntoView();", results[-1])

    # wait for more results to load
    wait.until(wait_for_more_than_n_elements((By.CLASS_NAME, 'box-row-event'), len(results)))

I expected the loop to end once it reaches the last element in the variable `results`, but unfortunately it keeps looping and always prints the same length for `results`.


Solution

  • After many attempts I finally found a solution that works for me:

    # Keep scrolling to the last loaded row until a pass adds no new rows.
    # The baseline is read from the page itself so this snippet does not depend
    # on a `results` list left over from earlier code.
    last_count = len(driver.find_elements_by_class_name("box-row-event"))
    while True:
        results = driver.find_elements_by_class_name("box-row-event")
        print("Results count: %d" % len(results))

        # Nothing to scroll to; stop instead of failing on results[-1].
        if not results:
            break

        # scroll to the last element
        driver.execute_script("arguments[0].scrollIntoView();", results[-1])
        # Fixed pause after scrolling (presumably to let lazily loaded rows appear).
        time.sleep(1)

        # wait for more results to load
        # (the custom condition is inclusive, so it is normally satisfied at
        # once; the fixed pauses are what give new rows time to show up)
        wait.until(wait_for_more_than_n_elements((By.CLASS_NAME, 'box-row-event'), len(results)))
        wait.until(ec.visibility_of_any_elements_located((By.CLASS_NAME, 'box-row-event')))
        time.sleep(1)

        # new count
        new_count = len(driver.find_elements_by_class_name("box-row-event"))

        # No growth since the previous pass: treat the list as fully loaded.
        if new_count == last_count:
            break

        last_count = new_count
    

    Once the page has scrolled to the last result and all the results have been loaded, the element count stops changing and the loop breaks.