I want to parse football odds from a website that loads its content with JavaScript, so the page does not download all the data at once: I have to scroll down slowly to load the rest of the page before I can parse it. I'm using a function I found on this website to scroll down the page, but it creates an infinite loop and I don't know how to stop it and continue with the rest of my code. I'd like the scrolling to stop once the page has reached the part I'm interested in, and then go on to parse the data.
I have already tried adding an if statement ending with break, but it didn't work.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
import time
import pandas as pd
class wait_for_more_than_n_elements(object):
    """Wait condition that holds once enough elements match a locator.

    Instances are callables taking the driver, intended to be passed to
    ``WebDriverWait.until``.

    NOTE: despite the name, the comparison is inclusive (``>=``): the
    condition already holds when exactly ``count`` elements are present.

    Args:
        locator: ``(by, value)`` pair, e.g. ``(By.CLASS_NAME, 'box-row-event')``.
        count: Number of matching elements at which the condition holds.
    """

    def __init__(self, locator, count):
        self.locator = locator
        self.count = count

    def __call__(self, driver):
        """Return True if at least ``self.count`` elements match the locator.

        Returns False when the lookup raises StaleElementReferenceException,
        so a surrounding wait simply polls again.
        """
        try:
            # Use the public WebDriver API rather than the private
            # ``expected_conditions._find_elements`` helper.
            found = driver.find_elements(*self.locator)
        except StaleElementReferenceException:
            return False
        return len(found) >= self.count
# Open the page
driver = webdriver.Firefox(executable_path='/Applications/Python 3.7/geckodriver')
driver.get('https://www.eurobet.it/it/scommesse/?splash=false#!/calcio/it-serie-a/')
time.sleep(5)  # fixed pause; presumably gives the JavaScript-driven page time to render

# Switch to the "doppia chance" (double chance) view
dc_button = driver.find_element_by_link_text('doppia chance')
dc_button.click()
time.sleep(5)

# Page source captured right after the click, i.e. before any scrolling below
source_dc = driver.page_source
soup_dc = BeautifulSoup(source_dc, 'lxml')

# Scrolling down the page
wait = WebDriverWait(driver, 60)
wait.until(ec.invisibility_of_element_located((By.CSS_SELECTOR, "div.box-row-event:nth-child(7)")))
# NOTE: while rows are present this loop has no exit condition.
# wait_for_more_than_n_elements compares with >=, so asking for
# len(results) elements is normally satisfied at once and the loop
# keeps re-scrolling with the same count.
while True:
    results = driver.find_elements_by_class_name("box-row-event")
    print("Results count: %d" % len(results))
    if not results:
        # Nothing to scroll to; results[-1] would raise IndexError.
        break
    # scroll to the last element
    driver.execute_script("arguments[0].scrollIntoView();", results[-1])
    # wait for more results to load
    wait.until(wait_for_more_than_n_elements((By.CLASS_NAME, 'box-row-event'), len(results)))
I expected the loop to end when it reaches the last element in the variable results, but instead it keeps looping and always prints the same length for results.
After many attempts I finally found a solution that works for me:
# Baseline row count, read from the page so this snippet does not depend on
# a `results` variable left over from an earlier loop.
last_count = len(driver.find_elements_by_class_name("box-row-event"))
while True:
    results = driver.find_elements_by_class_name("box-row-event")
    print("Results count: %d" % len(results))
    if not results:
        # Nothing to scroll to; results[-1] would raise IndexError.
        break
    # scroll to the last element
    driver.execute_script("arguments[0].scrollIntoView();", results[-1])
    time.sleep(1)
    # wait for more results to load
    wait.until(wait_for_more_than_n_elements((By.CLASS_NAME, 'box-row-event'), len(results)))
    wait.until(ec.visibility_of_any_elements_located((By.CLASS_NAME, 'box-row-event')))
    time.sleep(1)
    # new count
    new_count = len(driver.find_elements_by_class_name("box-row-event"))
    # Stop once a scroll no longer adds rows: everything has been loaded.
    if new_count == last_count:
        break
    last_count = new_count
When the page reaches the last result and all the results have finally been downloaded, the loop breaks.