Search code examples
python · selenium · web-scraping · find-element

Selenium not getting text depending on the value of the id?


I'm currently working on this website: https://lawebdelaprimitiva.com/Primitiva/Historico%20de%20sorteos/1986.html — trying to obtain the values of the balls. So far I can only get the first 7 values using the for loop that I attach below; from the eighth element onward, it is as if they don't have any text in their structure.

# Open the lottery-history page, pick a year from the <select id="anio">
# dropdown, then try to read each draw row by its generated element id.
web='https://lawebdelaprimitiva.com/Primitiva/Historico%20de%20sorteos.html'
driver=webdriver.Chrome(path)  # `path` is the chromedriver path (defined elsewhere, not shown)
driver.get(web)
# Locate the year <select> and click its second-to-last <option> (a past year).
lispes=driver.find_element("id","anio")
opciones = lispes.find_elements(By.TAG_NAME, "option")
opciones[-2].click()
for bola in range(20):
    print("--------")
    # Draw rows appear to carry ids "id_sorteo0", "id_sorteo1", ...
    name='id_sorteo' + str(bola)
    print(name)
    # NOTE(review): Selenium's .text returns "" for elements that are not
    # visible in the viewport — presumably why rows past the 7th come back
    # empty; scrolling each row into view should be confirmed as the fix.
    linea=driver.find_element(By.ID, name).text
    print(linea)
driver.close()

I also tried to get the value not by the ID but by the title attribute, with the same results. It works for the first 7 elements, but from the 8th onward it doesn't:

# Same symptom when locating the row by its title attribute instead of its id:
# the first rows yield text, later (off-screen) rows return an empty string.
linea2=driver.find_element(By.XPATH, '//*[@title="Jueves 30 de Octubre de 1986"]').text
print(linea2)

Any idea? Thank you


Solution

  • Here is one possible solution:

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.select import Select
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    
    
    options = webdriver.ChromeOptions()
    # Hide the "Chrome is being controlled" infobar and silence driver logging.
    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    
    # BUG FIX: in a plain string 'path\to\...', the "\t" is parsed as a TAB
    # character, silently corrupting the path. Use a raw string (or forward
    # slashes) for Windows paths.
    service = Service(executable_path=r'path\to\your\chromedriver.exe')
    driver = webdriver.Chrome(service=service, options=options)
    wait = WebDriverWait(driver, 10)
    
    url = 'https://lawebdelaprimitiva.com/Primitiva/Historico%20de%20sorteos/1985.html'
    driver.get(url)
    # Accept the cookie-consent dialog before interacting with the page.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[aria-label="Consentir"]'))).click()
    # One <option> per available year in the "anio" dropdown.
    dates = driver.find_elements(By.CSS_SELECTOR, 'select[name="anio"] option')
    
    for idx in reversed(range(len(dates))):
        # Select the year; the page navigates to that year's history URL.
        Select(driver.find_element(By.ID, 'anio')).select_by_index(idx)
        print(f"{driver.current_url.split('/')[-1].split('.')[0]} year")
        # Each raffle row is a <ul> inside a <div class="title"> element.
        raffles = driver.find_elements(By.CSS_SELECTOR, 'div[class="title"] ul')
        for raffle in raffles:
            # Key insight for the asker's problem: Selenium's .text is empty
            # for elements outside the viewport, so scroll each row into view
            # before reading it.
            driver.execute_script("arguments[0].scrollIntoView(true);", raffle)
            print(raffle.text)
    
    driver.quit()
    

    Output:

    1985 year
    J-19-12 5 28 30 34 39 44 15 0  4/2 10131  1/5  1  
    J-12-12 2 5 7 43 48 49 11 0  2/4 30003  3/3  0  
    J-5-12 4 14 23 27 30 40 12 0  4/2 11211  3/3  0  
    J-28-11 9 17 20 34 41 47 11 0  2/4 11112  3/3  0  
    J-21-11 6 15 35 44 46 48 38 0  4/2 11013  2/4  0  
    J-14-11 19 22 34 36 37 43 38 0  3/3 01131  2/4  3  
    J-7-11 2 9 14 19 34 43 49 0  3/3 22011  4/2  1  
    J-31-10 10 14 29 33 46 48 30 0  4/2 02112  2/4  1  
    J-24-10 5 16 18 29 43 44 2 0  3/3 12102  3/3  0  
    J-17-10 3 11 13 15 34 35 27 0  1/5 13020  4/2  
    1986 year
    J-18-12 10 16 17 22 39 48 15 0  4/2 03111  4/2  1  
    J-11-12 5 25 39 41 43 45 18 0  0/6 10113  2/4  0  
    J-4-12 2 6 9 10 24 48 11 0  5/1 31101  5/1  0  
    J-27-11 4 20 25 37 41 46 31 0  3/3 10212  3/3  1  
    J-20-11 3 4 10 19 23 30 26 0  3/3 22110  5/1  1  
    J-13-11 1 6 26 30 41 43 18 0  3/3 20112  2/4  0  
    J-6-11 17 22 28 40 42 46 27 0  5/1 01203  2/4  1  
    J-30-10 1 10 26 34 42 48 11 0  5/1 11112  2/4  1  
    J-23-10 2 12 21 30 35 48 42 0  4/2 11121  3/3  1   
    ...