Search code examples
python selenium parsing dynamic-tables

Getting Dynamic Table Data With Selenium Python


I am trying to parse data from a dynamic table with Selenium, but it keeps returning the old data from page 1 when I am trying to gather page 2's data. I've searched for other answers but haven't found any that help. Some say I need to add a wait period, and I did, but that didn't work.

 from selenium import webdriver

from bs4 import BeautifulSoup

from selenium.webdriver.support import expected_conditions as EC


# Accumulates the cell texts; filled in by append_to_list().
symbol_list = []

# Start Firefox and load the listings directory page.
browser = webdriver.Firefox()
browser.get('https://www.nyse.com/listings_directory/stock')

# Every <td> element found in the document right after the page is loaded.
table_data = browser.find_elements_by_xpath("//td")

def append_to_list(data):
    """Append the ``text`` of every element in *data* to ``symbol_list``.

    ``symbol_list`` is the module-level list; it is extended in place and
    nothing is returned.
    """
    symbol_list.extend(cell.text for cell in data)


# Record the cells that were read from the first page.
append_to_list(table_data)

# Collect every <a href="#"> link; the one to follow is picked below by its
# rel attribute and its visible label.
pages = browser.find_elements_by_xpath('//a[@href="#"]')

for page in pages:
    if page.get_attribute("rel") == "next" and page.text == "NEXT ›":
        page.click()

        # NOTE: implicitly_wait() only sets the timeout used by later element
        # lookups; it does not pause the script at this point.
        browser.implicitly_wait(100)

        # still fetches the data from page 1
        for elem in browser.find_elements_by_xpath("//td"):
            print(elem.text)

        #print(symbol_list)

Solution

  • I modified your script as below.

    You should look each element up again inside the for loop; otherwise you will get a stale element reference exception.

    Also use WebDriverWait to wait until the elements are visible before looking them up.

    from selenium import webdriver
    from bs4 import BeautifulSoup
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from time import sleep
    
    # Scrape the text of every table cell into symbol_list, page by page,
    # following the "Next" link until its parent element is marked disabled.
    browser = webdriver.Chrome()
    symbol_list = []

    try:
        browser.get('https://www.nyse.com/listings_directory/stock')

        while True:
            try:
                # Wait until the table cells are visible; only their count is
                # used below.
                table_data = WebDriverWait(browser, 10).until(
                    EC.visibility_of_all_elements_located((By.XPATH, "//table//td")))

                # Look each cell up again by position instead of reusing the
                # elements in table_data, so a re-rendered table does not
                # raise a stale element reference exception.
                for i in range(1, len(table_data) + 1):
                    td_text = browser.find_element(
                        By.XPATH, "(//table//td)[{}]".format(i)).text
                    print(td_text)
                    symbol_list.append(td_text)

                next_page = WebDriverWait(browser, 10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, '//a[@href="#" and contains(text(),"Next")]')))

                # The link's parent (the li) carries the "disabled" class on
                # the last page. Check for the class token so that extra
                # classes or a missing attribute are handled as well.
                parent_class = next_page.find_element(By.XPATH, "..").get_attribute("class")
                if "disabled" in (parent_class or "").split():
                    break

                print("Go to next page ...")
                next_page.click()
                # Fixed pause so the table can load the next page before the
                # next read.
                sleep(3)
            except Exception as e:
                # Top-level boundary of the script: report the problem
                # (e.g. a wait that timed out) and stop paging.
                print(e)
                break
    finally:
        # Always shut the browser and its driver process down.
        browser.quit()