So I am trying to parse this data from a dynamic table with selenium, it keeps getting the old data from page 1, I am trying to get gather pages 2's data, I've tried to search for other answers, but haven't found any, some say I need to add a wait period, and I did, however that didn't work.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Firefox()
browser.get('https://www.nyse.com/listings_directory/stock')
symbol_list=[]
table_data=browser.find_elements_by_xpath("//td");
def append_to_list(data):
for element in data:
symbol_list.append(element.text)
append_to_list(table_data)
pages=browser.find_elements_by_xpath('//a[@href="#"]')
for page in pages:
if(page.get_attribute("rel")== "next"):
if(page.text=="NEXT ›"):
page.click()
browser.implicitly_wait(100)
for elem in browser.find_elements_by_xpath("//td"): //still fetchs the data from page 1
print(elem.text)
#print(symbol_list)
I modified your script as below.
You should retrieve element in for loop or it will cause stale element reference exception.
And using WebDriverWait to wait for elements to be visible before find element.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from time import sleep
browser = webdriver.Chrome()
browser.get('https://www.nyse.com/listings_directory/stock')
symbol_list = []
while True:
try:
table_data = WebDriverWait(browser, 10).until(EC.visibility_of_all_elements_located((By.XPATH, "//table//td")))
for i in range(1, len(table_data)+1):
td_text = browser.find_element_by_xpath("(//table//td)["+str(i)+"]").text
print(td_text)
symbol_list.append(td_text)
next_page = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//a[@href="#" and contains(text(),"Next")]')))
next_clickable = next_page.find_element_by_xpath("..").get_attribute("class") # li
if next_clickable == 'disabled':
break
print("Go to next page ...")
next_page.click()
sleep(3)
except Exception as e:
print(e)
break