Search code examples
python selenium selenium-webdriver beautifulsoup webdriverwait

How to extract data from a dynamic table with Selenium in Python?


I'm trying to extract data from a website. I need to enter a value in the search box and then look up the details, which generates a table. After the table is generated, I need to write the details to a text file or insert them into a database. This is what I have tried so far.

Website: https://commtech.byu.edu/noauth/classSchedule/index.php Search text: "C S 142"

Sample Code

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

from selenium.webdriver.chrome.service import Service

from selenium.webdriver.chrome.options import Options
# Question script: open the class-schedule page, search for "C S 142",
# then print the text of every table cell found.
c_options = Options()
# "detach" asks ChromeDriver to leave the browser window open after the
# script finishes.
c_options.add_experimental_option("detach", True)

# Path to the local chromedriver executable.
s = Service('C:/Users/sidat/OneDrive/Desktop/python/WebDriver/chromedriver.exe')



# NOTE(review): the question text names commtech.byu.edu, but this URL points
# at saasta.byu.edu — confirm which host is intended.
URL = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver = webdriver.Chrome(service=s, options=c_options)
driver.get(URL)
# No explicit wait here: the search bar is looked up immediately after get().
element = driver.find_element("id", "searchBar")
# Types the query and presses RETURN in the same call ...
element.send_keys("C S 142", Keys.RETURN)
# ... and then additionally clicks the search button.
search_button = driver.find_element("id", "searchBtn")
search_button.click()

# Waits up to 10 s for #sectionTable to be present in the DOM; presence does
# not require the element to be visible.
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[@id='sectionTable']")))

# NOTE: an XPath beginning with "//" is evaluated from the document root even
# when called on an element, so this matches every <tr> on the page; ".//tr"
# would restrict the search to descendants of `table`.
rows = table.find_elements("xpath", "//tr")

for row in rows:
    # Only <td> cells are collected, so rows made of <th> cells print nothing.
    cells = row.find_elements(By.TAG_NAME, "td")
    for cell in cells:
        print(cell.text)

I'm using PyCharm 2022.3 to write and test the code. Nothing is printed when I run it. Please help me solve this problem so that I can extract the data to a text file and to an SQL database table.


Solution

  • The following code prints the content of the table you asked for.
    You need to wait for elements to be clickable if you are going to click them or send text to them, or wait for them to be visible if you want to read their text content.

    from selenium import webdriver
    from selenium.webdriver import Keys
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    
    # Search the class schedule for "C S 142" and print the header row and the
    # body cells of the resulting #sectionTable, each padded to 10 characters.
    options = Options()
    options.add_argument("start-maximized")

    # Raw string: "\w" and "\c" are not valid escape sequences, so a plain
    # literal triggers an invalid-escape warning on modern Python versions.
    webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
    driver = webdriver.Chrome(options=options, service=webdriver_service)
    # One shared explicit wait (30 s timeout) used for every lookup below.
    wait = WebDriverWait(driver, 30)

    url = "http://saasta.byu.edu/noauth/classSchedule/index.php"

    try:
        driver.get(url)

        # Wait until each control is clickable before typing into / clicking it.
        wait.until(EC.element_to_be_clickable((By.ID, "searchBar"))).send_keys("C S 142", Keys.RETURN)
        wait.until(EC.element_to_be_clickable((By.ID, "searchBtn"))).click()

        # Wait for visibility because the text of the table is read next.
        table = wait.until(EC.visibility_of_element_located((By.XPATH, "//*[@id='sectionTable']")))
        # The leading "." keeps both searches inside `table`.
        headers = table.find_elements(By.XPATH, ".//thead//th")
        cells = table.find_elements(By.XPATH, ".//tbody//td")

        # ljust(10) pads short values only; longer text is kept in full.
        headers_text = "".join(header.text.ljust(10) for header in headers)
        cells_text = "".join(cell.text.ljust(10) for cell in cells)

        print(headers_text)
        print(cells_text)
    finally:
        # Always end the browser session, even if a wait times out.
        driver.quit()
    

    The output is:

    Section   Type      Mode      InstructorCredits   Term      Days      Start     End       Location  Available Waitlist  
    002       DAY       Classroom           3.00                                              TBA       0/0       0