Search code examples
python html selenium-webdriver xpath html-table

Pulling td's/ html elements with the right code - Selenium & Python


I have been trying to pull the ticker (the short code for the stock), stock name, price, sector and market cap columns from https://www.tradingview.com/markets/stocks-turkey/market-movers-all-stocks/ but I am struggling to select the correct HTML elements. I have tried using Selector Gadget to identify the XPaths; however, I am not very confident about HTML trees and rules. I noticed that the first 3 columns are considered as a single td within the webpage. I am pasting the code below, which is pulling the entire rows at the moment. Thanks.

from selenium import webdriver
from selenium.webdriver.common.by import By
import re
import pandas as pd

from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
from selenium.common.exceptions import NoSuchElementException

# Start a Chrome session and open the page that holds the stock table.
driver = webdriver.Chrome()
website = 'https://www.tradingview.com/markets/stocks-turkey/market-movers-all-stocks/'
driver.get(website)

# XPath matching any element whose class list contains "content-D4RPB3ZC"
# (presumably the "Load More" button -- the class name looks auto-generated).
LOAD_MORE_XPATH = '//*[contains(concat( " ", @class, " " ), concat( " ", "content-D4RPB3ZC", " " ))]'

# Keep clicking that element until locating or clicking it raises; the
# exception is printed and the loop ends.
keep_loading = True
while keep_loading:
    try:
        load_more_button = driver.find_element(By.XPATH, LOAD_MORE_XPATH)
        time.sleep(2)
        load_more_button.click()
        time.sleep(5)
    except Exception as exc:
        print(exc)
        keep_loading = False
print("Complete")
time.sleep(10)

# Every <tr> on the page, not only the rows of the stock table.
matches = driver.find_elements(By.TAG_NAME, 'tr')

# Per-column accumulators; declared but not filled in yet.
ticker_symbol = []
ticker_name = []
ticker_price = []
ticker_sector = []
ticker_marketcap = []

# Dump the full text of each row.
for row in matches:
    print(row.text)

driver.quit()


Solution

  • I fixed up a few issues

    1. Replaced .sleep()s with proper WebDriverWaits
    2. Updated locators

    The working code is below.

    from selenium import webdriver
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        TimeoutException,
    )
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait

    url = 'https://www.tradingview.com/markets/stocks-turkey/market-movers-all-stocks/'
    driver = webdriver.Chrome()
    # try/finally guarantees the browser is closed even if a locator fails.
    try:
        driver.maximize_window()
        driver.get(url)
        wait = WebDriverWait(driver, 10)

        # Click "Load More" until the button can no longer be found.
        while True:
            try:
                wait.until(
                    EC.element_to_be_clickable((By.XPATH, '//span[text()="Load More"]'))
                ).click()
            except StaleElementReferenceException:
                # The button was replaced between locating and clicking it;
                # locate it again. If it is really gone, the next wait times out.
                continue
            except TimeoutException:
                # No clickable "Load More" button within the wait timeout:
                # treat the table as fully loaded.
                break

        rows = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'table[class="table-Ngq2xrcG"] tr.listRow')))
        for row in rows:
            # The first cell holds both the symbol (link) and the name (<sup>).
            ticker_symbol = row.find_element(By.XPATH, './td[1]//a').text
            ticker_name = row.find_element(By.XPATH, './td[1]//sup').text
            ticker_price = row.find_element(By.XPATH, './td[2]').text
            ticker_marketcap = row.find_element(By.XPATH, './td[6]').text
            try:
                ticker_sector = row.find_element(By.XPATH, './td[11]/a').text
            except NoSuchElementException:
                # Rows without a sector link get a dash placeholder.
                ticker_sector = "—"

            print(ticker_symbol, ticker_name, ticker_price, ticker_marketcap, ticker_sector)
    finally:
        driver.quit()
    

    and the output is

    A1CAP A1 CAPITAL YATIRIM 24.76 TRY 3.38B TRY Finance
    ACSEL ACIPAYAM SELULOZ 99.7 TRY 1.104B TRY Process Industries
    ADEL ADEL KALEMCILIK 322.50 TRY 7.69B TRY Consumer Durables
    ...