I'm trying to scrape the contents in each link on this website: binance listings
However the code keeps throwing ElementNotInteractableException or StaleElementReferenceException. And when i try to deal with the error, i get thrown into this constant loop of new errors. This is my first time using Selenium, so any help would be deeply appreciated!
Here's the code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
import re
import datetime
# Initialize the Chrome WebDriver (change the path to your WebDriver)
driver = webdriver.Chrome()
# Navigate to the webpage with your HTML content
driver.get("https://www.binance.com/en/support/announcement/new-cryptocurrency-listing?c=48&navId=48")
# Wait for the cookie consent pop-up to appear and then accept it
wait = WebDriverWait(driver, 20)
cookie_consent = wait.until(EC.presence_of_element_located((By.ID, 'onetrust-banner-sdk')))
cookie_consent.click()
# wait = WebDriverWait(driver, 20)
# Click on the accept button
accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler')))
# accept_button.send_keys("arguments[0].scrollIntoView();")
driver.execute_script("arguments[0].scrollIntoView();", accept_button)
accept_button.click()
# link = None
# Find all div elements within the specified class
div_elements = driver.find_elements(By.CSS_SELECTOR, '.css-148156o .css-1tl1y3y')
timeout = 20
# Extract links and titles from the div elements
for div_element in div_elements:
# Create a WebDriverWait instance with expected conditions
div_elements = driver.find_elements(By.CSS_SELECTOR, '.css-148156o .css-1tl1y3y')
wait = WebDriverWait(driver, timeout)
try:
# Define a function to perform the action and return True
# def click_checkout_link(driver):
# global link
link_element = div_element.find_element(By.CSS_SELECTOR, "a") # Find the link element
# link_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a'))) # Click the link
link = link_element.get_attribute('href') # Get the "href" attribute
print("Link: ", link)
link_element.click()
meta_element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//meta[@name="description"]'))
)
content = meta_element.get_attribute('content')
content = content.split(" ")
# print("OG: ", content)
content = [string for string in content if string.isupper() or string[0].isnumeric()]
content = tuple(content)
print(content)
timestamp = None
date = None
for string in content:
if ":" in string:
timestamp = string
else:
try:
if datetime.datetime.strptime(string, "%Y-%m-%d"):
date = string
except ValueError:
continue
print("Date: ", date)
print("Time: ", timestamp)
# Use the until method to wait for the condition to be met
# link_element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'a')))
# link_element.click()
except StaleElementReferenceException as e:
# Handle the StaleElementReferenceException (if needed)
print("Element is stale, handle this case if necessary")
print(e)
continue
# link = div_element.find_element(By.CSS_SELECTOR, 'a')
# title = div_element.find_element(By.CSS_SELECTOR, 'head > title').text
# Click on the link to navigate to the linked page
# link_url = link.get_attribute('href')
# # driver.execute_script("arguments[0].click();", link)
# print(f"Link: {link_url}")
# Wait for the page to load (you can adjust the sleep time)
time.sleep(2)
# try:
# Find and extract data from the linked page
time.sleep(2) # Sleep for a while before going back (you can adjust this)
# Go back to the main page
driver.back()
# Close the WebDriver when done
driver.quit()
I've tried going through the same issues on StackOverflow and using try and except blocks to catch NoSuchElementException by using
accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler')))
and
wait = WebDriverWait(driver, 20)
cookie_consent = wait.until(EC.presence_of_element_located((By.ID, 'onetrust-banner-sdk')))
However when i try using wait.until(EC.presence_of_element_located
for this line link_element = div_element.find_element(By.CSS_SELECTOR, "a") # Find the link element
i get to the wrong website (binance.com) i have no idea why thats happening, it only happens if i use the wait.until() functions. It's supposed go back to the original website after scraping the individual links.
Also i don't get this error ElementNotInteractableException
when i'm running debugging mode. I know StaleElementReferenceException
it's something to do with relocating the same element after refreshing website when going back to the list of links, but I don't know how.
To avoid ElementNotInteractableException
you can use native JS click using JS executor.
StaleElementReferenceException
exception you are getting as far as when you init for in
loop, div_elements array are not re-inited by assigning new value to it, so you get old element reference.
To avoid this you can use for in range
where you re-init array and get array elements by index.
# previous code until getting div elements
div_elements = driver.find_elements(By.CSS_SELECTOR, '.css-148156o .css-1tl1y3y')
timeout = 20
for i in range(len(div_elements)):
div_elements = driver.find_elements(By.CSS_SELECTOR, '.css-148156o .css-1tl1y3y')
wait = WebDriverWait(driver, timeout)
link_element = div_elements[i].find_element(By.CSS_SELECTOR, "a")
link = link_element.get_attribute('href')
print("Link: ", link)
driver.execute_script('arguments[0].click();', link_element)
# your further code