Search code examples
pythonseleniumselenium-webdriverweb-scrapingiframe

Can not load <tbody>, python table


I am trying to scrape a table on a website. The problem is that there is no <tbody> in the HTML I get back. I tried with both requests and selenium, but always get the same result. Does anybody have any idea?

This is the code (with requests). Website: https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances

# Asker's first attempt: download the token page with urllib and dump the
# parsed HTML to see whether the table body is present.
from urllib.request import Request, urlopen
import bs4



# Token page whose table the asker wants to scrape.
link="https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances"


# Send a browser-like User-Agent header with the request.
req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
# Read the full response body of that single request.
webpage = urlopen(req).read()

# Parse the downloaded markup with "html.parser" and print the whole document.
# NOTE(review): the solution in this post says the table lives inside an
# iframe, so presumably this document never contains its <tbody> - confirm.
soup = bs4.BeautifulSoup(webpage,"html.parser" )
print(soup)

This is with selenium:

# Asker's second attempt: load the page in Edge via Selenium, dump the page
# source, then wait for the table body to become visible.
import time
import bs4
from selenium import webdriver
from webdriver_manager.microsoft import EdgeChromiumDriverManager
# NOTE(review): Service is taken from the chrome package although an Edge
# driver is created below; selenium.webdriver.edge.service looks like the
# intended module - confirm.
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Start an Edge session; the Service is built from whatever
# EdgeChromiumDriverManager().install() returns.
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")
# Fixed 7-second pause before reading the page source.
time.sleep(7)

# Snapshot the current page source, parse it with lxml and print it.
html=driver.page_source
soup=bs4.BeautifulSoup(html,"lxml" )
print(soup)


# Wait up to 20 seconds for the table body to become visible.
# NOTE(review): per the solution in this post the table is inside an iframe,
# and the driver has not switched into any frame at this point - presumably
# why the <tbody> is never found here.
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".table > tbody:nth-child(2)")))
print(driver.page_source)

Solution

  • The table you are trying to access is inside an iframe. You will need to switch to that iframe in order to access that element:

    # Load the token page in Edge, switch Selenium into the iframe that hosts
    # the table, wait for the table body to be visible, and print the frame's
    # page source.
    import time

    import bs4
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    # An Edge driver takes the Service class from the edge package; the chrome
    # Service is meant for webdriver.Chrome.
    from selenium.webdriver.edge.service import Service
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait
    from webdriver_manager.microsoft import EdgeChromiumDriverManager

    # Start Edge with the driver path returned by webdriver_manager.
    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))

    driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")
    # Fixed pause kept from the original so the outer-page dump below is taken
    # at the same point as before.
    time.sleep(7)

    # Dump the outer document; it is read before any frame switch, so it is
    # the top-level page rather than the iframe's content.
    html = driver.page_source
    soup = bs4.BeautifulSoup(html, "lxml")
    print(soup)

    # Wait (up to 10 s) for the iframe to be available and switch the driver's
    # context into it; element lookups after this run inside the frame.
    WebDriverWait(driver, 10).until(
        EC.frame_to_be_available_and_switch_to_it(
            (By.CSS_SELECTOR, "iframe#tokeholdersiframe")
        )
    )

    # Now that the driver is inside the frame, wait (up to 20 s) for the table
    # body to be visible and print the current page source.
    WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located(
            (By.CSS_SELECTOR, ".table > tbody:nth-child(2)")
        )
    )
    print(driver.page_source)
    

    When you are finished working inside the iframe, you will have to switch back to the default (top-level) content with:

    # Leave the iframe and return the driver's context to the top-level page.
    driver.switch_to.default_content()