python selenium web-scraping python-requests-html

Can't scrape the dynamic table on bscscan: requests_html returns None and Selenium is not working

Well, I have been trying this for a few days. I have to say that I am very new to Python and I don't fully understand JavaScript (for now), so maybe it is something stupid, but I can't figure it out.

I want to scrape the different tables on BSCScan. For simplicity, and because all of them are almost the same, I will show only the code for the holders table. I want to store each row in a dict with address, quantity and percentage, append it to a list of dicts, and convert that list to a data frame with pandas. This is the approach I made with requests_html:

from requests_html import HTMLSession

# Token contract whose holders table we want to read.
contract = "0x84c0160d55a05a28a034e1e6776f84c5995aba3a"
url = f"https://bscscan.com/token/{contract}#balances"

session = HTMLSession()

# Fetch the token page, then let requests_html run the page's JavaScript,
# sleeping 3 seconds after the initial render.
r = session.get(url)
r.html.render(sleep=3)
# r.content is the raw response body as bytes.
print(r.content)

# Look up the first <td> element anywhere in the parsed document.
holders = r.html.xpath("//td", first=True)
print(holders)   #This returns None

That was my attempt with requests_html; this is my attempt with Selenium:

# Location of the ChromeDriver executable on the local machine.
driver_path = "/Users/XXX/Downloads/chromedriver"
# Start a Chrome session through that driver binary.
driver = webdriver.Chrome(executable_path=driver_path)

 

def get_top_holders(driver, holders):
    """Scrape the holders table found at ``holders`` into a DataFrame.

    Args:
        driver: Selenium WebDriver used to load the page. It is closed once
            the table has been read (or if reading fails), so it cannot be
            reused after this call.
        holders: URL of the page that contains the holders table.

    Returns:
        A pandas DataFrame with one row per ``<tr>`` of the first ``<tbody>``
        and the text columns ``Addres``, ``Cantidad`` and ``Porcentaje``.
        The columns are present even when the table has no rows.
    """
    # Column name -> XPath of the cell, relative to the current row. The
    # leading "." matters: a bare "//td[4]" would search the whole document
    # and return the same cell for every row.
    columns = {
        'Addres': './/td[2]/span/a',
        'Cantidad': './/td[3]',
        'Porcentaje': './/td[4]',
    }
    list_holders = []
    try:
        driver.get(holders)
        # NOTE(review): if the page renders the table inside an iframe, the
        # driver has to switch to that frame first -- confirm on the page.
        # Keep the <tbody> WebElement itself; its innerHTML is a plain string
        # and cannot be searched for rows.
        tabla = driver.find_element_by_xpath('.//tbody')
        for row in tabla.find_elements_by_xpath('.//tr'):
            holder = {
                key: row.find_element_by_xpath(xpath)
                .get_attribute('textContent')
                .strip()
                for key, xpath in columns.items()
            }
            print(holder)
            list_holders.append(holder)
    finally:
        # Close the browser exactly once, after all rows were read; closing
        # it inside the loop invalidates every remaining row element.
        driver.close()
    print(list_holders)
    holders_tabla = pd.DataFrame(list_holders, columns=list(columns))

    return holders_tabla

I have tried with Selenium, letting the page render and then trying to extract the data, but I can't iterate over the rows of the tbody. I have also tried Beautiful Soup, but I don't fully understand it yet. Someone recommended requests_html to me, but it is returning None.

First time asking, thanks in advance!

Solution

You can use their API URL to get the data. For example (I'm using BeautifulSoup to parse the result, but you could use a different parser):

import requests
from bs4 import BeautifulSoup


# Endpoint queried for the holders table; its response is parsed as HTML below.
api_url = "https://bscscan.com/token/generic-tokenholders2"
params = {
    "m": "normal",
    "a": "0x84c0160d55a05a28a034e1e6776f84c5995aba3a",  # token contract address
    "p": "1",  # presumably the page number -- confirm against the site
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(api_url, params=params, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page
# that would simply yield no rows.
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")

# Only rows that contain <td> cells are printed; rows without them
# (e.g. a header row) are skipped by the selector.
for row in soup.select("tr:has(td)"):
    tds = [td.get_text(strip=True) for td in row.select("td")]
    print(*tds)

Prints:

1 Burn Address 100,000,000 0.0000% 
2 PancakeSwap V2: COSP 57,731,808.58566958 0.0000% 
3 0xb545108f3ee074de830a21e09c6b9cce87b43edb 1,999,989.75962961 0.0000% 
4 0x5015219bbf9b1bacc912523f002e28d044942e90 1,983,225.07328063 0.0000% 
5 0x8081b65976ff685f9cd14ac7bd19ba2d324ffe68 1,945,354.7303935 0.0000% 
6 0x8806048c354ab50ed075f1ab1977c6dbd87fdc69 1,876,725.00624433 0.0000% 
7 0x7879086b868da3d6ae27a6f0011bec351d8c3dd5 1,800,360.0720144 0.0000% 
8 0xe56b4b0721584a03bbedf9e2f3f0e0f6a6be2202 1,800,360.0720144 0.0000% 
9 0x878f6540f71d3b2729c4854ce779e13dc91c48c3 1,800,360.0720144 0.0000% 
10 0x065ee8c70baebce22f1dafb1214273defaac133c 1,800,360.0720144 0.0000% 
11 0xd8e3c0d4a70cb50b586733f416c3dd4906dd4499 1,800,360.0720144 0.0000% 
12 0x3ee4dcc05e7f6eae071815ae95ed41df4d466cd4 1,800,360.0720144 0.0000% 
13 0xada3871e7c0b4a4fa2b11d7a09ce3d829a8b110a 1,800,360.0720144 0.0000% 
14 0x27504201a13001ce938149e3e8230da4a9ec0a54 1,800,270.0360045 0.0000% 
15 0x978089f31aa3fa2e6a0865dc4e3b795414a5f7c9 1,799,187.20065861 0.0000% 
16 0x4faf96b1278e8d7deeb6560a6a3cab657a6da33b 1,020,175.59821628 0.0000% 
17 0xb5e983f828814f306040564d71e0d94dc4721b91 664,109.04450073 0.0000% 
18 0x63abec77beb48423fb33a955c777e4e4276b95da 635,531.52955672 0.0000% 
19 0xd8b41003a4e62715286c67a4719dab652fbe1e71 592,815.06211795 0.0000% 
20 0xe498cfe840242a8f177dc5d09f27e3bc55e6cb12 475,778.39898666 0.0000% 
21 0x40cd447c147e2cabb67682314cf2d9d4b522fa5f 456,067.83519979 0.0000% 
22 0xec002d3385713f927ae847545c4791ce0bd2bf17 366,310.61997223 0.0000% 
23 0xd0f9259bd689c8e7d6713ab9f796792f828f2e7e 318,070.02799289 0.0000% 
24 0xf00848ae5a85da1e8b434abe5ea54666e89e64c0 273,028.06319406 0.0000% 
25 0x81f7c7b2a4997c9eed08011b8e00dbdbf6ec22f3 239,843.71466396 0.0000% 
26 0x5aa492a3bc1557c9ac39c3b99aeae31067ce771b 214,421.6622502 0.0000% 
27 0x50ed39a7c924ac9a10e383e9ecf22b3eaee4e8a4 211,162.09086189 0.0000% 
28 0x58b15e3514e6823ec80f4849c28254967027d287 180,580.62402781 0.0000% 
29 0x77abe93a18deddcc927cafa2cc95ec78b4095a53 180,023.86819365 0.0000% 
30 0x7f578c7c53eba69cb418215fb4801b4793efa071 179,891.14759961 0.0000% 
31 0xef9fa3d99c0ca765e360efcd7728be875aca1b8d 170,311.18508374 0.0000% 
32 0xfed8f16992a2cb954740cef9727e4f4b7ea15dec 154,927.71290792 0.0000% 
33 0x6e38208987a9f4d6e45d00f466b8c75ce2e0d0df 153,978.7079671 0.0000% 
34 0x7b075ac1bf5f29707341b7d60889fe21ec57904b 141,726.68977964 0.0000% 
35 0x76461f752c7aca9f038d29d86ec3173d840a1528 117,847.26898315 0.0000% 
36 0x961a9541bba7c239a568c27d86df780fc82b0480 116,828.01474016 0.0000% 
37 0xa774fdbd97d9e9da95a510c7ee28b74a19775982 115,377.54671336 0.0000% 
38 0x634cb24ffa5cf115b482d74db4b12d172170076f 108,001.29601555 0.0000% 
39 0x8e1c2ded34a56fa2514ea094fd2f447bb8ddaa5c 103,940.14738693 0.0000% 
40 0x797a05431cafcd9460e794859db9d9a957ca76c1 101,927.56484176 0.0000% 
41 0x74e5c7a7c45e77a947869281198c2806cf1f0959 100,797.9591702 0.0000% 
42 0x84c0160d55a05a28a034e1e6776f84c5995aba3a 96,580.45152327 0.0000% 
43 0x85c12ad9c49e19b8e737e8b9fede2454b99e6423 86,423.77670694 0.0000% 
44 0xfb0517c9000fda24ba3d0350d41104c6965b92ae 83,317.99957459 0.0000% 
45 0x3f3e2f0d8fa19722cd2de622e8dbee8ee2eb3821 78,824.71977489 0.0000% 
46 0x4e9668acf14048a82d91b4ac4c0a1a3f0e836cf6 74,783.27179304 0.0000% 
47 0x5856c515dbb1b07c525bde9a3da5fe22d7d0df2f 73,175.56242346 0.0000% 
48 0x56b2a275bf05536fd16f76fb2fb622553b130672 72,724.86583564 0.0000% 
49 0xca51b6bd912e8defc7273878ebd25db50973aabe 71,040.67745694 0.0000% 
50 0xc6134f75dc989ac58c37ebe6437c70d1832e517a 69,790.58597696 0.0000%