Search code examples
web-scraping beautifulsoup python-requests stock

How can I webscrape these ticker symbols from barchart.com?


I am trying to use Beautiful Soup to webscrape the list of ticker symbols from this page: https://www.barchart.com/options/most-active/stocks

My code returns a lot of HTML from the page, but I can't find any of the ticker symbols in it with Ctrl+F. I would much appreciate it if someone could tell me how to access them!

Code:

from bs4 import BeautifulSoup as bs
import requests
# Browser-like User-Agent sent with the page request.
headers = {'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"}

url = "https://www.barchart.com/options/most-active/stocks"
# Download the page and parse the returned HTML with the built-in parser.
page = requests.get(url, headers=headers)
html = page.text
soup = bs(html, 'html.parser')
# find_all() with no arguments matches every tag, so this prints the whole
# parsed document.
# NOTE(review): as described in the question, the ticker symbols do not show
# up in this output — presumably the table is filled in by a separate request
# after the page loads; confirm in the browser's network tab.
print(soup.find_all())

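Solution

The ticker symbols are not in the HTML that `requests` receives. The code below instead asks Barchart's JSON endpoint (`/proxies/core-api/v1/quotes/get`) for them: it first loads the site root to obtain the `XSRF-TOKEN` cookie, then sends that value back as the `X-XSRF-TOKEN` header on the API request.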

  • import requests
    from urllib.parse import unquote
    import pandas as pd
    
    # Browser-like User-Agent applied to the session's requests.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) "
            "Gecko/20100101 Firefox/90.0"
        ),
    }
    
    
    def main(url, *, start_date="2021-08-03", end_date="2021-08-04",
             limit=500, timeout=30):
        """Fetch Barchart's most-active options list and print it as a DataFrame.

        The site root is requested first so the session receives the
        ``XSRF-TOKEN`` cookie; its URL-decoded value is then sent back as the
        ``X-XSRF-TOKEN`` header on the API request.

        Args:
            url: Quotes API endpoint. Its scheme and host are also used for
                the initial cookie request.
            start_date: Lower bound of the ``tradeTime`` filter (YYYY-MM-DD).
            end_date: Upper bound of the ``tradeTime`` filter (YYYY-MM-DD).
            limit: Maximum number of rows requested.
            timeout: Seconds to wait on each HTTP request.

        Returns:
            The DataFrame that was printed.

        Raises:
            requests.HTTPError: If either request returns an error status.
            KeyError: If the site did not set an ``XSRF-TOKEN`` cookie, or the
                JSON response has no ``data`` key.
        """
        # Local import: the file only imports `unquote` from urllib.parse.
        from urllib.parse import urljoin

        # Derive "scheme://host/" from the endpoint instead of slicing a fixed
        # number of characters, which only worked for one specific host.
        site_root = urljoin(url, "/")

        with requests.Session() as req:
            req.headers.update(headers)

            landing = req.get(site_root, timeout=timeout)
            landing.raise_for_status()
            token = landing.cookies.get_dict().get("XSRF-TOKEN")
            if token is None:
                raise KeyError(f"XSRF-TOKEN cookie was not set by {site_root}")
            # The cookie value is URL-encoded; the header carries it decoded.
            req.headers.update({"X-XSRF-TOKEN": unquote(token)})

            params = {
                "list": "options.mostActive.us",
                "fields": "symbol,symbolType,symbolName,hasOptions,lastPrice,priceChange,percentChange,optionsImpliedVolatilityRank1y,optionsTotalVolume,optionsPutVolumePercent,optionsCallVolumePercent,optionsPutCallVolumeRatio,tradeTime,symbolCode",
                "orderBy": "optionsTotalVolume",
                "orderDir": "desc",
                "between(lastPrice,.10,)": "",
                f"between(tradeTime,{start_date},{end_date})": "",
                "meta": "field.shortName,field.type,field.description",
                "hasOptions": "true",
                "page": "1",
                "limit": str(limit),
                "raw": "1",
            }
            response = req.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            payload = response.json()

            # Drop the last column — presumably the nested "raw" values that
            # raw=1 adds to each row; verify against a live response.
            df = pd.DataFrame(payload["data"]).iloc[:, :-1]
            print(df)
            return df
    
    
    # Run the demo request only when executed as a script, not on import.
    if __name__ == "__main__":
        main('https://www.barchart.com/proxies/core-api/v1/quotes/get?')
    

    Output:

        symbol  symbolType  ... tradeTime symbolCode
    0      AMD           1  ...  08/03/21        STK
    1     AAPL           1  ...  08/03/21        STK
    2     TSLA           1  ...  08/03/21        STK
    3      AMC           1  ...  08/03/21        STK
    4      PFE           1  ...  08/03/21        STK
    ..     ...         ...  ...       ...        ...
    495    BTU           1  ...  08/03/21        STK
    496   EVER           1  ...  08/03/21        STK
    497   VRTX           1  ...  08/03/21        STK
    498   MCHP           1  ...  08/03/21        STK
    499    PAA           1  ...  08/03/21        STK
    
    [500 rows x 14 columns]