Search code examples
python beautifulsoup screen-scraping

Scraping a website with BeautifulSoup


I'm trying to scrape a website with BeautifulSoup. More specifically, I'm trying to get the string from the following tag:

<td class="Fz(s) Fw(500) Ta(end)" data-reactid=".17c0h26fqwq.1.$0.0.0.3.1.$main-0-Quote-Proxy.$main 0-Quote.2.0.0.0.1.0.0:$VALUATION_MEASURES.0.1.0.$MARKET_CAP_INTRADAY.1">4.39B</td>

However, when I try to look for the attrs of all td tags, BeautifulSoup can't find the one I want. This is the code:

from urllib.request import urlopen

from bs4 import BeautifulSoup

# Download the raw HTML of the key-statistics page. The context manager
# closes the HTTP response as soon as the body has been read.
with urlopen('http://finance.yahoo.com/quote/IONS/key-statistics?p=IONS') as response:
    source_code = response.read()

# Parse the downloaded markup using the 'html.parser' backend.
yahoo_finance = BeautifulSoup(source_code, 'html.parser')

# Print the attribute dict of every <td> found in the downloaded HTML.
for td in yahoo_finance.find_all('td'):
    print(td.attrs)

This is the output:

{'class': ['W(100%)', 'Va(t)', 'Px(0)'], 'data-reactid': '.odbtogw33w.0.0.$uh.2.0.1.0.1.0.0.0'}
{'class': ['Va(t)', 'Tren(os)', 'W(10%)', 'Whs(nw)', 'Px(0)', 'Bdcl(s)'], 'data-reactid': '.odbtogw33w.0.0.$uh.2.0.1.0.1.0.0.1'}

So, it doesn't find 'class':['Fz(s)', 'Fw(500)', 'Ta(end)']

Does anyone have an idea why?

Goran


Solution

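  • BeautifulSoup can't find that cell because it isn't in the HTML that urlopen downloads: the page fills in the statistics afterwards with JavaScript. You can get the data using just requests, since the content comes from an AJAX GET request to https://query1.finance.yahoo.com/v10/finance/quoteSummary/IONS: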

    from pprint import pprint as pp

    import requests

    # Query-string parameters sent with the quoteSummary request; "modules"
    # selects which sections appear in the JSON result.
    params = {
        "formatted": "true",
        "lang": "en-US",
        "region": "US",
        "modules": "defaultKeyStatistics,financialData,calendarEvents",
        "corsDomain": "finance.yahoo.com",
    }

    url = "http://finance.yahoo.com/quote/IONS/key-statistics?p=IONS"
    ajax = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/IONS"

    with requests.Session() as s:
        # Load the HTML page through the session so that any cookies it sets
        # are sent with the API call below. A bare requests.get() here would
        # bypass the session entirely.
        # NOTE(review): the endpoint may not need this priming request - confirm.
        s.get(url)

        response = s.get(ajax, params=params)
        # Fail with a clear HTTP error instead of a confusing JSON decode error.
        response.raise_for_status()
        data = response.json()

        pp(data["quoteSummary"]["result"])
    

    That gives you:

    [{u'calendarEvents': {u'dividendDate': {},
                          u'earnings': {u'earningsAverage': {u'fmt': u'-0.53',
                                                             u'raw': -0.53},
                                        u'earningsDate': [{u'fmt': u'2016-08-09',
                                                           u'raw': 1470700800}],
                                        u'earningsHigh': {u'fmt': u'-0.39',
                                                          u'raw': -0.39},
                                        u'earningsLow': {u'fmt': u'-0.75',
                                                         u'raw': -0.75},
                                        u'revenueAverage': {u'fmt': u'37.69M',
                                                            u'longFmt': u'37,690,000',
                                                            u'raw': 37690000},
                                        u'revenueHigh': {u'fmt': u'56M',
                                                         u'longFmt': u'56,000,000',
                                                         u'raw': 56000000},
                                        u'revenueLow': {u'fmt': u'25.2M',
                                                        u'longFmt': u'25,200,000',
                                                        u'raw': 25200000}},
                          u'exDividendDate': {},
                          u'maxAge': 1},
      u'defaultKeyStatistics': {u'52WeekChange': {u'fmt': u'\u221e%',
                                                  u'raw': u'Infinity'},
                                u'SandP52WeekChange': {u'fmt': u'3.65%',
                                                       u'raw': 0.03645599},
                                u'annualHoldingsTurnover': {},
                                u'annualReportExpenseRatio': {},
                                u'beta': {u'fmt': u'2.35', u'raw': 2.35046},
                                u'beta3Year': {},
                                u'bookValue': {u'fmt': u'1.31', u'raw': 1.31},
                                u'category': None,
                                u'earningsQuarterlyGrowth': {},
                                u'enterpriseToEbitda': {u'fmt': u'-37.62',
                                                        u'raw': -37.618},
                                u'enterpriseToRevenue': {u'fmt': u'15.86',
                                                         u'raw': 15.864},
                                u'enterpriseValue': {u'fmt': u'4.09B',
                                                     u'longFmt': u'4,092,714,240',
                                                     u'raw': 4092714240},
                                u'fiveYearAverageReturn': {},
                                u'floatShares': {u'fmt': u'119.83M',
                                                 u'longFmt': u'119,833,635',
                                                 u'raw': 119833635},
                                u'forwardEps': {u'fmt': u'-1.14', u'raw': -1.14},
                                u'forwardPE': {u'fmt': u'-31.87',
                                               u'raw': -31.868423},
                                u'fundFamily': None,
                                u'fundInceptionDate': {},
                                u'heldPercentInsiders': {},
                                u'heldPercentInstitutions': {},
                                u'lastCapGain': {},
                                u'lastDividendValue': {},
                                u'lastFiscalYearEnd': {u'fmt': u'2015-12-31',
                                                       u'raw': 1451520000},
                                u'lastSplitDate': {},
                                u'lastSplitFactor': None,
                                u'legalType': None,
                                u'maxAge': 1,
                                u'morningStarOverallRating': {},
                                u'morningStarRiskRating': {},
                                u'mostRecentQuarter': {u'fmt': u'2016-03-31',
                                                       u'raw': 1459382400},
                                u'netIncomeToCommon': {u'fmt': u'-134.48M',
                                                       u'longFmt': u'-134,478,000',
                                                       u'raw': -134478000},
                                u'nextFiscalYearEnd': {u'fmt': u'2017-12-31',
                                                       u'raw': 1514678400},
                                u'pegRatio': {u'fmt': u'-0.76', u'raw': -0.76},
                                u'priceToBook': {u'fmt': u'27.73',
                                                 u'raw': 27.732826},
                                u'priceToSalesTrailing12Months': {},
                                u'profitMargins': {u'fmt': u'-52.12%',
                                                   u'raw': -0.52124},
                                u'revenueQuarterlyGrowth': {},
                                u'sharesOutstanding': {u'fmt': u'120.78M',
                                                       u'longFmt': u'120,783,000',
                                                       u'raw': 120783000},
                                u'sharesShort': {u'fmt': u'13.89M',
                                                 u'longFmt': u'13,890,400',
                                                 u'raw': 13890400},
                                u'sharesShortPriorMonth': {u'fmt': u'13.03M',
                                                           u'longFmt': u'13,032,400',
                                                           u'raw': 13032400},
                                u'shortPercentOfFloat': {u'fmt': u'13.66%',
                                                         u'raw': 0.13664},
                                u'shortRatio': {u'fmt': u'6.66', u'raw': 6.66},
                                u'threeYearAverageReturn': {},
                                u'totalAssets': {},
                                u'trailingEps': {u'fmt': u'-1.12',
                                                 u'raw': -1.119},
                                u'yield': {},
                                u'ytdReturn': {}},
      u'financialData': {u'currentPrice': {u'fmt': u'36.33', u'raw': 36.33},
                         u'currentRatio': {u'fmt': u'6.14', u'raw': 6.136},
                         u'debtToEquity': {u'fmt': u'302.79', u'raw': 302.793},
                         u'earningsGrowth': {},
                         u'ebitda': {u'fmt': u'-108.8M',
                                     u'longFmt': u'-108,796,000',
                                     u'raw': -108796000},
                         u'ebitdaMargins': {u'fmt': u'-42.17%',
                                            u'raw': -0.42169997},
                         u'freeCashflow': {u'fmt': u'15.13M',
                                           u'longFmt': u'15,127,875',
                                           u'raw': 15127875},
                         u'grossMargins': {u'fmt': u'-30.48%', u'raw': -0.30478},
                         u'grossProfits': {u'fmt': u'283.7M',
                                           u'longFmt': u'283,703,000',
                                           u'raw': 283703000},
                         u'maxAge': 86400,
                         u'numberOfAnalystOpinions': {u'fmt': u'8',
                                                      u'longFmt': u'8',
                                                      u'raw': 8},
                         u'operatingCashflow': {u'fmt': u'-11.82M',
                                                u'longFmt': u'-11,817,000',
                                                u'raw': -11817000},
                         u'operatingMargins': {u'fmt': u'-46.09%',
                                               u'raw': -0.46085998},
                         u'profitMargins': {u'fmt': u'-52.12%',
                                            u'raw': -0.52124},
                         u'quickRatio': {u'fmt': u'5.94', u'raw': 5.944},
                         u'recommendationKey': u'hold',
                         u'recommendationMean': {u'fmt': u'2.80', u'raw': 2.8},
                         u'returnOnAssets': {u'fmt': u'-8.12%',
                                             u'raw': -0.08116},
                         u'returnOnEquity': {u'fmt': u'-61.97%',
                                             u'raw': -0.6197},
                         u'revenueGrowth': {u'fmt': u'-41.10%', u'raw': -0.411},
                         u'revenuePerShare': {u'fmt': u'2.15', u'raw': 2.148},
                         u'targetHighPrice': {u'fmt': u'64.00', u'raw': 64.0},
                         u'targetLowPrice': {u'fmt': u'17.00', u'raw': 17.0},
                         u'targetMeanPrice': {u'fmt': u'39.13', u'raw': 39.13},
                         u'targetMedianPrice': {u'fmt': u'38.00', u'raw': 38.0},
                         u'totalCash': {u'fmt': u'723.51M',
                                        u'longFmt': u'723,507,008',
                                        u'raw': 723507008},
                         u'totalCashPerShare': {u'fmt': u'5.99', u'raw': 5.99},
                         u'totalDebt': {u'fmt': u'478.9M',
                                        u'longFmt': u'478,904,000',
                                        u'raw': 478904000},
                         u'totalRevenue': {u'fmt': u'257.99M',
                                           u'longFmt': u'257,993,984',
                                           u'raw': 257993984}}}]