import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd
# Scrape the 'Long Term Debt' row from a ticker's Yahoo Finance balance-sheet
# page into a one-row DataFrame, then read the value in the fifth column
# (position 4) as a float.
symbol = 'AAPL'
url = 'https://finance.yahoo.com/quote/' + symbol + '/balance-sheet?p=' + symbol
page = requests.get(url)
tree = html.fromstring(page.content)

# Text of every <span> under the element with class "D(tbhg)"; these become
# the DataFrame's column labels.
tableHeaders = tree.xpath('//*[@class="D(tbhg)"]//span')
Headers = [Header.text for Header in tableHeaders]
df = pd.DataFrame(columns=Headers, index=[1])

# The row label has to exist before it is interpolated into the XPath
# (the original built the XPath first, which raises NameError).
item1 = 'Long Term Debt'
Xpath1 = "//span[contains(.,'"+item1+"')]/parent::div/parent::div/following-sibling::div"
rowvalues1 = tree.xpath(Xpath1)
row1 = [item1] + [value1.text for value1 in rowvalues1]

# Cells whose own text is None are filled from the nested <span> nodes,
# consumed in document order.
# NOTE(review): the original indentation was lost; this assumes the child
# counter advances only when a child value is actually used -- confirm.
Xpath1 = Xpath1+"/span"
Childvalues1 = tree.xpath(Xpath1)
j = 0
for i, cell in enumerate(row1):
    if cell is None:
        row1[i] = Childvalues1[j].text
        j = j+1

df.loc[1] = row1
df = df.fillna(0)
df[df == '-'] = '0'

# Not every symbol exposes five columns; positional access past the last
# column raises "IndexError: single positional indexer is out-of-bounds",
# so fall back to 0.0 when column 4 is absent.
if df.shape[1] > 4:
    long_term_debt = float(str(df.iloc[0, 4]).replace(',', ''))
else:
    long_term_debt = 0.0
When I run this with the AAPL symbol, I get an error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-63-fe5e79eabd51> in <module>
57 df[df=='-'] ='0'
58
---> 59 long_term_debt=float(str(df.iloc[0,4]).replace(',',''))
60
--
2007 l = len(ax)
2008 if key >= l or key < -l:
-> 2009 raise IndexError("single positional indexer is out-of-bounds")
2010
2011 def _getitem_tuple(self, tup):
IndexError: single positional indexer is out-of-bounds
I can't use the value extracted from df,
because in the case of AAPL there is no year 2015 for the company. Yahoo Finance has dropped that year for this company; however, for other companies the year 2015 is still available.
What can I do to avoid this error for this specific year and symbol? I tried checking for 'None' in a function for the long-term debt variable, but it does not work. Any idea how to approach this case?
The function I tried to use:
def debt(frame=None, row=0, col=4):
    """Return the numeric value of one balance-sheet cell, or 0 if unavailable.

    Args:
        frame: DataFrame to read from. When omitted, the module-level ``df``
            is used, matching the original zero-argument call ``debt()``.
        row: Positional row index of the cell (default 0).
        col: Positional column index of the cell (default 4).

    Returns:
        The cell parsed as a float with thousands separators removed, or 0
        when the position does not exist or the cell is None/NaN, empty,
        or the '-' placeholder.
    """
    if frame is None:
        frame = df
    # Positional lookup raises IndexError when the column/row is absent, so
    # the missing-cell case has to be caught here; comparing the result with
    # None can never detect it.
    try:
        cell = frame.iloc[row, col]
    except IndexError:
        return 0
    if pd.isna(cell):
        return 0
    text = str(cell).replace(',', '').strip()
    if text in ('', '-'):
        return 0
    # The original else-branch computed this value but never returned it.
    return float(text)
However it does not work
You have to install the YahooFinancials library in your Python environment.
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd
# Fetch the balance-sheet page for one ticker and collect the 'Inventory'
# row into a single-row DataFrame, which is then printed.
symbol = 'ORCL'
url = 'https://finance.yahoo.com/quote/' + symbol + '/balance-sheet?p=' + symbol
page = requests.get(url)
tree = html.fromstring(page.content)

# Column labels: the text of each <span> under the class "D(tbhg)" element.
tableHeaders = tree.xpath('//*[@class="D(tbhg)"]//span')
Headers = [span.text for span in tableHeaders]
df = pd.DataFrame(columns=Headers, index=[1])

# Top-level value nodes that follow the row label; a node's text is None
# when the figure is held in a nested <span> instead.
item5 = 'Inventory'
Xpath = "//span[contains(.,'"+item5+"')]/parent::div/parent::div/following-sibling::div"
rowvalues5 = tree.xpath(Xpath)
row5 = [item5]
row5.extend(node.text for node in rowvalues5)

# Nested <span> nodes supply the text for every cell that came back as None,
# taken in the order they appear.
Xpath = Xpath+"/span"
Childvalues = tree.xpath(Xpath)
j = 0
for position, text in enumerate(row5):
    if text is None:
        row5[position] = Childvalues[j].text
        j += 1

df.loc[1] = row5
print(df)