Search code examples
pythonpandasyahoo-financedatareaderpandas-datareader

builtin keyerror while using pandas datareader to extract data


I'm using a loop to extract data by using pandas datareader, the first two loops are working properly. But from the third loop, the code starts to return a builtin keyerror which is unexpected. i wonder since the first two loops are working properly, why from the third loop it starts to return error? and how to fix it?


import pandas as pd
import datetime as dt
import pandas_datareader as web
#====================================================
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)
#############
prev=15
endDate=dt.datetime.today().date()
sDate=endDate-pd.to_timedelta(prev,unit='d')
#############

def get_price(tickers): #input is a list or Series
    result=pd.DataFrame()
    for i in tickers:
        df=pd.DataFrame()
        
        df['Adj Close']=web.DataReader(i,'yahoo',sDate,endDate)['Adj Close']
        df['MA']=df['Adj Close'].rolling(5).mean()
        df.sort_values(ascending=False,inplace=True,by="Date")
        df['Higher?']=df['Adj Close']>df['MA']
        df['Higher?']=df['Higher?'].astype(int)
        
    
        result['{}'.format(i)]=df['Higher?']
        
    return result



#=============================================================================

base_url = "http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportExcel?symbol="

data = {                    
    'Ticker' :      [ 'XLC','XLY','XLP','XLE','XLF','XLV','XLI','XLB','XLRE','XLK','XLU' ]          
,   'Name' :    [ 'Communication Services','Consumer Discretionary','Consumer Staples','Energy','Financials','Health Care','Industrials','Materials','Real Estate','Technology','Utilities' ]           
}                   

spdr_df = pd.DataFrame(data)     

print(spdr_df)




for i, row in spdr_df.iterrows():
    url =  base_url + row['Ticker']
    df_url = pd.read_excel(url)
    header = df_url.iloc[0]
    holdings_df = df_url[1:]
    holdings_df.set_axis(header, axis='columns', inplace=True)
    holdings_df=holdings_df['Symbol'].replace('.','-')
    
    
    a=get_price(holdings_df)
    print(a) 

the errors are listed below:

    a=get_price(holdings_df)
  File "C:/Users/austi/Desktop/stock&trading/get etf holdings Main Version.py", line 25, in <module>
    df['Adj Close']=web.DataReader(i,'yahoo',sDate,endDate)['Adj Close']
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\util\_decorators.py", line 214, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
    session=session,
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas_datareader\base.py", line 251, in read
    df = self._read_one_data(self.url, params=self._get_params(self.symbols))
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas_datareader\yahoo\daily.py", line 165, in _read_one_data
    prices["Date"] = to_datetime(to_datetime(prices["Date"], unit="s").dt.date)
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
    indexer = self.columns.get_loc(key)
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\_libs\index.cp36-win32.pyd", line 111, in pandas._libs.index.IndexEngine.get_loc
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\_libs\index.cp36-win32.pyd", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\_libs\hashtable.cp36-win32.pyd", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
    Array of values of which unique will be calculated
  File "C:\Users\austi\Downloads\Python 3.6.3\Lib\site-packages\pandas\_libs\hashtable.cp36-win32.pyd", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
builtins.KeyError: 'Date'```   

Solution

  • For same tickers, there is no date column.

    To catch the error and continue, try this code:

    def get_price(tickers): #input is a list or Series
        result=pd.DataFrame()
        for i in tickers:
            try:
                df=pd.DataFrame()                
                df['Adj Close']=web.DataReader(i,'yahoo',sDate,endDate)['Adj Close']
                df['MA']=df['Adj Close'].rolling(5).mean()
                df.sort_values(ascending=False,inplace=True,by="Date")  # sometimes error
                df['Higher?']=df['Adj Close']>df['MA']
                df['Higher?']=df['Higher?'].astype(int)
                result['{}'.format(i)]=df['Higher?']
            except Exception as ex:  # no date column
                print('Ticker', i, 'ERROR', ex)
                print(df)
            
        return result