Search code examples
python, python-3.x, for-loop, pandas-datareader

Running for-loop and skipping stocks with 'KeyError' : Date


I have written the following code, which gets the list of S&P 500 symbols from Wikipedia and then scrapes the data from the Yahoo daily reader.

# Scrape the S&P 500 constituents table from Wikipedia.
html = urlopen('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = BeautifulSoup(html,'lxml')
sp500_raw = soup.find('table', {'class': 'wikitable sortable'})

spsymbol =[]

# The first <td> of each data row (header row skipped) holds the ticker symbol.
for row in sp500_raw.findAll('tr')[1:]:
    spsymbols = row.findAll('td')[0].text.strip()
    spsymbol.append(spsymbols)

start = datetime.date(2008,1,1)
end = datetime.date.today()

# Write the tickers (leading "<digits> " stripped via regex) one per line.
# NOTE(review): the pattern should be a raw string, r'^\d+ +'.
df = pd.DataFrame(spsymbol, columns=['Stock_name'])
df.Stock_name.str.replace('^\d+ +','').to_csv('SP Scrapped data.csv', index=False, header=False) 

stock_data = []
with open('SP Scrapped data.csv') as csvfile:
    stockticker_data = csv.reader(csvfile, delimiter=' ')
    for stockticker_data1 in stockticker_data:
        # Yahoo uses '-' where the listing uses '.' (e.g. BRK.B -> BRK-B).
        stockticker_data1 = [col.replace('.', '-') for col in stockticker_data1]
        for row in stockticker_data1:
            print(row)
            all_data = []  # NOTE(review): never used afterwards.
            # NOTE(review): `row` is a string, so this loop iterates it
            # character by character — each single letter is sent to Yahoo
            # as a "ticker", which is almost certainly not intended.
            for ticker in row:
                stock_data.append(web.get_data_yahoo(ticker, start, end))
                # NOTE(review): this inner loop rewrites EVERY previously
                # fetched DataFrame to the same file `ticker` on every
                # iteration, and rebinds `df`, shadowing the DataFrame above.
                for df in stock_data:
                    df.to_csv(ticker, header=True, index=True, columns=['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], sep=' ')

Problem #1: I am getting 'KeyError': Date. I get this error after the 5th symbol, which is ATVI. This is odd, since there is data for this ticker going back to 1993, so I am not sure why I am getting that error.

Problem #2: How can I skip stocks that raise 'KeyError': Date without ending the for-loop? A similar piece of code gets around this problem with try and except, but I cannot see how to apply that here. Link here


Complete Error

MMM
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py:1418: FutureWarning: 
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
ABT
ABBV
ABMD
ACN
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2896             try:
-> 2897                 return self._engine.get_loc(key)
   2898             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'Date'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-12-af32c9c31c51> in <module>
      9             all_data = []
     10             for ticker in row:
---> 11                 stock_data.append(web.get_data_yahoo(ticker, start, end))
     12                 for df in stock_data:
     13                     df.to_csv(ticker, header=True, index=True, columns=['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], sep=' ')

C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\data.py in get_data_yahoo(*args, **kwargs)
     80 
     81 def get_data_yahoo(*args, **kwargs):
---> 82     return YahooDailyReader(*args, **kwargs).read()
     83 
     84 

C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\base.py in read(self)
    249         # If a single symbol, (e.g., 'GOOG')
    250         if isinstance(self.symbols, (string_types, int)):
--> 251             df = self._read_one_data(self.url, params=self._get_params(self.symbols))
    252         # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
    253         elif isinstance(self.symbols, DataFrame):

C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\yahoo\daily.py in _read_one_data(self, url, params)
    163         prices = DataFrame(data["prices"])
    164         prices.columns = [col.capitalize() for col in prices.columns]
--> 165         prices["Date"] = to_datetime(to_datetime(prices["Date"], unit="s").dt.date)
    166 
    167         if "Data" in prices.columns:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2978             if self.columns.nlevels > 1:
   2979                 return self._getitem_multilevel(key)
-> 2980             indexer = self.columns.get_loc(key)
   2981             if is_integer(indexer):
   2982                 indexer = [indexer]

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2897                 return self._engine.get_loc(key)
   2898             except KeyError:
-> 2899                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2900         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2901         if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'Date'

Solution

  • The error shows the problem is in the line with web.get_data_yahoo, so you would have to put all of this part in a try/except:

      for ticker in row:
          try:
                stock_data.append(web.get_data_yahoo(ticker, start, end))
                for df in stock_data:
                    df.to_csv(ticker, header=True, index=True, columns=['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], sep=' ')
          except Exception as ex:
              print("Ex:", ex)
    

    But frankly, I don't understand why you append to stock_data and then run to_csv for every element of stock_data on each iteration — that way you write the same data over and over again.

    from datetime import datetime, timedelta
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import pandas as pd
    from pandas_datareader import data as web
    
    # Download and parse the Wikipedia page listing S&P 500 constituents.
    page = urlopen('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    parsed = BeautifulSoup(page, 'lxml')
    constituents = parsed.find('table', {'class': 'wikitable sortable'})
    
    # The ticker symbol sits in the first <td> of every data row
    # (the header row is skipped).
    spsymbol = [tr.findAll('td')[0].text.strip()
                for tr in constituents.findAll('tr')[1:]]
    
    start = datetime(2008, 1, 1).date()
    end = datetime.today().date()
    
    # Fetch each ticker once and write it straight to "<ticker>.csv";
    # a failing symbol is reported and skipped, not fatal.
    for ticker in spsymbol:
        print(ticker)
        try:
            frame = web.get_data_yahoo(ticker, start, end)
            frame = frame.reset_index()
            #print(frame.head())
            frame.to_csv(ticker + '.csv', header=True, index=True, columns=['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], sep=' ')
        except Exception as ex:
            print('Ex:', ex)