While learning how to fetch the list of S&P 500 companies from Wikipedia,
I got an error. My purpose is to get the data from Wikipedia and analyse it with Python; all of the code follows the tutorial video. I am a complete beginner with Python and with coding in general.
Here is my code:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import pickle
import requests
def save_sp500_tickers():
    """Scrape the S&P 500 ticker column from Wikipedia and cache it.

    Fetches the "List of S&P 500 companies" page, takes the text of the
    first ``<td>`` of every ``<tr>`` after the first one in the
    ``wikitable sortable`` table, pickles the resulting list to
    ``sp500tickers.pickle``, prints it and returns it.

    Returns:
        list: the scraped cell texts, in page order.
    """
    page = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    parsed = bs.BeautifulSoup(page.text, "lxml")
    constituents = parsed.find('table', {'class': 'wikitable sortable'})

    # The first <tr> is skipped (presumably the header row).
    body_rows = constituents.findAll('tr')[1:]
    # The cell text is used exactly as scraped: it is not stripped, so any
    # whitespace around the symbol inside the cell is kept.
    tickers = [body_row.findAll('td')[0].text for body_row in body_rows]

    with open("sp500tickers.pickle", "wb") as cache_file:
        pickle.dump(tickers, cache_file)

    print(tickers)
    return tickers
#return tickers
# save_sp500_tickers()
def get_data_from_yahoo(reload_sp500=False):
    """Download daily price history for every cached ticker into CSV files.

    Args:
        reload_sp500: when true, rebuild the ticker list with
            ``save_sp500_tickers()``; otherwise load it from
            ``sp500tickers.pickle``.

    One ``stock_dfs/<ticker>.csv`` file is written per ticker, covering
    2000-01-01 through 2016-12-31. Tickers whose CSV already exists are
    reported and left alone. Any error raised by the reader propagates to
    the caller and stops the loop.
    """
    if not reload_sp500:
        with open("sp500tickers.pickle", "rb") as cache_file:
            tickers = pickle.load(cache_file)
    else:
        tickers = save_sp500_tickers()

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2000,1,1)
    end = dt.datetime(2016,12,31)

    for ticker in tickers:
        csv_path = 'stock_dfs/{}.csv'.format(ticker)
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        prices = web.DataReader(ticker, 'yahoo', start, end)
        prices.to_csv(csv_path)
# Run the download; reload_sp500 defaults to False, so the ticker list is
# read from the existing sp500tickers.pickle file.
get_data_from_yahoo()
I have fixed all the formatting and indentation errors, but the terminal said:
traceback (most recent call last):
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 157, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 52, in <module>
get_data_from_yahoo()
File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 46, in get_data_from_yahoo
df = web.DataReader(ticker, 'yahoo', start, end)
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 208, in wrapper
return func(*args, **kwargs)
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
session=session,
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\base.py", line 251, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 160, in _read_one_data
raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol MMM
using YahooDailyReader
C:\Users\CNTHWAN8\Desktop\personal\Python\salesorder\Python>C:/Users/CNTHWAN8/AppData/Local/Programs/Python/Python37/python.exe "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py"
Traceback (most recent call last):
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 157, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 51, in <module>
get_data_from_yahoo()
File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 45, in get_data_from_yahoo
df = web.DataReader(ticker, 'yahoo', start, end)
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 208, in wrapper
return func(*args, **kwargs)
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
session=session,
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\base.py", line 251, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 160, in _read_one_data
raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol MMM
using YahooDailyReader
So I am a little confused about what happened and how to fix it. Could anyone help me here? Thanks a lot.
The problem is the trailing `\n` in each ticker: you have to strip it to get, e.g., `MMM` instead of `MMM\n`.
ticker = row.findAll('td')[0].text.strip()
After that, it starts creating the CSV files.
There is another problem.
For `BKR` (and a few others) it shows the error `KeyError: 'Date'`.
It probably has a problem reading the data from the server. It needs `try/except` to skip the tickers that fail.
# Report a failed download for this ticker and carry on instead of aborting.
try:
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv('stock_dfs/{}.csv'.format(ticker))
except Exception as ex:
print('Error:', ex)
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import pickle
import requests
def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them.

    Fetches the "List of S&P 500 companies" page, reads the first cell of
    each data row of the ``wikitable sortable`` table, strips surrounding
    whitespace (the raw cell text ends with a newline), pickles the list to
    ``sp500tickers.pickle``, prints it and returns it.

    Returns:
        list of str: the ticker symbols, in page order.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if the expected table is not present in the page.
    """
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    # Fail loudly on a 4xx/5xx answer instead of parsing an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, "lxml")
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        # Without this guard the failure surfaces as an opaque
        # "'NoneType' object has no attribute ..." error further down.
        raise ValueError(
            "no 'wikitable sortable' table found on the S&P 500 page")

    tickers = []
    # The first <tr> is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells have no ticker to read.
            continue
        tickers.append(cells[0].text.strip())

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)

    print(tickers)
    return tickers
def get_data_from_yahoo(reload_sp500=False):
    """Download daily price history for every S&P 500 ticker into CSV files.

    Args:
        reload_sp500: when true, rebuild the ticker list with
            ``save_sp500_tickers()``. Otherwise the list is loaded from
            ``sp500tickers.pickle``; if that cache file does not exist yet,
            the list is scraped as well instead of failing.

    One ``stock_dfs/<ticker>.csv`` file is written per ticker, covering
    2000-01-01 through 2016-12-31. Tickers whose CSV already exists are
    reported and skipped. A ticker whose download or save fails is reported
    and skipped, so one bad symbol does not abort the whole run.
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        try:
            with open("sp500tickers.pickle", "rb") as f:
                tickers = pickle.load(f)
        except FileNotFoundError:
            # First run: there is no cache yet, so build it.
            tickers = save_sp500_tickers()

    # exist_ok avoids the separate existence check before creating the folder.
    os.makedirs('stock_dfs', exist_ok=True)

    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2016, 12, 31)

    for ticker in tickers:
        print(ticker)
        csv_path = 'stock_dfs/{}.csv'.format(ticker)
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        try:
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.to_csv(csv_path)
        except Exception as ex:
            # Deliberately broad: the reader fails in several ways for
            # individual symbols (e.g. KeyError: 'Date'); report and move on.
            print('Error:', ex)
# Passing True refreshes the ticker list via save_sp500_tickers() before
# downloading the price history.
get_data_from_yahoo(True)