So I am trying to build a trading software and I am using the code from an online YouTuber. I am gathering all of the data for the companies on the S&P 500 in the get_data_from_yahoo() function. So when I run that code it says Already Have (then the given ticker) which is fine, but when I got to print the data for this in the following function, which is compile_data(), it only print one ticker which is ZTS. Anyone have any ideas?
import bs4 as bs
import datetime as dt
import os
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import fix_yahoo_finance as yf
def save_sp500_tickers():
resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[0].text.replace('.', '-')
ticker = ticker[:-1]
tickers.append(ticker)
with open("sp500tickers.pickle", "wb") as f:
pickle.dump(tickers, f)
print(tickers)
return tickers
save_sp500_tickers()
def get_data_from_yahoo(reload_sp500=False):
if reload_sp500:
tickers = save_sp500_tickers()
else:
with open("sp500tickers.pickle", "rb") as f:
tickers = pickle.load(f)
if not os.path.exists('stock_dfs'):
os.makedirs('stock_dfs')
start = dt.datetime(2019, 6, 8)
end = dt.datetime.now()
for ticker in tickers:
print(ticker)
if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
df = pdr.get_data_yahoo(ticker, start, end)
df.reset_index(inplace=True)
df.set_index("Date", inplace=True)
df.to_csv('stock_dfs/{}.csv'.format(ticker))
else:
print('Already have {}'.format(ticker))
save_sp500_tickers()
get_data_from_yahoo()
def complied_data():
with open("sp500tickers.pickle","rb") as f:
tickers = pickle.load(f)
main_df = pd.DataFrame()
for count, ticker in enumerate(tickers):
df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
df.set_index('Date', inplace=True)
df.rename(columns = {'Adj Close':ticker}, inplace=True)
df.drop(['Open', 'High', 'Low','Close','Volume'], 1, inplace=True)
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df, how='outer')
if count % 10 == 0:
print(count)
print(main_df.head())
main_df.to_csv('sp500_joined_closes.csv')
complied_data()
When I run this code this is what it says:
MMM
Already have MMM
ABT
Already have ABT
ABBV
Already have ABBV
ABMD
Already have ABMD
ACN
Already have ACN
ATVI
Already have ATVI
ADBE
Already have ADBE
AMD
Already have AMD
AAP
Already have AAP
AES
Already have AES
AMG
Already have AMG
AFL
Already have AFL
A
Already have A
APD
Already have APD
AKAM
Already have AKAM
ALK
Already have ALK
ALB
Already have ALB
It then continues to say that it already has all of the 500 companies(I did not show the hole thing because the list is very long). But when I run the compile_data() function it only prints the data for one ticker:
ZTS
Date
2019-01-02 83.945038
2019-01-03 81.043526
2019-01-04 84.223267
2019-01-07 84.730026
2019-01-08 85.991997
The problem is in a for loop, specifically the one in complied_data
.
The if-else and if blocks should be included in the for loop:
for count, ticker in enumerate(tickers):
df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
df.set_index('Date', inplace=True)
df.rename(columns = {'Adj Close':ticker}, inplace=True)
df.drop(['Open', 'High', 'Low','Close','Volume'], 1, inplace=True)
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df, how='outer')
if count % 10 == 0:
print(count)
Otherwise they will be evaluated only after it is done looping and elaborate the last element.
The following is the output when changing to the above indentation:
(... omitted counting from 0)
470
480
490
500
MMM ABT ABBV ABMD ... YUM ZBH ZION ZTS
Date ...
2019-06-10 165.332672 80.643486 74.704918 272.429993 ... 107.794380 121.242027 43.187107 109.920105
2019-06-11 165.941788 80.494644 75.889320 262.029999 ... 106.722885 120.016762 43.758469 109.860268
2019-06-12 166.040024 81.318237 76.277657 254.539993 ... 108.082100 120.225945 43.512192 111.136780
2019-06-13 165.882843 81.655624 76.646561 255.529999 ... 108.121788 119.329407 44.063854 109.730621
2019-06-14 163.760803 81.586166 76.394157 250.960007 ... 108.925407 116.998398 44.211620 110.488556
[5 rows x 505 columns]