I am trying to create growth rates for thirty days after download the data from yahoo finance. I used the following codes and a function but my output prints the same numbers for all days ( 1-30) days of growth. any help is appreciated.
# Extracting data from yahoo finance
import pandas as pd
import requests
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import time
import datetime
symbols = ['MSFT', 'HD', 'TSLA']
stocks_df = pd.DataFrame({'A' : []})
symbols_daily = yf.download(symbols,
period = "max",
interval = "1d")
for i,ticker in enumerate(symbols):
print(i,ticker)
# Work with stock prices
historyPrices = yf.download(tickers = ticker,
period = "max",
interval = "1d")
# generate features for historical prices, and what we want to predict
historyPrices['Ticker'] = ticker
historyPrices['Year']= historyPrices.index.year
historyPrices['Month'] = historyPrices.index.month
historyPrices['Weekday'] = historyPrices.index.weekday
historyPrices['Date'] = historyPrices.index.date
if stocks_df.empty:
stocks_df = historyPrices
else:
stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=True)
and I created the function:
# function to create growth
def get_growth_df(df:pd.DataFrame)->pd.DataFrame:
for i in range(1, 31):
df['future_growth_'+str(i)+'d'] = df['Adj Close'].shift(-i) / df['Adj Close']
GROWTH_KEYS = [k for k in df.columns if k.startswith('future_growth_')]
return df[GROWTH_KEYS]
get_growth_df(historyPrices).describe()
it gives me the same values for each of the thirty days.
I am expecting something like this for each of the thirty days growth, i.e. I expect output with the different values of mean, std, min, 25%, 50%, 75% quantiles:
future_growth_1d | future_growth_2d | |
---|---|---|
count | 100 | 100 |
mean | 0.99 | 0.83 |
std | 0.15 | 0.12 |
min | 0.29 | 0.29 |
25% | 0.93 | -.0.5 |
50% | 1.03 | 0.95 |
75% | 1.522 | 1.01 |
100% | 1.6 | 1.8 |
You are overwriting the growth values for each day in the loop, resulting in all days having the same values.
Here is how to do this:
import pandas as pd
import requests
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import time
import datetime
symbols = ['MSFT', 'HD', 'TSLA']
stocks_df = pd.DataFrame({'A' : []})
symbols_daily = yf.download(symbols,
period = "max",
interval = "1d")
for i,ticker in enumerate(symbols):
print(i,ticker)
historyPrices = yf.download(tickers = ticker,
period = "max",
interval = "1d")
historyPrices['Ticker'] = ticker
historyPrices['Year']= historyPrices.index.year
historyPrices['Month'] = historyPrices.index.month
historyPrices['Weekday'] = historyPrices.index.weekday
historyPrices['Date'] = historyPrices.index.date
if stocks_df.empty:
stocks_df = historyPrices
else:
stocks_df = pd.concat([stocks_df, historyPrices], ignore_index=True)
def get_growth_df(df: pd.DataFrame) -> pd.DataFrame:
growth_df = pd.DataFrame()
for i in range(1, 31):
growth_df['future_growth_' + str(i) + 'd'] = df['Adj Close'].shift(-i) / df['Adj Close']
return growth_df
growth_df = get_growth_df(historyPrices)
print(growth_df.describe())
which gives
future_growth_1d future_growth_2d future_growth_3d future_growth_4d \
count 3485.000000 3484.000000 3483.000000 3482.000000
mean 1.001999 1.004002 1.006037 1.008135
std 0.035862 0.050936 0.062556 0.072825
min 0.789372 0.733607 0.660825 0.644403
25% 0.983968 0.975536 0.971718 0.966988
50% 1.001210 1.003070 1.004293 1.005382
75% 1.019175 1.029505 1.039817 1.045851
max 1.243951 1.375874 1.573759 1.581697
future_growth_5d future_growth_6d future_growth_7d future_growth_8d \
count 3481.000000 3480.000000 3479.000000 3478.000000
mean 1.010306 1.012427 1.014515 1.016621
std 0.081955 0.090146 0.097482 0.104642
min 0.569541 0.559745 0.594112 0.513476
25% 0.962229 0.958083 0.954501 0.952468
50% 1.006870 1.008695 1.009707 1.010488
75% 1.052710 1.059553 1.063870 1.069472
max 1.564756 1.653522 1.661863 1.648352
future_growth_9d future_growth_10d ... future_growth_21d \
count 3477.000000 3476.000000 ... 3465.000000
mean 1.018717 1.020812 ... 1.045285
std 0.111629 0.118306 ... 0.187711
min 0.498551 0.481948 ... 0.420806
25% 0.948485 0.947469 ... 0.924693
50% 1.011687 1.013544 ... 1.022052
75% 1.074856 1.081088 ... 1.128908
max 1.691109 1.704861 ... 2.154883
future_growth_22d future_growth_23d future_growth_24d \
count 3464.000000 3463.000000 3462.000000
mean 1.047560 1.049827 1.052075
std 0.193157 0.198435 0.203717
min 0.451508 0.449279 0.470774
25% 0.923134 0.920435 0.919296
50% 1.021298 1.022292 1.022174
75% 1.131996 1.138487 1.141386
max 2.121730 2.187785 2.167901
future_growth_25d future_growth_26d future_growth_27d \
count 3461.000000 3460.000000 3459.000000
mean 1.054362 1.056683 1.059002
std 0.208943 0.214268 0.219353
min 0.466464 0.468338 0.482869
25% 0.916874 0.916088 0.914912
50% 1.021142 1.021489 1.021616
75% 1.147301 1.157854 1.160344
max 2.198247 2.306711 2.348946
future_growth_28d future_growth_29d future_growth_30d
count 3458.000000 3457.000000 3456.000000
mean 1.061295 1.063603 1.065939
std 0.224028 0.228895 0.233832
min 0.482295 0.484973 0.407210
25% 0.913807 0.912978 0.912492
50% 1.024461 1.025763 1.025216
75% 1.163379 1.167726 1.172320
max 2.427503 2.420048 2.548036