I am working on the following project and I am having issues with calculating the yearly beta, alpha and rsquared instead of a single one of each one for the whole 6 years of data I have.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas_datareader import data as pdr
import datetime
import yfinance as yf
import statsmodels.api as sm
from statsmodels import regression
yf.pdr_override()
cryptos = ["BTC", "ETH", "XRP", "BCH", "USDT", #"BSV", #BSV does not have data in yahoo finance for the time period requested
"LTC", "BNB", "EOS", "LINK", "XMR",
"BTG", "ADA", "XLM", "TRX", "USDC"]
def get_and_process_data(c):
raw_data = pdr.get_data_yahoo(c + '-USD', start="2015-01-01", end="2021-01-01")
return raw_data.Close.pct_change()[1:]
df = pd.DataFrame({c: get_and_process_data(c) for c in cryptos})
df['MKT Return'] = df.mean(axis=1) # avg market return
print(df) # show dataframe with all data
def model(x, y):
# Calculate r-squared
X = sm.add_constant(x) # artificially add intercept to x, as advised in the docs
model = sm.OLS(y,X).fit()
rsquared = model.rsquared
# Fit linear regression and calculate alpha and beta
X = sm.add_constant(x)
model = regression.linear_model.OLS(y,X).fit()
alpha = model.params[0]
beta = model.params[1]
return beta, alpha, rsquared
results = pd.DataFrame({c: model(df[df[c].notnull()]['MKT Return'], df[df[c].notnull()][c]) for c in cryptos}).transpose()
results.columns = ['beta', 'alpha', 'rsquared']
# change format of each statistic
results['rsquared'] = results['rsquared'].round(decimals=2)
results['alpha'] = results['alpha'].round(decimals=4)
results['beta'] = results['beta'].round(decimals=2)
results['rsquared'] = results['rsquared'].mul(100).astype(int).astype(str).add('%') # change rsquared to percentage form
# rename index of statistics crypto labels
results = results.rename(index={'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero', 'BTG': 'Bitcoin Gold', 'ADA': 'Cardano',
'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin'})
# print all results
print(results)
while True:
try:
crypto = input("Cryptocurrency: ")
if crypto == "BTC" or crypto == "Bitcoin":
print(results.iloc[0,:])
elif crypto == "ETH" or crypto == "Ethereum":
print(results.iloc[1,:])
elif crypto == "XRP" or crypto == "Ripple":
print(results.iloc[2,:])
elif crypto == "BCH" or crypto == "Bitcoin Cash":
print(results.iloc[3,:])
elif crypto == "USDT" or crypto == "Tether":
print(results.iloc[4,:])
elif crypto == "LTC" or crypto == "Litecoin":
print(results.iloc[5,:])
elif crypto == "BNB" or crypto == "Binance Coin":
print(results.iloc[6,:])
elif crypto == "EOS":
print(results.iloc[7,:])
elif crypto == "LINK" or crypto == "Chainlink":
print(results.iloc[8,:])
elif crypto == "XMR" or crypto == "Monero":
print(results.iloc[9,:])
elif crypto == "BTG" or crypto == "Bitcoin Gold":
print(results.iloc[10,:])
elif crypto == "ADA" or crypto == "Cardano":
print(results.iloc[11,:])
elif crypto == "XLM" or crypto == "Stellar":
print(results.iloc[12,:])
elif crypto == "TRX" or crypto == "TRON":
print(results.iloc[13,:])
elif crypto == "USDC" or crypto == "USDCoin":
print(results.iloc[14,:])
else:
print("No Available Data")
except Exception as e:
print(e)
How can I create a dataframe or multiple dataframes, where I have the statistics for each cryptocurrency for each year, such as: Bitcoin, Ethereum, Ripple,... - beta, alpha, rsquared for 2015, 2016, 2017,...
Also, is it possible to simplify the while True:
section? It is intended to be a user input function where you are able to type a variant of the name of the crypto and the code would output the statistics for the corresponding data, while giving again the option to select a different crypto if desired, as shown with if
and elif
and the try
, except
.
You could wrap your results section in a function, and call it with a filtered data set. For instance, this will give you just the 2020 data.
df_2020 = df.filter(like="2020", axis=0)
You could calculate your results based on df_2020 instead of the entire df dataset. To iterate through them all, maybe you could create a sequence of slices:
df_list = [df.filter(like=year, axis=0) for year in ("2015", "2016", "2017", "2018", "2019", "2020")]
Your while loop can be MUCH simpler. Here's an idea that works fine. Notice you can call results.loc[crypto]
by name instead of hardcoding in row numbers with .iloc
.
abbrev = {'BTC': 'Bitcoin', 'ETH': 'Ethereum', 'XRP': 'Ripple', 'BCH': 'Bitcoin Cash', 'USDT': 'Tether',
'LTC': 'Litecoin', 'BNB': 'Binance Coin', 'EOS': 'EOS', 'LINK': 'Chainlink', 'XMR': 'Monero', 'BTG': 'Bitcoin Gold', 'ADA': 'Cardano',
'XLM': 'Stellar', 'TRX': 'TRON', 'USDC': 'USDCoin'}
while True:
crypto = input("Cryptocurrency: ")
crypto = abbrev.get(crypto, crypto) # if the abbreviation is given, try to get it from the abbrev dict
if crypto in results.index:
print(results.loc[crypto])
elif crypto in ["quit","stop","exit"]:
break
else:
print("No Available Data")