I am trying to do the following rolling regression: stock_return = α + β market_return.
My dataset "data" looks as follows:
Company Date stock_return market_return Alpha Beta
AAPL 01.01.2014 3% 4%
…
AAPL 31.12.2019 5% 1%
MSFT 01.01.2014 2% 4%
…
MSFT 31.12.2019 6% 1%
Moreover, I have a dataframe with events at which the regression should be performed:
Company Date
AAPL 05.02.2015
…
MSFT 04.08.2018
I am using the runner package for the rolling regression:
running_regression <- function(z) {
coef(lm(stock_return ~ market_return , data = as.data.frame(z)))
}
output <- runner(seq_along(data$market_return),
k = 180,
lag = 5,
at = ?
f = running_regression)
However, I struggle with my function and what to put into "at" to just do a regression for the events in my second dataframe.
Please see below a minimal example:
Date <- seq(from = as.Date("2014-01-01"), to = as.Date("2019-12-31"), by = 'day')
Date <- format(Date, format="%d.%m.%Y")
Company.name <- c(replicate(2191, ""))
AAPL <- data.frame(Company.name = "AAPL",Date)
market_return <- c(rnorm(2191))
AAPL <- cbind(AAPL, market_return)
MSFT <- c(replicate(2191, "MSFT"))
MSFT <- data.frame(Company.name = "MSFT",Date)
MSFT <- cbind(MSFT, market_return)
df <- rbind(AAPL, MSFT)
stock_return <- c(rnorm(4382))
df <- cbind(df,stock_return)
df[,"Alpha"] <- NA
df[,"Beta"] <- NA
Company.name2 <- c(replicate(450, "AAPL"),replicate(450, "MSFT"))
Event_date <- sample(seq(as.Date('2015/01/01'), as.Date('2019/12/31'), by="day"), 900)
Event_date <- format(Event_date, format="%d.%m.%Y")
df2 <- data.frame(Company.name2, Event_date)
#____
running_regression <- function(z) {
coef(lmList(stock_return ~ market_return | Company.name, data=z))
}
runner(df$market_return,
k = 180,
lag = 5,
f = running_regression)
Since runner version 0.3.5
you can specify x = df
and do running regression on windows from data.frame
. Because you mutate df2
you have to subset relevant part of df
by x = df[df$Company.name == Company.name2,]
- you have to do the same with idx
.
running_regression_intercept <- function(x) {
coef(lm(stock_return ~ market_return, data = x))[1]
}
running_regression_slope <- function(x) {
coef(lm(stock_return ~ market_return, data = x))[2]
}
library(dplyr)
library(runner)
df2 %>%
group_by(Company.name2) %>%
mutate(
intercept = runner(
x = df[df$Company.name == Company.name2[1], ],
k = "180 days",
lag = "5 days",
idx = df$Date[df$Company.name == Company.name2[1]],
at = Event_date,
f = running_regression_intercept,
),
slope = runner(
x = df[df$Company.name == Company.name2[1], ],
k = "180 days",
lag = "5 days",
idx = df$Date[df$Company.name == Company.name2[1]],
at = Event_date,
f = running_regression_slope
)
)
# Company.name2 Event_date alpha beta
# <fct> <date> <dbl> <dbl>
# 1 AAPL 2017-01-12 0.0114 0.00488
# 2 AAPL 2017-07-31 -0.0654 0.00574
# 3 AAPL 2019-02-27 -0.0861 0.0310
# 4 AAPL 2018-09-06 0.0405 -0.0630
# 5 AAPL 2015-09-03 -0.121 -0.0246
# 6 AAPL 2018-11-20 -0.0283 -0.0254
# 7 AAPL 2015-07-03 -0.116 -0.0186
# 8 AAPL 2015-02-03 0.102 0.0409
# 9 AAPL 2017-03-16 -0.0157 0.0124
# 10 AAPL 2019-06-08 -0.00302 0.0532
I needed to modify your data a bit because format changed Event.Date from Date
to character
.
Date <- seq(from = as.Date("2014-01-01"),
to = as.Date("2019-12-31"),
by = 'day')
market_return <- c(rnorm(2191))
AAPL <- data.frame(
Company.name = "AAPL",
Date = Date,
market_return = market_return
)
MSFT <- data.frame(
Company.name = "MSFT",
Date = Date,
market_return = market_return
)
df <- rbind(AAPL, MSFT)
df$stock_return <- c(rnorm(4382))
df <- df[order(df$Date),]
df2 <- data.frame(
Company.name2 = c(replicate(450, "AAPL"), replicate(450, "MSFT")),
Event_date = sample(
seq(as.Date('2015/01/01'),
as.Date('2019/12/31'),
by="day"),
size = 900)
)