Search code examples
rregressionrunner

Do a rolling regression only on specific dates


I am trying to do the following rolling regression: stock_return = α + β market_return.

My dataset "data" looks as follows:

Company Date        stock_return    market_return       Alpha   Beta
AAPL    01.01.2014  3%              4%
…
AAPL    31.12.2019  5%              1%
MSFT    01.01.2014  2%              4%
…
MSFT    31.12.2019  6%              1%

Moreover, I have a dataframe with events at which the regression should be performed:

Company Date        
AAPL    05.02.2015
…   
MSFT    04.08.2018

I am using the runner package for the rolling regression:

running_regression <- function(z) {
  coef(lm(stock_return ~ market_return , data = as.data.frame(z)))
}

output <- runner(seq_along(data$market_return), 
                         k = 180, 
                         lag = 5,
                         at = ?
                         f = running_regression)

However, I struggle with my function and what to put into "at" to just do a regression for the events in my second dataframe.

Please see below a minimal example:

    Date <- seq(from = as.Date("2014-01-01"), to = as.Date("2019-12-31"), by = 'day')
    Date <- format(Date, format="%d.%m.%Y")

    Company.name <- c(replicate(2191, ""))
    AAPL <- data.frame(Company.name = "AAPL",Date)
    market_return <- c(rnorm(2191))
    AAPL <- cbind(AAPL, market_return)
    MSFT <- c(replicate(2191, "MSFT"))
    MSFT <- data.frame(Company.name = "MSFT",Date)
    MSFT <- cbind(MSFT, market_return)
    df <- rbind(AAPL, MSFT)
    stock_return <- c(rnorm(4382))
    df <- cbind(df,stock_return)
    df[,"Alpha"] <- NA
    df[,"Beta"] <- NA

    Company.name2 <- c(replicate(450, "AAPL"),replicate(450, "MSFT"))
    Event_date <- sample(seq(as.Date('2015/01/01'), as.Date('2019/12/31'), by="day"), 900)
    Event_date <- format(Event_date, format="%d.%m.%Y")
    df2 <- data.frame(Company.name2, Event_date)

    #____

    running_regression <- function(z) {
      coef(lmList(stock_return ~ market_return | Company.name, data=z))
    }


runner(df$market_return, 
       k = 180, 
       lag = 5,
       f = running_regression)

Solution

  • Since runner version 0.3.5 you can specify x = df and do running regression on windows from data.frame. Because you mutate df2 you have to subset relevant part of df by x = df[df$Company.name == Company.name2,] - you have to do the same with idx.

    running_regression_intercept <- function(x) {
      coef(lm(stock_return ~ market_return, data = x))[1]
    }
    
    running_regression_slope <- function(x) {
      coef(lm(stock_return ~ market_return, data = x))[2]
    }
    
    library(dplyr)
    library(runner)
    df2 %>%
      group_by(Company.name2) %>%
      mutate(
        intercept = runner(
          x = df[df$Company.name ==  Company.name2[1], ],
          k = "180 days",
          lag = "5 days",
          idx = df$Date[df$Company.name == Company.name2[1]],
          at = Event_date,
          f = running_regression_intercept,
        ),
        slope = runner(
          x = df[df$Company.name == Company.name2[1], ],
          k = "180 days",
          lag = "5 days",
          idx = df$Date[df$Company.name == Company.name2[1]],
          at = Event_date,
          f = running_regression_slope
        )
      )
    
    # Company.name2 Event_date    alpha     beta
    #     <fct>         <date>        <dbl>    <dbl>
    #   1 AAPL          2017-01-12  0.0114   0.00488
    #   2 AAPL          2017-07-31 -0.0654   0.00574
    #   3 AAPL          2019-02-27 -0.0861   0.0310 
    #   4 AAPL          2018-09-06  0.0405  -0.0630 
    #   5 AAPL          2015-09-03 -0.121   -0.0246 
    #   6 AAPL          2018-11-20 -0.0283  -0.0254 
    #   7 AAPL          2015-07-03 -0.116   -0.0186 
    #   8 AAPL          2015-02-03  0.102    0.0409 
    #   9 AAPL          2017-03-16 -0.0157   0.0124 
    #   10 AAPL          2019-06-08 -0.00302  0.0532 
    

    I needed to modify your data a bit because format changed Event.Date from Date to character.

    Date <- seq(from = as.Date("2014-01-01"), 
                to = as.Date("2019-12-31"), 
                by = 'day')
    market_return <- c(rnorm(2191))
    
    AAPL <- data.frame(
      Company.name = "AAPL", 
      Date =  Date,
      market_return = market_return
    )
    
    MSFT <- data.frame(
      Company.name = "MSFT", 
      Date = Date,
      market_return = market_return
    )
    
    df <- rbind(AAPL, MSFT)
    df$stock_return <- c(rnorm(4382))
    df <- df[order(df$Date),]
    
    df2 <- data.frame(
      Company.name2 = c(replicate(450, "AAPL"), replicate(450, "MSFT")), 
      Event_date = sample(
        seq(as.Date('2015/01/01'), 
            as.Date('2019/12/31'), 
            by="day"),
        size =  900)
      )