Search code examples
rreturncalculated-columnsstock

Calculate standard deviation based on a specific time frame


I want to calculate the standard deviation of daily log returns over a period of time after a certain date. In other words: I want to set a data (e.g. 2019-01-15) and I want to calculate the standard deviation of the log returns for the 5 days after this date (so from 2019-01-16 to 2019-01-20). The problem is, that those start dates are different for the stocks. So I always have to link the ticker to the start date and then find the subsequent 5 days.

For this purpose, I need to link two data frames: Data frame 1 contains the identifier of the stock and one specific date (start date). Data frame 2 contains a list of all daily log returns over a period of one year (2020) for different tickers. Data frame 2 looks like this:

enter image description here

Here is the code to reproduce the example:

#Data frame 1: 

Identifier <- c("ACCR.PK", "ANIX.OQ", "TLRS.PK")
Dates <- c("2019-11-22", "2019-11-01", "2019-11-15")
df1 <- data.frame(Identifier, Dates)


# Data frame 2: 
 
Timeframe <- c("2019-11-04", "2019-11-05", "2019-11-06", "2019-11-07", "2019-11-08", "2019-11-09", "2019-11-10", "2019-11-11", "2019-11-12", "2019-11-13", "2019-11-14", "2019-11-15", "2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
 "2019-11-21", "2019-11-22", "2019-11-23", "2019-11-24", "2019-11-25", "2019-11-26", "2019-11-27", "2019-11-28", "2019-11-29", "2019-11-30")

ACCR.PK <- c(-0.15415068, 0.15415068, 0.487703206, 0.782759339, -0.577315365, 0, 0.145953913, -0.01242252, -0.064538521, 0.026317308, -0.124297717, 0.097980408, -0.679901954, 0.051293294, -0.162518929, 0.028987537, 0.451985124,
 -0.09531018, 0, -0.105360516, -0.045462374, 0.022989518, 0.127833372, 0, 0.336472237, 0, -0.15415068)
 
HURC.OQ <- c(0.00252986782857967, 0.00392267244379774,-0.00673403218134361, 0.00334262149668962,  0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745, -0.00201236232924185, -0.0104137475666262, -0.00934859277129974,
 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812) 
 
ANIX.OQ <- c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639, -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, 
 -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812)

UBP.N <- c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568,
 0.0224488315394535, -0.0116960397631916, -0.00643779047484871, -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -0.000087987847382287, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942)

TLRS.PK <- c(-0.0723206615796261, -0.0645385211375711,-0.0689928714869512, 0.0689928714869512, 0.0744888519907394, -0,143481723477691, -0.0037336695520489, -0.0433188747188424, 0, -0.092709398104267, 0,123481056771021,  0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, -0,130053128248198, 0,183478294921779, 0, -0,141323855167744, -0.033855636939339, -0.0422003544903764)

df2 <- data.frame(Timeframe, UBP.N, HURC.OQ, ANIX.OQ, TLRS.PK)

I am stuck with combining the two data frames and to find the subsequent dates as well as linking them to the daily log returns.

Can somebody help me?


Solution

  • After converting the dates to Date-class,

    df1$Dates <- as.Date(df1$Dates)
    df2$Timeframe <- as.Date(df2$Timeframe)
    

    we can do this with dplyr:

    library(dplyr)
    library(tidyr) # pivot_longer
    df1 %>%
      mutate(rn = row_number()) %>%
      rowwise() %>%
      summarize(
        rn,
        Identifier,
        Timeframe = seq(Dates, Dates + 5, by = "days")
      ) %>%
      left_join(
        pivot_longer(df2, -Timeframe, names_to = "Identifier"),
        by = c("Identifier", "Timeframe")
      ) %>%
      group_by(rn, Identifier) %>%
      summarize(
        Timeframe = first(Timeframe),
        sigma = sd(value, na.rm = TRUE),
        .groups = "drop"
      )
    # # A tibble: 3 × 4
    #      rn Identifier Timeframe      sigma
    #   <int> <chr>      <date>         <dbl>
    # 1     1 ACCR.PK    2019-11-22 NA       
    # 2     2 ANIX.OQ    2019-11-01  2.19e- 2
    # 3     3 TLRS.PK    2019-11-15  5.04e+13
    

    Quick validation:

    sd(filter(df2, between(Timeframe, df1$Dates[2], df1$Dates[2]+5))$ANIX.OQ)
    # [1] 0.02188327
    

    Data

    df1 <- structure(list(Identifier = c("ACCR.PK", "ANIX.OQ", "TLRS.PK"), Dates = structure(c(18222, 18201, 18215), class = "Date")), row.names = c(NA, -3L), class = "data.frame")
    df2 <- structure(list(Timeframe = structure(c(18204, 18205, 18206, 18207, 18208, 18209, 18210, 18211, 18212, 18213, 18214, 18215, 18216, 18217, 18218, 18219, 18220, 18221, 18222, 18223, 18224, 18225, 18226, 18227, 18228, 18229, 18230), class = "Date"), UBP.N = c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568, 0.0224488315394535, -0.0116960397631916, -0.00643779047484871,  -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -8.7987847382287e-05, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942), HURC.OQ = c(0.00252986782857967, 0.00392267244379774, -0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745,  -0.00201236232924185, -0.0104137475666262, -0.00934859277129974, 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812), ANIX.OQ = c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639,  -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812), TLRS.PK = c(-0.0723206615796261, -0.0645385211375711, -0.0689928714869512, 0.0689928714869512, 0.0744888519907394, 0, 143481723477691, -0.0037336695520489,  -0.0433188747188424, 0, -0.092709398104267, 0, 123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, 0, 130053128248198, 0, 183478294921779, 0, 0, 141323855167744, -0.033855636939339, -0.0422003544903764)), row.names = c(NA, -27L), class = "data.frame")