I want to calculate the standard deviation of daily log returns over a period of time after a certain date. In other words: I want to set a data (e.g. 2019-01-15) and I want to calculate the standard deviation of the log returns for the 5 days after this date (so from 2019-01-16 to 2019-01-20). The problem is, that those start dates are different for the stocks. So I always have to link the ticker to the start date and then find the subsequent 5 days.
For this purpose, I need to link two data frames: Data frame 1 contains the identifier of the stock and one specific date (start date). Data frame 2 contains a list of all daily log returns over a period of one year (2020) for different tickers. Data frame 2 looks like this:
Here is the code to reproduce the example:
#Data frame 1:
Identifier <- c("ACCR.PK", "ANIX.OQ", "TLRS.PK")
Dates <- c("2019-11-22", "2019-11-01", "2019-11-15")
df1 <- data.frame(Identifier, Dates)
# Data frame 2:
Timeframe <- c("2019-11-04", "2019-11-05", "2019-11-06", "2019-11-07", "2019-11-08", "2019-11-09", "2019-11-10", "2019-11-11", "2019-11-12", "2019-11-13", "2019-11-14", "2019-11-15", "2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
"2019-11-21", "2019-11-22", "2019-11-23", "2019-11-24", "2019-11-25", "2019-11-26", "2019-11-27", "2019-11-28", "2019-11-29", "2019-11-30")
ACCR.PK <- c(-0.15415068, 0.15415068, 0.487703206, 0.782759339, -0.577315365, 0, 0.145953913, -0.01242252, -0.064538521, 0.026317308, -0.124297717, 0.097980408, -0.679901954, 0.051293294, -0.162518929, 0.028987537, 0.451985124,
-0.09531018, 0, -0.105360516, -0.045462374, 0.022989518, 0.127833372, 0, 0.336472237, 0, -0.15415068)
HURC.OQ <- c(0.00252986782857967, 0.00392267244379774,-0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745, -0.00201236232924185, -0.0104137475666262, -0.00934859277129974,
0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812)
ANIX.OQ <- c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639, -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179,
-0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812)
UBP.N <- c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568,
0.0224488315394535, -0.0116960397631916, -0.00643779047484871, -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -0.000087987847382287, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942)
TLRS.PK <- c(-0.0723206615796261, -0.0645385211375711,-0.0689928714869512, 0.0689928714869512, 0.0744888519907394, -0,143481723477691, -0.0037336695520489, -0.0433188747188424, 0, -0.092709398104267, 0,123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, -0,130053128248198, 0,183478294921779, 0, -0,141323855167744, -0.033855636939339, -0.0422003544903764)
df2 <- data.frame(Timeframe, UBP.N, HURC.OQ, ANIX.OQ, TLRS.PK)
I am stuck with combining the two data frames and to find the subsequent dates as well as linking them to the daily log returns.
Can somebody help me?
After converting the dates to Date
-class,
df1$Dates <- as.Date(df1$Dates)
df2$Timeframe <- as.Date(df2$Timeframe)
we can do this with dplyr:
library(dplyr)
library(tidyr) # pivot_longer
df1 %>%
mutate(rn = row_number()) %>%
rowwise() %>%
summarize(
rn,
Identifier,
Timeframe = seq(Dates, Dates + 5, by = "days")
) %>%
left_join(
pivot_longer(df2, -Timeframe, names_to = "Identifier"),
by = c("Identifier", "Timeframe")
) %>%
group_by(rn, Identifier) %>%
summarize(
Timeframe = first(Timeframe),
sigma = sd(value, na.rm = TRUE),
.groups = "drop"
)
# # A tibble: 3 × 4
# rn Identifier Timeframe sigma
# <int> <chr> <date> <dbl>
# 1 1 ACCR.PK 2019-11-22 NA
# 2 2 ANIX.OQ 2019-11-01 2.19e- 2
# 3 3 TLRS.PK 2019-11-15 5.04e+13
Quick validation:
sd(filter(df2, between(Timeframe, df1$Dates[2], df1$Dates[2]+5))$ANIX.OQ)
# [1] 0.02188327
Data
df1 <- structure(list(Identifier = c("ACCR.PK", "ANIX.OQ", "TLRS.PK"), Dates = structure(c(18222, 18201, 18215), class = "Date")), row.names = c(NA, -3L), class = "data.frame")
df2 <- structure(list(Timeframe = structure(c(18204, 18205, 18206, 18207, 18208, 18209, 18210, 18211, 18212, 18213, 18214, 18215, 18216, 18217, 18218, 18219, 18220, 18221, 18222, 18223, 18224, 18225, 18226, 18227, 18228, 18229, 18230), class = "Date"), UBP.N = c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568, 0.0224488315394535, -0.0116960397631916, -0.00643779047484871, -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -8.7987847382287e-05, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942), HURC.OQ = c(0.00252986782857967, 0.00392267244379774, -0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745, -0.00201236232924185, -0.0104137475666262, -0.00934859277129974, 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812), ANIX.OQ = c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639, -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812), TLRS.PK = c(-0.0723206615796261, -0.0645385211375711, -0.0689928714869512, 0.0689928714869512, 0.0744888519907394, 0, 143481723477691, -0.0037336695520489, -0.0433188747188424, 0, -0.092709398104267, 0, 123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, 0, 130053128248198, 0, 183478294921779, 0, 0, 141323855167744, -0.033855636939339, -0.0422003544903764)), row.names = c(NA, -27L), class = "data.frame")