I have this data in R for two time series such that growth in one of the series results in growth of the other series after some lag:
df_long = structure(list(time = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L,
19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L,
26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L,
32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L,
39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L,
45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L, 51L, 51L,
52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L, 56L, 57L, 57L, 58L,
58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L, 63L, 63L, 64L, 64L,
65L, 65L, 66L, 66L, 67L, 67L, 68L, 68L, 69L, 69L, 70L, 70L, 71L,
71L, 72L, 72L, 73L, 73L, 74L, 74L, 75L, 75L, 76L, 76L, 77L, 77L,
78L, 78L, 79L, 79L, 80L, 80L, 81L, 81L, 82L, 82L, 83L, 83L, 84L,
84L, 85L, 85L, 86L, 86L, 87L, 87L, 88L, 88L, 89L, 89L, 90L, 90L,
91L, 91L, 92L, 92L, 93L, 93L, 94L, 94L, 95L, 95L, 96L, 96L, 97L,
97L, 98L, 98L, 99L, 99L, 100L, 100L), series = c("Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging",
"Leading", "Lagging", "Leading", "Lagging", "Leading", "Lagging"
), value = c(99.4395243534478, 49.6447967181503, 99.7698225105167,
50.1284418545783, 101.558708314149, 49.8766540607688, 100.070508391425,
49.8262287003011, 100.129287735161, 49.5241907163675, 101.715064986883,
49.9774861375955, 100.460916205989, 49.6075477652715, 98.7349387653935,
49.1660290317059, 99.3131471481065, 49.8098867398561, 99.5543380299001,
50.4594983045304, 105.273045069337, 49.7123265186958, 108.549174635345,
50.3039821611125, 112.937213377001, 49.1910586458554, 117.115339122692,
49.9722190172377, 120.989024510128, 50.2597036019717, 123.839343294228,
51.755395134878, 122.271001469299, 53.3074738414986, 119.272599768477,
54.6031368856927, 122.518596933253, 56.2319063541722, 121.090067201558,
57.9292390886276, 120.36611942886, 58.5975047842126, 121.400090427431,
57.9741037415852, 120.416998950092, 58.2415311831348, 120.778482610349,
58.378760524451, 120.904834400657, 59.6078353210825, 119.613169928027,
58.1470703597842, 122.684586279277, 58.6664163996338, 121.851892088966,
58.5742772528016, 120.280572632256, 57.9656861397817, 123.190747802792,
58.4869121631259, 118.518624697645, 59.3741239062539, 114.137088885571,
58.7929070388491, 112.034578571703, 58.5527808934215, 108.655239045281,
58.2813661386636, 105.336496980897, 57.3269099865169, 105.197602897623,
57.770224894356, 105.05684725131, 54.9386347844557, 104.413440281713,
54.8175553805619, 104.158460423083, 54.1228606240017, 104.080615509722,
51.0859082195604, 103.752307653576, 52.1981060037744, 104.260896403218,
51.6984323802381, 103.156062100451, 51.0194301601899, 106.744208989045,
51.0492227278142, 105.740180515963, 51.0041950032861, 103.304721692197,
51.5591488246471, 104.057197956036, 51.0766493498308, 103.990571714523,
52.190917844231, 105.293017402957, 52.9229183439836, 104.391022038711,
51.1672169581333, 109.979926253532, 52.242659958441, 115.154250045687,
52.232968695738, 120.894638533399, 52.0065357369094, 128.731853517781,
51.3116553533773, 133.042453009798, 51.7724232491348, 135.365618993348,
53.7565596327335, 131.278342696254, 56.3796575039867, 134.123048411341,
58.0894275243687, 133.508655541522, 61.2312725926207, 133.631448007239,
62.8281739067728, 133.849728541686, 63.7282872621704, 132.673688259431,
62.4027438857521, 132.89919355217, 62.2696917234875, 131.985299846747,
65.1083435050655, 131.914339976542, 62.8015123399534, 133.748239678934,
63.252476525753, 133.941162576869, 63.4658499336833, 133.414181646151,
62.7593077271382, 134.573287710474, 63.3903570339516, 136.077158706804,
63.2970863506865, 130.034970874851, 62.9285726540986, 125.105907922438,
63.1055777418292, 126.764059517985, 63.0429167717537, 122.11954358861,
64.4066965237066, 119.702696027261, 62.5968819121748, 121.768107679415,
61.3752461179914, 120.188723836278, 61.0855592688391, 119.060611279229,
60.2720388416955, 120.750494990473, 59.3822078713432, 120.36455785106,
57.9114629572074, 120.538914027895, 57.5595080676796, 120.996352384691,
58.9130288733449, 120.085202516525, 57.9747112805574, 121.308646065858,
57.6745921324798, 120.266209552808, 58.0406683045465, 120.931869666359,
58.0634180917107, 121.854007971433, 58.8238622264472, 121.056499149341,
58.2274299156511, 120.139114592318, 58.6168257440383, 121.916646753284,
57.8876525769674, 121.729456074554, 58.3028916163668, 121.192959979937,
57.9892351464628, 120.819714395897, 58.2331582717497, 119.775138801059,
57.6572256721037, 122.17198749257, 57.4155315048015, 119.808461657746,
59.3400727686692, 123.168401808201, 58.5276125109227, 122.379252065383,
57.4501650462833, 120.247872063789, 57.8225664327736, 119.294801047208,
57.4884411818443)), row.names = c(NA, -200L), class = c("tbl_df",
"tbl", "data.frame"))
When I plot it, it looks like this:
library(ggplot2)
library(tidyr)
ggplot(df_long, aes(x = time, y = value, color = series)) +
geom_line(size = 1) +
theme_minimal() +
labs(
title = "Time Series with Lagged Growth Pattern",
subtitle = "Leading series (blue) shows growth patterns that are followed by lagging series (red)",
x = "Time",
y = "Value",
color = "Series"
) +
scale_color_manual(values = c("Leading" = "#2c7bb6", "Lagging" = "#d7191c")) +
theme(
legend.position = "bottom",
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10)
)
I want to create a function in R that studies how growth in one series results in growth of the other series. For example:
Step 1: Take the first 5 points of Series 1 Window 1 (points 1-5) and Series 1 Window 2 (points 2-6). Calculate % growth (i.e. Series1Window2Sum-Series1Window1Sum/Series1Window1Sum * 100)
Step 2: Then, wait 3 points (i.e. 5 + 3 = 8)
Step 3: Then, look at Series 2 Window 1 (points 8-12) and Series 2 Window 2 (points 9-13). Calculate % growth (i.e. Series2Window2Sum-Series2Window1Sum/Series2Window1Sum * 100)
Step 4: Advance 1 point in Series 1. Calculate % growth between Series 1 Window 1 (points 2-6) and Series 1 Window 2 (points 3-7), etc.
Step 5: Repeat Steps for all data (i.e. sliding window)
I tried to write a function for this manually:
analyze_growth_patterns <- function(data, window = 5, lag = 3, dominant = 1) {
series1 <- data$value[data$series == unique(data$series)[1]]
series2 <- data$value[data$series == unique(data$series)[2]]
times <- unique(data$time)
dom_values <- if(dominant == 1) series1 else series2
fol_values <- if(dominant == 1) series2 else series1
max_start <- length(dom_values) - (window + lag + window) + 1
results <- lapply(1:max_start, function(i) {
dom_window1_sum <- sum(dom_values[i:(i + window - 1)])
dom_window2_sum <- sum(dom_values[(i + 1):(i + window)])
dom_growth <- (dom_window2_sum - dom_window1_sum) / dom_window1_sum * 100
fol_start <- i + window + lag
fol_window1_sum <- sum(fol_values[fol_start:(fol_start + window - 1)])
fol_window2_sum <- sum(fol_values[(fol_start + 1):(fol_start + window)])
fol_growth <- (fol_window2_sum - fol_window1_sum) / fol_window1_sum * 100
data.frame(
time = times[i],
dominant_growth = dom_growth,
following_growth = fol_growth,
following_time = times[fol_start],
dom_window1_sum = dom_window1_sum,
dom_window2_sum = dom_window2_sum,
fol_window1_sum = fol_window1_sum,
fol_window2_sum = fol_window2_sum
)
})
do.call(rbind, results)
}
plot_growth_analysis <- function(results) {
library(gridExtra)
library(ggplot2)
p1 <- ggplot(results) +
geom_line(aes(x = time, y = dominant_growth, color = "Dominant"), size = 1) +
geom_line(aes(x = following_time, y = following_growth, color = "Following"), size = 1) +
scale_color_manual(values = c("Dominant" = "#2c7bb6", "Following" = "#d7191c")) +
theme_minimal() +
labs(title = "Growth Patterns Over Time",
y = "Growth (%)",
color = "Series") +
theme(legend.position = "bottom")
p2 <- ggplot(results, aes(dominant_growth, following_growth)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(title = paste("Correlation:",
round(cor(results$dominant_growth, results$following_growth), 3)),
x = "Dominant Series Growth (%)",
y = "Following Series Growth (%)") +
theme(legend.position = "bottom")
grid.arrange(p1, p2, ncol = 2)
}
Finally, the plots:
results <- analyze_growth_patterns(df_long, window = 5, lag = 3)
plot_growth_analysis(results)
Are there any built in time series functions in R that can do most of this by itself? E.g. zoo, forecast?
PS (unrelated): : Something similar and much simpler - I included it in case anyone was interested:
library(ggplot2)
library(dplyr)
library(gridExtra)
library(tidyr)
p1 <- ggplot(df_long, aes(x = time, y = value, color = series)) +
geom_line() +
theme_minimal() +
labs(title = "Original Time Series") +
theme(legend.position = "bottom")
df_wide <- df_long %>%
pivot_wider(
id_cols = time,
names_from = series,
values_from = value
)
plot_list <- list()
for(i in 0:7) {
df_lag <- df_wide %>%
mutate(Lagging_lag = lag(Lagging, i)) %>%
na.omit()
cor_val <- cor(df_lag$Leading, df_lag$Lagging_lag)
plot_list[[i+1]] <- ggplot(df_lag, aes(x = Leading, y = Lagging_lag)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "black") +
theme_minimal() +
labs(
title = sprintf("Leading vs Lagging (Lag=%d)", i),
subtitle = sprintf("Correlation: %.3f", cor_val)
)
}
final_plot <- grid.arrange(
p1,
plot_list[[1]], plot_list[[2]], plot_list[[3]],
plot_list[[4]], plot_list[[5]], plot_list[[6]],
plot_list[[7]], plot_list[[8]],
top = "Time Series Analysis with Lags",
ncol = 3
)
If the objective here is to align the series we can use dtw
from the dtw package - also see packages ptw, rucrdtw, IncDTW and twdtw. See ?dtw and the dtw package vignette for more info. Suggest playing around with the window.type
and window.size
arguments.
Not shown here but we could also apply dtw
to growth <- lapply(df_wide, diff, 5)
in the same way to relate slopes.
library(dtw)
df_wide <- unstack(df_long, value ~ series)
d <- with(data.frame(wide), dtw(Leading, Lagging, keep = TRUE,
window.type = "sakoechiba", window.size = 5))
plot(d, type = "twoway")
Seems to work better with the revised window.type
.