Search code examples
rtime-seriesmissing-data

calculate rolling percent change missing or uneven data


I need to calculate 2 week percent change for a dataset that may not have samples spaced exactly 14 days apart. This forloop gives me % change for days that are exactly 14 days apart, but can't handle the sampling frequency wobble. I.e. 2022-06-14 % change is NA because there was no sample 2022-05-31 but there is one 2022-05-30. I would like a % change based either on the value of 2022-05-30 or an imputation of 2022-05-31 based on 2022-05-30 and 2022-06-02.

    library(dplyr)
    library(tidyr)
    library(lubridate)
    dat.N1 <- structure(list(date = c("2022-04-27", "2022-04-29", "2022-05-02", 
        "2022-05-04", "2022-05-06", "2022-05-17", "2022-05-19", "2022-05-24", 
        "2022-05-26", "2022-05-30", "2022-06-02", "2022-06-07", "2022-06-09", 
        "2022-06-14", "2022-06-17", "2022-06-21", "2022-06-28", "2022-06-30", 
        "2022-07-05", "2022-07-07", "2022-07-12"), copies_liter = c(168649.864, 
        62449.256, 464682.88, 127620.624, 2110.27168, 20384.6968, 6817.724, 
        145.2679712, 0.3792992, 51.4470568, 0.01, 30094.404, 42225.784, 
        37688.632, 30730.0368, 8108.9016, 6142.6856, 7411.6464, 77131.912, 
        23668.7056, 11973.198)), row.names = 210:230, class = "data.frame")
    
    dat.N1$date <- as.Date(dat.N1$date)
    
    dat.N1$date_min2 <- dat.N1$date-14
dat.N1$prop <-1:21

for (i in 1:21){

  copies_d_current <- dat.N1[i, "copies_liter"]
  copies_d_past <- dat.N1[dat.N1[, "date"]==dat.N1[i, "date_min2"],
                          "copies_liter"] 
  dat.N1$prop[i] <- ifelse(length(copies_d_current/copies_d_past)==0, 
                    NA, 
                    copies_d_current/copies_d_past %>% as.numeric())
  dat.N1$perc <- 100-dat.N1$prop*100
#print(i)
}

Solution

  • I'm not sure what type of imputation you might want, but here is simple linear interpoloation that gives you the percent change 14 days prior.

    dates = seq(min(dat.N1$date), max(dat.N1$date), by="day")
    dat.N1 %>% 
      left_join(
        data.frame(
          date=dates, imp_14d_prior = approxfun(dat.N1$date,dat.N1$copies_liter)(dates)
        ), by=c("date_min2"="date")
      ) %>% 
      mutate(perc_ch = 100-(copies_liter/imp_14d_prior)*100)
    

    Output:

             date copies_liter  date_min2 imp_14d_prior       perc_ch
    1  2022-04-27 1.686499e+05 2022-04-13            NA            NA
    2  2022-04-29 6.244926e+04 2022-04-15            NA            NA
    3  2022-05-02 4.646829e+05 2022-04-18            NA            NA
    4  2022-05-04 1.276206e+05 2022-04-20            NA            NA
    5  2022-05-06 2.110272e+03 2022-04-22            NA            NA
    6  2022-05-17 2.038470e+04 2022-05-03  2.961518e+05  9.311681e+01
    7  2022-05-19 6.817724e+03 2022-05-05  6.486545e+04  8.948944e+01
    8  2022-05-24 1.452680e+02 2022-05-10  8.755517e+03  9.834084e+01
    9  2022-05-26 3.792992e-01 2022-05-12  1.207814e+04  9.999686e+01
    10 2022-05-30 5.144706e+01 2022-05-16  1.872339e+04  9.972523e+01
    11 2022-06-02 1.000000e-02 2022-05-19  6.817724e+03  9.999985e+01
    12 2022-06-07 3.009440e+04 2022-05-24  1.452680e+02 -2.061648e+04
    13 2022-06-09 4.222578e+04 2022-05-26  3.792992e-01 -1.113248e+07
    14 2022-06-14 3.768863e+04 2022-05-31  3.430137e+01 -1.097750e+05
    15 2022-06-17 3.073004e+04 2022-06-03  6.018889e+03 -4.105600e+02
    16 2022-06-21 8.108902e+03 2022-06-07  3.009440e+04  7.305512e+01
    17 2022-06-28 6.142686e+03 2022-06-14  3.768863e+04  8.370149e+01
    18 2022-06-30 7.411646e+03 2022-06-16  3.304957e+04  7.757415e+01
    19 2022-07-05 7.713191e+04 2022-06-21  8.108902e+03 -8.512005e+02
    20 2022-07-07 2.366871e+04 2022-06-23  7.547126e+03 -2.136122e+02
    21 2022-07-12 1.197320e+04 2022-06-28  6.142686e+03 -9.491797e+01