Search code examples
rdatedplyrlag

Want to increment a previous date row field by a value in another column and Lag Not working


I have a simple table (included below), where I want to create a third column, call it st_date, where the value on the first row will be a fixed value (say 01/30/2020).

For each subsequent row, I want the value in st_date to be the lagged Date value from the previous row + the value in lengths in terms of business days (not weekends)

Ex: So on row #2, the value should be 01/30/2020 + 7 Working Day = 02/10/2020 On Row #2, the value should be 02/10/2020 + 10 = 02/25/2020

The code to generate the original table is

tmp <- as.data.frame(unclass(rle(t_1$BB_W_D))) %>% 
  mutate(st_date=df_start_date) 

==> df_start_date is my starting date I want to have in row #1

structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291), class = "Date")), class = "data.frame", row.names = c(NA, 
-113L))

When I run the next code set,

tmp <- tmp %>% 
  mutate(st_date=lag(st_date,1)+lengths)

It creates the below, where it is not retaining the value in Row #1 and each subsequent row is now just incremented from the original value of 01/30/2020.

Not sure where the disconnect is here since I have used lag before and it never exhibited this behavior before

structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(NA, 
18298, 18301, 18297, 18293, 18292, 18293, 18295, 18293, 18295, 
18300, 18298, 18296, 18294, 18296, 18299, 18296, 18301, 18301, 
18294, 18292, 18293, 18297, 18293, 18292, 18293, 18292, 18293, 
18292, 18294, 18292, 18295, 18294, 18304, 18301, 18296, 18292, 
18301, 18292, 18297, 18293, 18294, 18292, 18292, 18294, 18294, 
18293, 18293, 18293, 18293, 18293, 18299, 18303, 18293, 18292, 
18294, 18297, 18299, 18301, 18297, 18293, 18292, 18293, 18295, 
18293, 18295, 18300, 18298, 18296, 18294, 18296, 18299, 18296, 
18301, 18301, 18294, 18292, 18293, 18297, 18293, 18292, 18293, 
18292, 18293, 18292, 18294, 18292, 18295, 18294, 18304, 18301, 
18296, 18292, 18301, 18292, 18297, 18293, 18294, 18292, 18292, 
18294, 18294, 18293, 18293, 18293, 18293, 18293, 18299, 18303, 
18293, 18292, 18294, 18297), class = "Date")), class = "data.frame", row.names = c(NA, 
-113L))

Solution

  • Here is an example that uses a simple for loop. This code

    library(bizdays)
    
    create.calendar(name="my_cal", weekdays = c("saturday", "sunday"))
    
    for (idx in 2:nrow(df)) {
      days.to.add   <- df$lengths[idx]
      
      previous.date <- df$st_date[idx - 1]
      
      new.date      <- offset(previous.date, days.to.add, "my_cal")
      
      df$st_date[idx] <- new.date
    }
    
    head(df)
    

    gives you that output.

      lengths values    st_date
    1       1   <NA> 2020-01-30
    2       7     Up 2020-02-10
    3      10   Down 2020-02-24
    4       6     Up 2020-03-03
    5       2   Down 2020-03-05
    6       1     Up 2020-03-06
    

    Here is the result data frame.

    structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
    4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
    1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
    6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
    1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
    3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
    1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
    1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
    ), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291, 
    18302, 18316, 18324, 18326, 18327, 18331, 18337, 18339, 18345, 
    18358, 18367, 18374, 18379, 18386, 18396, 18403, 18417, 18431, 
    18436, 18437, 18439, 18449, 18451, 18452, 18456, 18457, 18459, 
    18460, 18465, 18466, 18472, 18477, 18494, 18508, 18515, 18516, 
    18530, 18533, 18541, 18543, 18548, 18549, 18550, 18555, 18558, 
    18562, 18564, 18568, 18570, 18572, 18584, 18600, 18604, 18605, 
    18610, 18618, 18628, 18642, 18652, 18654, 18655, 18659, 18663, 
    18667, 18673, 18684, 18695, 18702, 18705, 18712, 18724, 18731, 
    18745, 18759, 18764, 18765, 18767, 18775, 18779, 18780, 18782, 
    18785, 18787, 18788, 18793, 18794, 18800, 18803, 18822, 18836, 
    18843, 18844, 18858, 18859, 18869, 18871, 18876, 18877, 18878, 
    18883, 18886, 18890, 18892, 18894, 18898, 18900, 18912, 18928, 
    18932, 18933, 18936, 18946), class = "Date")), row.names = c(NA, 
    -113L), class = "data.frame")
    

    HTH