Search code examples
r, date, dplyr, grouping

Create a variable days after treatment


Here's my dataset:

# Example data: 20 rows with a numeric id, a 0/1 `treatment` flag and a date.
# Repeated ids and dates are written as run lengths instead of literal copies.
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  ))
)

I need to create a variable which reflects the days after treatment for each id, like this:

# Desired result: the same three columns plus `dat`, the hand-written
# expected values for the new variable.
df <- data.frame(
  id = c(rep(1, 13), rep(2, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(c(
    "2019-07-07", "2019-07-07", "2019-07-07", "2019-07-07", "2019-07-07",
    "2019-07-06", "2019-07-06", "2019-07-05", "2019-07-05", "2019-04-20",
    "2019-04-20", "2019-04-20", "2019-04-20", "2019-04-19", "2019-04-19",
    "2019-03-14", "2019-03-14", "2019-03-14", "2019-03-14", "2019-03-14"
  )),
  dat = c(0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 0, 0, 1, 0, 1, 2, 3)
)

Can you help me with this?


Solution

  • Here is a way.

    # Rebuild the question's example data (20 rows: id, treatment flag, date).
    # Each distinct date is listed once, followed by how many consecutive
    # rows carry it.
    df <- data.frame(
      id = rep.int(c(1, 2), times = c(13L, 7L)),
      treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
      date = lubridate::ymd(rep.int(
        c("2019-07-07", "2019-07-06", "2019-07-05",
          "2019-04-20", "2019-04-19", "2019-03-14"),
        times = c(5L, 2L, 2L, 4L, 2L, 5L)
      ))
    )
    
    suppressPackageStartupMessages(library(dplyr))
    
    # Within each id, number the rows since the most recent treatment: the
    # treatment row itself is 0 and rows before the first treatment are 0.
    df %>%
      group_by(id) %>%
      # Running treatment count per id; it increases on every treatment row,
      # so each value marks one block of rows.
      mutate(episode = cumsum(treatment)) %>%
      group_by(id, episode) %>%
      # Zero-based position inside the block, forced to 0 while no treatment
      # has been counted yet.
      mutate(days = ifelse(episode <= 0, 0, seq_len(n()) - 1L)) %>%
      ungroup() %>%
      # Drop the helper so only id, treatment, date and days remain.
      select(-episode)
    #> # A tibble: 20 × 4
    #>       id treatment date        days
    #>    <dbl>     <dbl> <date>     <dbl>
    #>  1     1         0 2019-07-07     0
    #>  2     1         0 2019-07-07     0
    #>  3     1         1 2019-07-07     0
    #>  4     1         0 2019-07-07     1
    #>  5     1         0 2019-07-07     2
    #>  6     1         0 2019-07-06     3
    #>  7     1         1 2019-07-06     0
    #>  8     1         0 2019-07-05     1
    #>  9     1         0 2019-07-05     2
    #> 10     1         0 2019-04-20     3
    #> 11     1         0 2019-04-20     4
    #> 12     1         0 2019-04-20     5
    #> 13     1         0 2019-04-20     6
    #> 14     2         0 2019-04-19     0
    #> 15     2         1 2019-04-19     0
    #> 16     2         0 2019-03-14     1
    #> 17     2         1 2019-03-14     0
    #> 18     2         0 2019-03-14     1
    #> 19     2         0 2019-03-14     2
    #> 20     2         0 2019-03-14     3
    

    Created on 2022-08-24 by the reprex package (v2.0.1)