Search code examples
r, dataframe, dplyr

Add Numbered Column by groups in R


library(dplyr)
library(tidyr)

# Toy input, written one observation per line. Columns are
# SUBJECT, TIME, A, B, C; column names come from the first row's names.
# A, B and C hold 0/1 values, with one NA in B.
df <- as.data.frame(rbind(
  c(SUBJECT = 1, TIME = 7, A = 1, B = NA, C = 1),
  c(2, 9, 0, 1, 0),
  c(3, 4, 1, 1, 0),
  c(3, 5, 0, 1, 0)
))


# Reshape to one row per (SUBJECT, TIME, EVENT), keeping only the events
# whose flag equals 1, then attach the desired labels by hand.
df_long <- df %>%
  pivot_longer(cols = A:C, names_to = "EVENT", values_to = "value") %>%
  # `%in%` never yields NA, so this keeps exactly the non-missing 1s.
  filter(value %in% 1) %>%
  select(-value) %>%
  # Hand-written target column: this is the output the question asks how to
  # compute, typed out for the six rows that remain.
  mutate(ADDTHIS = c("1.1", "1.2", "1", "1.1", "1.2", "2"))
  SUBJECT  TIME EVENT ADDTHIS
    <dbl> <dbl> <chr> <chr>  
1       1     7 A     1.1    
2       1     7 C     1.2    
3       2     9 B     1      
4       3     4 A     1.1    
5       3     4 B     1.2    
6       3     5 B     2

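I want to compute the ADDTHIS column. Within each SUBJECT, every distinct TIME gets a sequential number (1, 2, ...). When a SUBJECT-TIME combination has several rows, those rows are labelled 1.1, 1.2, 1.3, 1.4, etc., and the sub-numbering resets at the next TIME. A SUBJECT-TIME with only one row gets just the whole number (e.g. 1 or 2).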


Solution

  • Pretty similar to Darren's:

    ... |>
      # Sub-index the rows of each SUBJECT-TIME group (".1", ".2", ...);
      # a group with a single row gets an empty suffix.
      mutate(
        suffix = if (n() > 1) paste0(".", row_number()) else "",
        .by = c(SUBJECT, TIME)
      ) |>
      # Number each distinct TIME within a SUBJECT by order of first
      # appearance. match() against unique() stays correct even when rows of
      # the same TIME are not adjacent; cumsum(!duplicated(TIME)) would hand
      # a later repeat the index of the most recently seen new TIME.
      mutate(
        ADDTHIS = paste0(match(TIME, unique(TIME)), suffix),
        .by = SUBJECT
      ) |>
      select(-suffix)
    #  # A tibble: 6 × 4
    #   SUBJECT  TIME EVENT ADDTHIS
    #     <dbl> <dbl> <chr> <chr>  
    # 1       1     7 A     1.1    
    # 2       1     7 C     1.2    
    # 3       2     9 B     1      
    # 4       3     4 A     1.1    
    # 5       3     4 B     1.2    
    # 6       3     5 B     2