Search code examples
r dplyr expand

Expand each group to the max n of rows


How can I expand each group so that it has as many rows as the largest group?

# Example data: six rows in three ID groups of sizes 2, 1 and 3.
df <- data.frame(
  ID = c(1L, 1L, 2L, 3L, 3L, 3L),
  col1 = c("A", "B", "O", "U", "L", "R"),
  stringsAsFactors = FALSE
)


ID col1
1 A
1 B
2 O
3 U
3 L
3 R

Desired Output:

1 A
1 B
NA NA
2 O
NA NA
NA NA
3 U
3 L
3 R

Solution

  • You can take advantage of the fact that indexing a data frame with a row number larger than its number of rows, as in df[n_bigger_than_nrow,], returns a row of NAs

    dplyr

    # Size of the largest group; every ID is padded to this many rows.
    max_n <- max(count(df, ID)$n)
    
    # reframe() is the dplyr (>= 1.1.0) verb for per-group results with any
    # number of rows; returning several rows from summarise() is deprecated.
    # pick(everything()) replaces the deprecated cur_data() and, like it, holds
    # the current group's non-grouping columns. Row numbers past the end of a
    # group come back as rows of NA. reframe() always returns an ungrouped
    # tibble, so no ungroup() is needed afterwards.
    df %>% 
      group_by(ID) %>% 
      reframe(pick(everything())[seq_len(max_n), ])
    #> # A tibble: 9 × 2
    #>      ID col1 
    #>   <int> <chr>
    #> 1     1 A    
    #> 2     1 B    
    #> 3     1 <NA> 
    #> 4     2 O    
    #> 5     2 <NA> 
    #> 6     2 <NA> 
    #> 7     3 U    
    #> 8     3 L    
    #> 9     3 R
    

    base R

    # Row positions of each ID. split() collects them wherever they occur, so
    # df does not have to be sorted by ID. Rows whose ID is NA are dropped, and
    # drop = TRUE skips unused factor levels.
    rows <- split(seq_len(nrow(df)), df$ID, drop = TRUE)
    # Size of the largest group (0 for an empty data frame).
    max_n <- max(lengths(rows), 0L)
    # Pad each group to max_n with a row number one past the end of df;
    # indexing a data frame with such a row number yields a row of NAs.
    pad <- nrow(df) + 1L
    i <- lapply(rows, \(r) c(r, rep(pad, max_n - length(r))))
    # Take the ID labels from the data itself so the column keeps its original
    # type (integer, character, ...) instead of going through as.numeric().
    ids <- rep(df$ID[vapply(rows, `[`, integer(1), 1L)], each = max_n)
    df <- df[unlist(i, use.names = FALSE), ]
    rownames(df) <- NULL
    # Padding rows have NA in every column; fill their ID back in.
    df$ID <- ids
    
    df
    #>   ID col1
    #> 1  1    A
    #> 2  1    B
    #> 3  1 <NA>
    #> 4  2    O
    #> 5  2 <NA>
    #> 6  2 <NA>
    #> 7  3    U
    #> 8  3    L
    #> 9  3    R