Search code examples
raggregatelevelsgroup

Aggregate by multiple groups and keep the result for each level in R


Is there any possibility to get expected result in simplest way (without intermediate steps, as I did in my code)? I was looking for it and couldn't find any solution...

So, for example:

df <- data.frame(   A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
                    B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
                    C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
                    D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
              value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36), 
              value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
              value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))

I would like to summarise values for each level (A,B,C,D) and combination of them to show the sum of each level in the final table.

A <- df %>%
  group_by(A) %>% 
  summarise(across(value_1:value_3, sum))

B <- df %>% 
  group_by(A,B) %>% 
  summarise(across(value_1:value_3, sum))

C <- df %>% 
  group_by(A,B,C) %>% 
  summarise(across(value_1:value_3, sum))

AD <- df %>% 
  group_by(A,D) %>% 
  summarise(across(value_1:value_3, sum))

BD <- df %>% 
  group_by(A,B,D) %>% 
  summarise(across(value_1:value_3, sum))

result <-bind_rows(A, B, C, AD, BD)
result[is.na(result)] <- "00"
result <- result[,c("A","B","C","D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D),]

My data has much more levels, so how could I do it without making intermediate steps like: B, C.. to get the result like this:

> head(result)
# A tibble: 6 x 7
  A     B     C     D     value_1 value_2 value_3
  <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
1 01    00    00    00         70     109     132
2 01    00    00    1          41      97      92
3 01    00    00    2          29      12      40
4 01    12    00    00         55      62     123
5 01    12    00    1          26      50      92
6 01    12    00    2          29      12      31

Is there any possibility to do it with .groups argument or using aggregate function instead of combination group_by and summarise?


Solution

  • df <- data.frame(   A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
                        B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
                        C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
                        D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
                        value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36), 
                        value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
                        value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
    
    grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))
    
    library(tidyverse)
    
    map_df(grp_vars,
        ~ df %>%
          group_by(across(.x)) %>%
          summarise(across(value_1:value_3, sum), .groups = "drop")) %>%
      mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
      relocate(where(is.numeric), .after = where(is.character))
    
    
    #> # A tibble: 15 x 6
    #>    A     B     C     value_1 value_2 value_3
    #>    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
    #>  1 01    00    00         70     109     132
    #>  2 02    00    00        256     423      16
    #>  3 01    12    00         55      62     123
    #>  4 01    14    00         15      47       9
    #>  5 02    18    00         34     297       4
    #>  6 02    22    00        222     126      12
    #>  7 01    12    01         39       2      83
    #>  8 01    12    02         16      60      40
    #>  9 01    14    01          0       0       9
    #> 10 01    14    02         15      47       0
    #> 11 02    18    01          0      95       4
    #> 12 02    18    02         16      14       0
    #> 13 02    18    05         18     188       0
    #> 14 02    22    01         74      65       8
    #> 15 02    22    02        148      61       4
    

    Created on 2022-03-31 by the reprex package (v2.0.1)

    if I understood correctly

    map_df(
      grp_vars,
      ~ df %>%
        group_by(across(.x)) %>%
        summarise(across(value_1:value_3, sum), .groups = "drop") %>%
        mutate(level = paste0(.x, collapse = ""))
    ) %>%
      mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
      relocate(level, where(is.numeric), .after = where(is.character))
    
    
    # A tibble: 15 x 7
       A     B     C     level value_1 value_2 value_3
       <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
     1 01    00    00    A          70     109     132
     2 02    00    00    A         256     423      16
     3 01    12    00    AB         55      62     123
     4 01    14    00    AB         15      47       9
     5 02    18    00    AB         34     297       4
     6 02    22    00    AB        222     126      12
     7 01    12    01    ABC        39       2      83
     8 01    12    02    ABC        16      60      40
     9 01    14    01    ABC         0       0       9
    10 01    14    02    ABC        15      47       0
    11 02    18    01    ABC         0      95       4
    12 02    18    02    ABC        16      14       0
    13 02    18    05    ABC        18     188       0
    14 02    22    01    ABC        74      65       8
    15 02    22    02    ABC       148      61       4