Search code examples
r dplyr summarize across

Replace summarise_each() with across() in the dplyr package


I have this script, and I want to know how I can replace summarise_each() with the across() function.

# Bin common_IDs into intervals with cut() and group the rows by that bin.
# summarise_each() is the call that raises the deprecation warning quoted
# below; presumably it sums `frequent` within each bin and names the result
# `sum_of_instances` -- confirm against the dplyr documentation.
common_bw_elements = df %>% 
 group_by(range_of_commons = cut(common_IDs, 
  breaks= c(-Inf,0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf))) %>% 
   summarise_each(funs(sum), sum_of_instances = frequent)

I am asking this, as I get the following message:

Warning message: summarise_each() is deprecated as of dplyr 0.7.0. Please use across() instead.

My code is very similar to the following post: summarize groups into intervals using dplyr

Any leads on this would be greatly appreciated.

For reference, you can recreate the data from the following dput() output:

# dput(df) prints the structure() call shown below; assign that call to `df`
# (df <- structure(...)) to rebuild the example data.
dput(df)
structure(list(common_IDs = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 17L, 18L, 25L, 26L, 27L, 37L, 51L, 55L, 56L, 63L, 68L, 69L, 70L, 71L, 74L, 76L, 81L, 84L, 86L, 87L, 89L, 90L, 91L, 92L, 101L, 
103L, 108L, 109L, 110L, 113L, 114L, 115L, 116L, 129L, 130L, 131L, 133L, 135L, 136L, 137L, 138L, 139L, 141L, 152L, 153L, 154L, 177L, 178L, 190L, 191L, 196L, 199L, 202L, 203L, 208L, 209L, 210L, 211L, 213L, 214L, 215L, 216L, 218L, 219L, 222L, 223L, 229L, 230L, 231L, 
232L, 239L, 251L, 252L, 254L, 257L, 264L, 265L, 271L, 272L, 273L, 275L, 276L, 277L, 280L, 293L, 294L, 297L, 298L, 299L, 300L, 301L, 304L, 317L, 320L, 337L, 346L, 347L, 364L, 371L, 373L, 386L, 387L, 389L, 412L, 417L, 419L, 420L, 432L, 440L, 441L, 442L, 443L, 451L, 
452L, 453L, 455L, 456L, 457L, 458L, 462L, 463L, 464L, 469L, 470L, 474L, 476L, 477L, 478L, 487L, 488L, 492L, 1484L, 1534L, 1546L, 1561L, 1629L, 1642L, 1670L, 1672L, 1681L, 1698L, 1723L, 1725L, 
1736L, 1738L, 1745L, 1753L, 1759L, 1764L, 1766L, 1767L, 1770L, 1772L, 1775L, 1776L, 1781L, 1784L, 1787L, 1791L, 1802L, 1807L, 1813L, 1815L, 1817L, 1821L, 1823L, 1825L, 1846L, 1850L, 1852L, 
1853L, 1854L, 1857L, 1858L, 1859L, 1868L, 1899L, 1904L, 1911L, 1913L, 1977L, 1997L, 1999L, 2023L, 2079L),
frequent = c(81L, 75L, 10L, 17L, 4L, 4L, 33L, 13L, 31L, 3L, 19L, 22L, 6L, 1L, 11L, 2L, 
1L, 1L, 3L, 14L, 1L, 2L, 1L, 14L, 1L, 9L, 6L, 9L, 2L, 5L, 13L, 4L, 4L, 1L, 4L, 1L, 3L, 1L, 6L, 2L, 1L, 3L, 2L, 5L, 2L, 1L, 17L, 5L, 4L, 4L, 1L, 4L, 7L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 6L, 
16L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 5L, 13L, 6L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 1L, 3L, 1L, 3L, 4L, 1L, 1L, 2L, 3L, 4L, 3L, 3L, 1L, 3L, 2L, 2L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), 
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -193L))

Solution

  • You can use summarise since you are only summing one variable by group.

    library(tidyverse)
    
    # Label every row with the interval its common_IDs value falls into
    # (cut() produces right-closed bins such as (0,5]), then total `frequent`
    # within each interval.
    common_bw_elements <- df %>%
      mutate(
        range_of_commons = cut(
          common_IDs,
          breaks = c(
            -Inf, 0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf
          )
        )
      ) %>%
      group_by(range_of_commons) %>%
      summarise(sum_of_instances = sum(frequent))
    

    Output

       range_of_commons  sum_of_instances
       <fct>                        <int>
     1 (-Inf,0]                        81
     2 (0,5]                          110
     3 (5,10]                          46
     4 (10,20]                         34
     5 (20,30]                         47
     6 (30,60]                         15
     7 (60,100]                        85
     8 (100,200]                       87
     9 (200,300]                       92
    10 (300,600]                       75
    11 (1.2e+03,1.8e+03]               29
    12 (1.8e+03, Inf]                  28
    

    If you had multiple columns to sum, you could use across(). If you only want a few of the columns, then instead of everything() you can provide a vector of column names, e.g., c(common_IDs, frequent):

    # Bin common_IDs into right-closed intervals and group by the bin.
    # across(everything(), sum) then sums each non-grouping column (here
    # common_IDs and frequent); `frequent` is renamed afterwards.
    df %>%
      mutate(
        range_of_commons = cut(
          common_IDs,
          breaks = c(
            -Inf, 0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf
          )
        )
      ) %>%
      group_by(range_of_commons) %>%
      summarise(across(everything(), sum)) %>%
      rename(sum_of_instances = frequent)
    

    Output

       range_of_commons  common_IDs sum_of_instances
       <fct>                  <int>            <int>
     1 (-Inf,0]                   0               81
     2 (0,5]                     15              110
     3 (5,10]                    13               46
     4 (10,20]                   35               34
     5 (20,30]                   78               47
     6 (30,60]                  199               15
     7 (60,100]                1191               85
     8 (100,200]               3928               87
     9 (200,300]               9392               92
    10 (300,600]              17290               75
    11 (1.2e+03,1.8e+03]      47829               29
    12 (1.8e+03, Inf]         48922               28