I have this script, I want to know how I can replace summarise_each()
with the across()
function?
common_bw_elements = df %>%
group_by(range_of_commons = cut(common_IDs,
breaks= c(-Inf,0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf))) %>%
summarise_each(funs(sum), sum_of_instances = frequent)
I am asking this, as I get the following message:
Warning message: summarise_each() is deprecated as of dplyr 0.7.0. Please use across() instead.
My code is very similar to the following post: summarize groups into intervals using dplyr
Any leads on this would be greatly appreciated.
For reference, you can use the following dput()
dput(df)
structure(list(common_IDs = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 17L, 18L, 25L, 26L, 27L, 37L, 51L, 55L, 56L, 63L, 68L, 69L, 70L, 71L, 74L, 76L, 81L, 84L, 86L, 87L, 89L, 90L, 91L, 92L, 101L,
103L, 108L, 109L, 110L, 113L, 114L, 115L, 116L, 129L, 130L, 131L, 133L, 135L, 136L, 137L, 138L, 139L, 141L, 152L, 153L, 154L, 177L, 178L, 190L, 191L, 196L, 199L, 202L, 203L, 208L, 209L, 210L, 211L, 213L, 214L, 215L, 216L, 218L, 219L, 222L, 223L, 229L, 230L, 231L,
232L, 239L, 251L, 252L, 254L, 257L, 264L, 265L, 271L, 272L, 273L, 275L, 276L, 277L, 280L, 293L, 294L, 297L, 298L, 299L, 300L, 301L, 304L, 317L, 320L, 337L, 346L, 347L, 364L, 371L, 373L, 386L, 387L, 389L, 412L, 417L, 419L, 420L, 432L, 440L, 441L, 442L, 443L, 451L,
452L, 453L, 455L, 456L, 457L, 458L, 462L, 463L, 464L, 469L, 470L, 474L, 476L, 477L, 478L, 487L, 488L, 492L, 1484L, 1534L, 1546L, 1561L, 1629L, 1642L, 1670L, 1672L, 1681L, 1698L, 1723L, 1725L,
1736L, 1738L, 1745L, 1753L, 1759L, 1764L, 1766L, 1767L, 1770L, 1772L, 1775L, 1776L, 1781L, 1784L, 1787L, 1791L, 1802L, 1807L, 1813L, 1815L, 1817L, 1821L, 1823L, 1825L, 1846L, 1850L, 1852L,
1853L, 1854L, 1857L, 1858L, 1859L, 1868L, 1899L, 1904L, 1911L, 1913L, 1977L, 1997L, 1999L, 2023L, 2079L),
frequent = c(81L, 75L, 10L, 17L, 4L, 4L, 33L, 13L, 31L, 3L, 19L, 22L, 6L, 1L, 11L, 2L,
1L, 1L, 3L, 14L, 1L, 2L, 1L, 14L, 1L, 9L, 6L, 9L, 2L, 5L, 13L, 4L, 4L, 1L, 4L, 1L, 3L, 1L, 6L, 2L, 1L, 3L, 2L, 5L, 2L, 1L, 17L, 5L, 4L, 4L, 1L, 4L, 7L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 6L,
16L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 5L, 13L, 6L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 1L, 3L, 1L, 3L, 4L, 1L, 1L, 2L, 3L, 4L, 3L, 3L, 1L, 3L, 2L, 2L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -193L))
You can use summarise
since you are only summing one variable by group.
library(tidyverse)
common_bw_elements = df %>%
group_by(range_of_commons = cut(common_IDs,
breaks= c(-Inf,0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf))) %>%
summarise(sum_of_instances = sum(frequent))
Output
range_of_commons sum_of_instances
<fct> <int>
1 (-Inf,0] 81
2 (0,5] 110
3 (5,10] 46
4 (10,20] 34
5 (20,30] 47
6 (30,60] 15
7 (60,100] 85
8 (100,200] 87
9 (200,300] 92
10 (300,600] 75
11 (1.2e+03,1.8e+03] 29
12 (1.8e+03, Inf] 28
If you had multiple columns to sum, then we would use across
(or if you only had a few columns, then instead of everything()
, you can provide a vector of column names (e.g., c(common_IDs, frequent)
):
df %>%
group_by(range_of_commons = cut(common_IDs,
breaks= c(-Inf,0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf))) %>%
summarise(across(everything(), ~ sum(.x))) %>%
rename(sum_of_instances = frequent)
Output
range_of_commons common_IDs sum_of_instances
<fct> <int> <int>
1 (-Inf,0] 0 81
2 (0,5] 15 110
3 (5,10] 13 46
4 (10,20] 35 34
5 (20,30] 78 47
6 (30,60] 199 15
7 (60,100] 1191 85
8 (100,200] 3928 87
9 (200,300] 9392 92
10 (300,600] 17290 75
11 (1.2e+03,1.8e+03] 47829 29
12 (1.8e+03, Inf] 48922 28