I have a data frame containing grouped data. I am computing summary statistics by group, but I also want the same summary statistics for all of the data combined. Is there a simpler way to do this than computing the summaries twice and combining the results, as follows?
# Example data: 30 rows cycling through three groups, with one random and one
# sequential value column.
dataDF <- data.frame(
  group = rep(c("a", "b", "c"), times = 10),
  value1 = rnorm(30),
  value2 = seq_len(30)
)
# Summary statistics computed separately for each level of `group`.
grouped <- dataDF %>%
  group_by(group) %>%
  summarise(
    mean1 = mean(value1),
    mean2 = mean(value2),
    sd1 = sd(value1),
    sd2 = sd(value2),
    max1 = max(value1),
    max2 = max(value2)
  )
# The same summary statistics computed over every row, ignoring `group`.
total <- dataDF %>%
  summarise(
    mean1 = mean(value1),
    mean2 = mean(value2),
    sd1 = sd(value1),
    sd2 = sd(value2),
    max1 = max(value1),
    max2 = max(value2)
  )
# Label the overall summary as group "All" and stack it beneath the per-group
# rows.
combined <- rbind(grouped, data.frame(group = "All", total))
> combined
# A tibble: 4 x 7
group mean1 mean2 sd1 sd2 max1 max2
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 0.336 14.5 1.15 9.08 1.98 28
2 b -0.215 15.5 1.17 9.08 1.30 29
3 c 0.332 16.5 0.874 9.08 2.19 30
4 All 0.151 15.5 1.07 8.80 2.19 30
I have written a function to do this:
#' Summarise a grouped data frame and append an overall "All" row
#'
#' Applies `func` once to `data` as grouped and once to the ungrouped data,
#' then stacks the two results. In the overall row every grouping column is
#' filled with the label "All".
#'
#' @param data A data frame, normally grouped with `group_by()`.
#' @param func A summarising function whose first argument is the data, e.g.
#'   `summarise` or `summarise_at`.
#' @param ... Further arguments forwarded unchanged to `func`.
#' @return A base data frame with one row per group followed by one "All" row.
#'   If `data` has no grouping columns, only the single overall row is
#'   returned.
summarise_with_total <- function(data, func, ...) {
  # Names of the grouping columns; empty when `data` is not grouped.
  cohorts <- group_vars(data)

  # Summary over every row, ignoring the grouping.
  overall <- data.frame(func(ungroup(data), ...))

  # Without grouping columns there is nothing to label, and the per-group
  # result would just repeat the overall row, so return the total on its own.
  if (length(cohorts) == 0) {
    return(overall)
  }

  # Summary with one row per cohort.
  by_cohort <- data.frame(func(data, ...))

  # Give the overall row an "All" entry in each grouping column so its columns
  # line up with the per-cohort rows.
  labels <- setNames(rep(list("All"), length(cohorts)), cohorts)
  total_row <- data.frame(labels, overall)

  rbind(by_cohort, total_row)
}
# Per-group mean, sd and max of both value columns, plus an overall "All" row.
# `funs()` is deprecated since dplyr 0.8.0; a named list of functions produces
# the same `<column>_<function>` output names without the deprecation warning.
dataDF %>%
  group_by(group) %>%
  summarise_with_total(
    summarise_at,
    .vars = c("value1", "value2"),
    .funs = list(mean = mean, sd = sd, max = max)
  )