I am trying to produce a formatted html table which has columns for frequency, cumulative frequency, column percentage, and cumulative column percentage. The table should also have the data subsetted by a grouping variable, and including a group total.
I can almost achieve this using a combination of dplyr and tidyr, but the output is a dataframe which doesn't look so pretty. I wonder if there is an easier way using the tables::tabulate
command?
# Sample data
dat <- data.frame(
id = 1:100,
group = factor(sample(c("A", "B"), 100, replace = TRUE)),
sessions = factor(sample(1:10, 100, replace = TRUE))
)
# dplyr/tidyr solution
library(dplyr)
library(tidyr)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide")
# As far as I get using tables::tabulate
library(tables)
tabular(
Factor(sessions, "Sessions") ~
(Heading()*group + 1) *
(
(n = 1) +
# (cum_n = ??) +
Heading("%")*Percent(denom = "col")*Format(digits = 2)
# + Heading("cum_%")*??*Format(digits = 2)
),
data = dat
)
I would recommend using knitr::kable
and kableExtra
, amazing packages for producing tables. You can also set it up for multiple format outputs, for example using the same code to produce html
and latex
for pdf.
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide") %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover"))