I'm trying to calculate mean and SD and then perform t.tests on three different measurements (height, weight, speed) between multiple subgroups.
I started with a simple dataset that only contains two groups (control vs drug) and I have it all working well enough.
simple.df<-
structure(list(trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L), levels = c("control", "drug"), class = "factor"), height = c(15,
17, 25, 21, 11, 29, 18, 20), weight = c(80, 90, 81, 79, 200,
230, 215, 210), speed = c(50, 45, 60, 51, 52, 80, 41, 19)), class = "data.frame", row.names = c(NA,
-8L))
library(rstatix)
simple.df %>% group_by(trial) %>% get_summary_stats(type = "mean_sd")
testing<- data.frame(lapply(simple.df[-1], function(x) t.test(x~simple.df$trial)$p.value))
testing
Where I'm running into trouble is with the t.testing on a larger experiment similar to the dataframe below. I still have control vs drug and height, weight & speed, but now all the measurements were done at two timepoints in both males and females. I'm only concerned with comparing control versus drug for the same sex/age. I'm still ok calculating the mean and SD for each group, but have gotten stuck with figuring out the t-testing. Specifically, I just want the t-test on each of the three measurements for drug vs control in young males, drug vs control in old males, drug vs control in young females and drug vs control in old females, so 12 p-values total with some identification for what comparison each value represents.
Thanks for your help and expertise!
big.df<- structure(list(age = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("old", "young"
), class = "factor"), sex = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("f", "m"), class = "factor"),
trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L), levels = c("control", "drug"
), class = "factor"), height = c(15L, 17L, 25L, 21L, 11L,
29L, 18L, 20L, 300L, 320L, 316L, 325L, 170L, 175L, 172L,
180L, 28L, 40L, 33L, 35L, 60L, 45L, 67L, 52L, 250L, 260L,
240L, 248L, 11L, 19L, 16L, 4L), weight = c(80L, 90L, 81L,
79L, 200L, 230L, 215L, 210L, 152L, 150L, 148L, 155L, 160L,
158L, 157L, 140L, 176L, 164L, 135L, 196L, 175L, 178L, 120L,
147L, 160L, 155L, 175L, 142L, 139L, 142L, 150L, 145L), speed = c(50L,
45L, 60L, 51L, 52L, 80L, 41L, 19L, 55L, 56L, 61L, 67L, 85L,
90L, 100L, 77L, 90L, 80L, 77L, 80L, 81L, 95L, 87L, 91L, 50L,
60L, 55L, 59L, 71L, 65L, 66L, 62L)), row.names = c(NA, -32L
), class = "data.frame")
big.df %>% group_by (sex, age, trial) %>%
get_summary_stats (type = "mean_sd") %>%
arrange (variable, sex, age, trial)
I hope this code will work out for you
big.df<- structure(list(age = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("old", "young"
), class = "factor"), sex = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("f", "m"), class = "factor"),
trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L), levels = c("control", "drug"
), class = "factor"), height = c(15L, 17L, 25L, 21L, 11L,
29L, 18L, 20L, 300L, 320L, 316L, 325L, 170L, 175L, 172L,
180L, 28L, 40L, 33L, 35L, 60L, 45L, 67L, 52L, 250L, 260L,
240L, 248L, 11L, 19L, 16L, 4L), weight = c(80L, 90L, 81L,
79L, 200L, 230L, 215L, 210L, 152L, 150L, 148L, 155L, 160L,
158L, 157L, 140L, 176L, 164L, 135L, 196L, 175L, 178L, 120L,
147L, 160L, 155L, 175L, 142L, 139L, 142L, 150L, 145L), speed = c(50L,
45L, 60L, 51L, 52L, 80L, 41L, 19L, 55L, 56L, 61L, 67L, 85L,
90L, 100L, 77L, 90L, 80L, 77L, 80L, 81L, 95L, 87L, 91L, 50L,
60L, 55L, 59L, 71L, 65L, 66L, 62L)), row.names = c(NA, -32L
), class = "data.frame")
# A function to extract the 3 comparrisons
multi_t <- function(a_sex,a_age){
df_func <- big.df %>% filter(sex==a_sex,age==a_age)
h <- t.test(height~trial,df_func)$p.value
w <- t.test(weight~trial,df_func)$p.value
s <- t.test(speed~trial,df_func)$p.value
# cat(
# "sex =",a_sex,"\nage =",a_age,"\n\n"
# )
return(cbind(height=h,weight=w,speed=s))
}
# Table in a long version
ptable <- data.frame(
multi_t("m","young"),
multi_t("m","old"),
multi_t("f","young"),
multi_t("f","old")
) %>% pivot_longer(cols=everything(),
names_to = "value",
values_to = "p.values") %>%
mutate(comparison = rep(c("young males","old males",
"young females","old females"),each=3),
value=str_remove_all(value,"\\.\\d"))
ptable
# Table in a wider version
ptable %>% group_by(value) %>% mutate(id=row_number()) %>%
pivot_wider(names_from = value,values_from = p.values) %>%
select(-id)
ptable %>%
mutate(sig=p.values<0.05) %>%
ggplot(aes(x=value,y=p.values,color=sig))+
geom_point(show.legend = T)+facet_wrap(~comparison,scales="free")+
theme(legend.position = "bottom")+
labs(title="P values of 3 different measurements",
subtitle = "For 4 different populations")