Search code examples
r, t-test

T-tests on specific subgroups


I'm trying to calculate mean and SD and then perform t.tests on three different measurements (height, weight, speed) between multiple subgroups.

I started with a simple dataset that only contains two groups (control vs drug) and I have it all working well enough.

# Two-arm example data: four control rows followed by four drug rows,
# with three numeric measurements per row.
simple.df <- data.frame(
  trial = factor(
    rep(c("control", "drug"), each = 4),
    levels = c("control", "drug")
  ),
  height = c(15, 17, 25, 21, 11, 29, 18, 20),
  weight = c(80, 90, 81, 79, 200, 230, 215, 210),
  speed = c(50, 45, 60, 51, 52, 80, 41, 19)
)

library(rstatix)
# Mean and SD summary, computed separately for each trial arm
simple.df %>%
  group_by(trial) %>%
  get_summary_stats(type = "mean_sd")

# One t-test p-value (split by trial) per measurement column. Column 1 is the
# `trial` grouping factor itself, so it is dropped before iterating.
testing <- data.frame(
  lapply(
    simple.df[-1],
    function(measure) t.test(measure ~ simple.df$trial)$p.value
  )
)

testing

Where I'm running into trouble is with the t.testing on a larger experiment similar to the dataframe below. I still have control vs drug and height, weight & speed, but now all the measurements were done at two timepoints in both males and females. I'm only concerned with comparing control versus drug for the same sex/age. I'm still ok calculating the mean and SD for each group, but have gotten stuck with figuring out the t-testing. Specifically, I just want the t-test on each of the three measurements for drug vs control in young males, drug vs control in old males, drug vs control in young females and drug vs control in old females, so 12 p-values total with some identification for what comparison each value represents.

Thanks for your help and expertise!

# Full experiment: 32 rows laid out as age (young, then old) x sex (m, then f
# within each age) x trial (4 control, then 4 drug within each sex).
# Measurements are stored as integers.
big.df <- data.frame(
  age = factor(
    rep(c("young", "old"), each = 16),
    levels = c("old", "young")
  ),
  sex = factor(
    rep(rep(c("m", "f"), each = 8), times = 2),
    levels = c("f", "m")
  ),
  trial = factor(
    rep(rep(c("control", "drug"), each = 4), times = 4),
    levels = c("control", "drug")
  ),
  height = c(
    15L, 17L, 25L, 21L, 11L, 29L, 18L, 20L,
    300L, 320L, 316L, 325L, 170L, 175L, 172L, 180L,
    28L, 40L, 33L, 35L, 60L, 45L, 67L, 52L,
    250L, 260L, 240L, 248L, 11L, 19L, 16L, 4L
  ),
  weight = c(
    80L, 90L, 81L, 79L, 200L, 230L, 215L, 210L,
    152L, 150L, 148L, 155L, 160L, 158L, 157L, 140L,
    176L, 164L, 135L, 196L, 175L, 178L, 120L, 147L,
    160L, 155L, 175L, 142L, 139L, 142L, 150L, 145L
  ),
  speed = c(
    50L, 45L, 60L, 51L, 52L, 80L, 41L, 19L,
    55L, 56L, 61L, 67L, 85L, 90L, 100L, 77L,
    90L, 80L, 77L, 80L, 81L, 95L, 87L, 91L,
    50L, 60L, 55L, 59L, 71L, 65L, 66L, 62L
  )
)

# Mean and SD summary for every sex/age/trial cell, sorted so the control and
# drug rows of the same subgroup sit next to each other.
big.df %>%
  group_by(sex, age, trial) %>%
  get_summary_stats(type = "mean_sd") %>%
  arrange(variable, sex, age, trial)


Solution

  • I hope this code will work out for you

    enter image description here

    # Same 32-row data set as in the question. Rows run young -> old; within
    # each age, males then females; within each sex, 4 control then 4 drug.
    big.df <- data.frame(
      age = factor(
        rep(c("young", "old"), each = 16),
        levels = c("old", "young")
      ),
      sex = factor(
        rep(rep(c("m", "f"), each = 8), times = 2),
        levels = c("f", "m")
      ),
      trial = factor(
        rep(rep(c("control", "drug"), each = 4), times = 4),
        levels = c("control", "drug")
      ),
      height = c(
        15L, 17L, 25L, 21L, 11L, 29L, 18L, 20L,
        300L, 320L, 316L, 325L, 170L, 175L, 172L, 180L,
        28L, 40L, 33L, 35L, 60L, 45L, 67L, 52L,
        250L, 260L, 240L, 248L, 11L, 19L, 16L, 4L
      ),
      weight = c(
        80L, 90L, 81L, 79L, 200L, 230L, 215L, 210L,
        152L, 150L, 148L, 155L, 160L, 158L, 157L, 140L,
        176L, 164L, 135L, 196L, 175L, 178L, 120L, 147L,
        160L, 155L, 175L, 142L, 139L, 142L, 150L, 145L
      ),
      speed = c(
        50L, 45L, 60L, 51L, 52L, 80L, 41L, 19L,
        55L, 56L, 61L, 67L, 85L, 90L, 100L, 77L,
        90L, 80L, 77L, 80L, 81L, 95L, 87L, 91L,
        50L, 60L, 55L, 59L, 71L, 65L, 66L, 62L
      )
    )
    
    
    # A function to extract the 3 comparisons (control vs drug) for one
    # sex/age subgroup.
    #
    # Arguments:
    #   a_sex, a_age  values matched against the `sex` and `age` columns.
    #   data          data frame with columns sex, age, trial, height, weight
    #                 and speed. Defaults to the global `big.df`, so existing
    #                 two-argument calls behave as before.
    #
    # Returns a 1 x 3 numeric matrix of t.test() p-values with column names
    # height, weight and speed. t.test() is called with its defaults, which is
    # the unequal-variance (Welch) test.
    #
    # Stops with an error if `data` lacks a required column. t.test() itself
    # errors if the subgroup does not contain exactly two trial levels.
    multi_t <- function(a_sex, a_age, data = big.df) {
      measures <- c("height", "weight", "speed")

      missing_cols <- setdiff(c("sex", "age", "trial", measures), names(data))
      if (length(missing_cols) > 0) {
        stop(
          "`data` is missing column(s): ",
          paste(missing_cols, collapse = ", "),
          call. = FALSE
        )
      }

      # Base subsetting keeps the helper independent of whichever `filter()`
      # is on the search path; which() drops rows where the match is NA.
      keep <- which(data$sex == a_sex & data$age == a_age)
      df_func <- data[keep, , drop = FALSE]

      # One test per measurement instead of three copy-pasted calls.
      p <- vapply(
        measures,
        function(m) {
          t.test(reformulate("trial", response = m), data = df_func)$p.value
        },
        numeric(1)
      )

      # Same shape as cbind(height = h, weight = w, speed = s).
      matrix(p, nrow = 1L, dimnames = list(NULL, measures))
    }
    
    
    # Table in a long version: one row per comparison x measurement, with
    # columns `value` (measurement name), `p.values` and `comparison`.
    #
    # Each label is stored next to the sex/age pair it describes, so
    # reordering or adding subgroups cannot attach a label to the wrong
    # p-values. Measurement names come straight from multi_t()'s column names.
    ptable <- local({
      subgroups <- data.frame(
        sex = c("m", "m", "f", "f"),
        age = c("young", "old", "young", "old"),
        comparison = c("young males", "old males",
                       "young females", "old females")
      )

      rows <- lapply(seq_len(nrow(subgroups)), function(i) {
        p <- multi_t(subgroups$sex[i], subgroups$age[i])
        data.frame(
          value = colnames(p),
          p.values = as.vector(p),
          comparison = subgroups$comparison[i]
        )
      })

      bind_rows(rows) %>% as_tibble()
    })
    ptable
    
    # Table in a wider version: one row per comparison, one column per
    # measurement. `comparison` already identifies each output row, so no
    # grouping or helper id column is needed.
    ptable %>%
      pivot_wider(names_from = value, values_from = p.values)
    
    # Plot every p-value, coloured by whether it falls below 0.05, with one
    # panel per comparison.
    ptable %>%
      mutate(sig = p.values < 0.05) %>%
      ggplot(aes(x = value, y = p.values, color = sig)) +
      # TRUE rather than T: `T` is an ordinary variable that can be reassigned.
      geom_point(show.legend = TRUE) +
      facet_wrap(~comparison, scales = "free") +
      theme(legend.position = "bottom") +
      labs(
        title = "P values of 3 different measurements",
        subtitle = "For 4 different populations"
      )