As a follow up on a recent SO question (see here) I am wondering how to perform multiple t.tests in R
with weighted data (package srvyr
). I cant make it run and would be happy if anyone could help me here. I added a random sample in the code below.
Many thanks!
#create data
surveydata <- as.data.frame(replicate(1,sample(1:5,1000,rep=TRUE)))
colnames(surveydata)[1] <- "q1"
surveydata$q2 <- sample(6, size = nrow(surveydata), replace = TRUE)
surveydata$q3 <- sample(6, size = nrow(surveydata), replace = TRUE)
surveydata$q4 <- sample(6, size = nrow(surveydata), replace = TRUE)
surveydata$group <- c(1,2)
#replace all value "6" wir NA
surveydata[surveydata == 6] <- NA
#add NAs to group 1 in q1
surveydata$q1[which(surveydata$q1==1 & surveydata$group==1)] = NA
surveydata$q1[which(surveydata$q1==2 & surveydata$group==1)] = NA
surveydata$q1[which(surveydata$q1==3 & surveydata$group==1)] = NA
surveydata$q1[which(surveydata$q1==4 & surveydata$group==1)] = NA
surveydata$q1[which(surveydata$q1==5 & surveydata$group==1)] = NA
#add weights
surveydata$weights <- round(runif(nrow(surveydata), min=0.2, max=1.5), 3)
#create vector for relevant questions
rquest <- names(surveydata)[1:4]
# create survey design
library(srvyr)
surveydesign <- surveydata %>%
as_survey_design(strata = group, weights = weights, variables = c("group", all_of(rquest)))
# perform multiple t.test (doesn't work yet)
outcome <- do.call(rbind, lapply(names(surveydesign$variables)[-1], function(i) {
tryCatch({
test <- t.test(as.formula(paste(i, "~ survey")), data = surveydesign)
data.frame(question = i,
group1 = test$estimate[1],
group2 = test$estimate[2],
difference = diff(test$estimate),
p_value = test$p.value, row.names = 1)
}, error = function(e) {
data.frame(question = i,
group1 = NA,
group2 = NA,
difference = NA,
p_value = NA, row.names = 1)
})
}))
As I understand it you have a series of question columns in the example q1 to q4. You've used srvyr
to generate a weights
column. It is possible in our data that for a particular question one entire group maybe all NA
and you'd like to generate results into a df even when that is true. You want a weighted Student's t-test
making use of the weights
column not a simple t-test. The only function I found that provides that is weights::wtd.t.test
which doesn't offer a formula interface but wants to be fed vectors.
In order of steps taken:
library(srvyr)
library(dplyr)
library(rlang)
library(weights)
NA
s by variable, pull
s the vectors for x
, y
, weightx
, weighty
, runs the test, and extracts the info you want into a df row.multiple_wt_ttest <- function(i) {
i <- ensym(i)
xxx <- surveydata %>%
filter(!is.na(!!i)) %>%
split(.$group)
newx <- pull(xxx[[1]], i)
newy <- pull(xxx[[2]], i)
wtx <- pull(xxx[[1]], weights)
wty <- pull(xxx[[2]], weights)
test <- wtd.t.test(x = newx,
y = newy,
weight = wtx,
weighty = wty,
samedata = FALSE)
data.frame(question = rlang::as_name(i),
group1 = test$additional[[2]],
group2 = test$additional[[3]],
difference = test$additional[[1]],
p.value = test$coefficients[[3]])
}
lapply
to apply it column by column (notice it handles the case in q2
where group == 1
is all NA
.lapply(names(surveydata)[1:4], multiple_wt_ttest)
#> [[1]]
#> question group1 group2 difference p.value
#> 1 q1 NaN 3.010457 NaN NA
#>
#> [[2]]
#> question group1 group2 difference p.value
#> 1 q2 3.009003 3.071842 -0.06283922 0.515789
#>
#> [[3]]
#> question group1 group2 difference p.value
#> 1 q3 2.985096 2.968867 0.0162288 0.8734034
#>
#> [[4]]
#> question group1 group2 difference p.value
#> 1 q4 2.856255 3.047787 -0.1915322 0.04290471
do.call
and rbind
to make the df you desiredo.call(rbind, lapply(names(surveydata)[1:4], multiple_wt_ttest))
#> question group1 group2 difference p.value
#> 1 q1 NaN 3.010457 NaN NA
#> 2 q2 3.009003 3.071842 -0.06283922 0.51578905
#> 3 q3 2.985096 2.968867 0.01622880 0.87340343
#> 4 q4 2.856255 3.047787 -0.19153218 0.04290471
Your data (without showing all the gyrations to create it and heading the first 200 rows)
surveydata <-
structure(list(q1 = c(NA, 1L, NA, 4L, NA, 5L, NA, 3L, NA, 5L,
NA, 5L, NA, 1L, NA, 5L, NA, 3L, NA, 4L, NA, 5L, NA, 4L, NA, 2L,
NA, 5L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 5L,
NA, 4L, NA, 4L, NA, 3L, NA, 4L, NA, 2L, NA, 4L, NA, 3L, NA, 1L,
NA, 1L, NA, 3L, NA, 5L, NA, 3L, NA, 5L, NA, 5L, NA, 4L, NA, 2L,
NA, 5L, NA, 1L, NA, 3L, NA, 2L, NA, 5L, NA, 4L, NA, 1L, NA, 5L,
NA, 2L, NA, 2L, NA, 4L, NA, 1L, NA, 3L, NA, 4L, NA, 5L, NA, 3L,
NA, 5L, NA, 1L, NA, 1L, NA, 3L, NA, 2L, NA, 4L, NA, 4L, NA, 1L,
NA, 4L, NA, 3L, NA, 2L, NA, 3L, NA, 5L, NA, 2L, NA, 5L, NA, 2L,
NA, 1L, NA, 5L, NA, 2L, NA, 1L, NA, 2L, NA, 3L, NA, 2L, NA, 3L,
NA, 4L, NA, 4L, NA, 3L, NA, 1L, NA, 3L, NA, 1L, NA, 5L, NA, 3L,
NA, 5L, NA, 4L, NA, 1L, NA, 4L, NA, 1L, NA, 3L, NA, 1L, NA, 4L,
NA, 5L, NA, 4L, NA, 4L, NA, 3L, NA, 3L, NA, 2L, NA, 1L), q2 = c(NA,
2L, 2L, 1L, 5L, 4L, 3L, 2L, 4L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, NA,
2L, 2L, 5L, 5L, 4L, 5L, 4L, NA, 1L, 3L, 4L, 5L, 2L, NA, 5L, 2L,
NA, 4L, 4L, 5L, 4L, 1L, NA, 5L, 1L, 4L, 2L, 1L, NA, 5L, 1L, 3L,
2L, 4L, NA, 2L, NA, 1L, 4L, NA, 2L, 3L, NA, 3L, 1L, 1L, 1L, 1L,
1L, 4L, 5L, 1L, 4L, 5L, 4L, NA, 2L, 3L, 2L, 2L, 2L, 4L, 2L, 3L,
5L, NA, 2L, NA, NA, 5L, 2L, 3L, 2L, 1L, 5L, 3L, 2L, 1L, 2L, NA,
1L, 3L, 5L, 5L, 1L, 1L, NA, 3L, 3L, 1L, 2L, 3L, 3L, 2L, 4L, 2L,
5L, 4L, 3L, 1L, NA, 4L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 4L, 5L, 4L,
1L, 3L, NA, 1L, 3L, 5L, 2L, 1L, 3L, 3L, NA, NA, 5L, NA, 5L, 2L,
5L, 2L, NA, NA, NA, 1L, 4L, 3L, 2L, 3L, 1L, 3L, 5L, 1L, 2L, 3L,
5L, 4L, 4L, NA, NA, 5L, 2L, 3L, 3L, 2L, 2L, 1L, 3L, 1L, 4L, 5L,
2L, 5L, 3L, 1L, 5L, NA, 4L, 3L, 5L, 1L, 1L, 3L, 4L, 4L, 1L, 4L,
3L, 3L, NA, 2L, 3L, 5L, 5L), q3 = c(4L, 4L, 1L, NA, 4L, 5L, 1L,
3L, 4L, 4L, 1L, 3L, 2L, 1L, 2L, 4L, 2L, 3L, 4L, 4L, 1L, 3L, 4L,
5L, 5L, 1L, 3L, 5L, 1L, 2L, 1L, 5L, 5L, 3L, 1L, 3L, 1L, 5L, 1L,
3L, NA, NA, 3L, 5L, NA, 2L, 2L, 1L, 1L, 3L, 5L, 5L, 2L, NA, 5L,
2L, 3L, NA, NA, 3L, 2L, 5L, 2L, 1L, NA, NA, 4L, 2L, NA, 1L, NA,
NA, 5L, 3L, 5L, 4L, 2L, 4L, NA, 2L, 4L, 5L, NA, 2L, 1L, 3L, NA,
5L, 5L, 4L, 5L, 1L, 5L, 4L, 5L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, NA,
NA, 5L, 1L, 2L, 5L, 5L, 5L, 3L, 3L, 3L, 2L, 4L, NA, 3L, NA, 3L,
4L, 2L, 2L, 5L, 1L, NA, 1L, NA, 2L, 2L, 3L, 2L, 5L, 1L, 4L, 4L,
3L, 5L, 5L, NA, NA, 4L, NA, 5L, 1L, 1L, 2L, 5L, 4L, 5L, 4L, 1L,
1L, NA, 4L, 4L, 4L, 5L, 1L, NA, 2L, 3L, NA, 1L, NA, NA, NA, 4L,
2L, 4L, 2L, 1L, 1L, 2L, 1L, 5L, 1L, 3L, 3L, 4L, NA, 1L, 1L, 1L,
3L, 5L, 1L, NA, 3L, 5L, 5L, 4L, NA, 1L, 4L, 5L, 3L, 5L, NA, 1L,
4L), q4 = c(NA, 3L, 1L, 1L, 2L, NA, 1L, 5L, 1L, 3L, 3L, 1L, 3L,
5L, 1L, 3L, 2L, 1L, 1L, 3L, 5L, 5L, NA, 5L, 5L, 5L, 4L, 4L, 4L,
3L, 3L, 2L, 1L, 3L, 5L, 3L, 1L, 5L, NA, 3L, 2L, 5L, 4L, 4L, 4L,
2L, 1L, 1L, 2L, 5L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 4L, 4L, 1L, 2L,
3L, 3L, 4L, NA, 3L, 3L, 2L, 2L, NA, 3L, 5L, 4L, 4L, 3L, 3L, 4L,
NA, NA, 3L, NA, 1L, NA, 3L, 3L, 3L, 2L, 3L, 3L, 4L, 1L, 1L, 2L,
5L, 1L, 1L, 5L, 2L, 2L, 2L, 3L, 4L, 5L, 3L, NA, NA, 2L, 2L, 3L,
2L, 3L, 2L, 3L, 1L, 3L, 3L, 4L, 5L, NA, 4L, 4L, 3L, 1L, 4L, 5L,
4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 1L, 5L, 2L, NA, 4L, 2L,
1L, 3L, 3L, 4L, 3L, 2L, 4L, 5L, 4L, 2L, 3L, 5L, 1L, NA, 3L, 2L,
5L, NA, 1L, 2L, 4L, 5L, 2L, NA, 1L, 3L, NA, 3L, 3L, 3L, 5L, 4L,
5L, 3L, 3L, NA, 4L, 2L, 3L, 2L, 5L, 4L, 4L, 5L, 5L, 3L, 2L, NA,
4L, 1L, 5L, 2L, 4L, 3L, 4L, NA, 3L, 1L, 3L), group = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1", "2"), class = "factor"),
weights = c(1.445, 0.408, 0.621, 0.961, 1.492, 0.625, 1.131,
0.246, 0.612, 0.621, 1.311, 0.649, 1.282, 0.898, 1.268, 0.641,
0.764, 0.759, 0.306, 0.707, 0.899, 0.785, 1.279, 0.458, 0.882,
0.384, 1.492, 0.468, 0.785, 0.707, 0.489, 1.113, 0.692, 0.293,
0.642, 1.327, 0.362, 1.405, 1.173, 0.732, 0.661, 0.522, 1.001,
0.374, 1.181, 0.819, 1.389, 0.43, 0.477, 0.879, 0.634, 0.417,
0.359, 1.007, 0.866, 0.203, 1.469, 0.294, 1.326, 1.391, 0.871,
1.036, 1.251, 0.417, 1.074, 1.268, 0.963, 0.469, 0.215, 1.074,
0.644, 1.054, 0.787, 0.714, 0.568, 0.397, 1.421, 0.692, 0.262,
0.644, 0.793, 0.808, 0.25, 0.842, 1.039, 0.359, 0.987, 1.257,
0.301, 0.203, 0.823, 1.328, 1.192, 0.256, 1.099, 0.668, 1.129,
0.413, 0.266, 1.121, 0.893, 1.484, 0.568, 1.255, 0.531, 0.461,
0.773, 0.298, 0.233, 0.676, 0.478, 0.806, 0.556, 0.201, 0.801,
0.348, 1.396, 0.552, 0.384, 0.615, 0.499, 0.819, 0.954, 0.943,
0.956, 0.323, 0.706, 0.699, 0.9, 1.156, 1.436, 1.115, 0.762,
0.258, 1.421, 0.644, 1.349, 0.251, 0.735, 0.479, 1.055, 1.395,
1.062, 1.155, 0.869, 0.436, 0.415, 0.745, 1.247, 0.21, 0.879,
0.776, 0.747, 0.835, 0.609, 0.733, 0.563, 1.067, 1.436, 0.679,
1.497, 1.385, 1.087, 1.286, 0.503, 0.738, 0.504, 0.665, 1.421,
1.288, 0.691, 0.972, 0.467, 0.425, 0.406, 0.862, 0.749, 0.935,
0.291, 0.444, 1.118, 1.048, 0.886, 0.982, 0.578, 1.402, 0.778,
1.139, 0.804, 0.618, 1.147, 0.594, 0.984, 0.986, 0.941, 0.794,
0.323, 1.41, 0.902, 0.417)), row.names = c(NA, 200L), class = "data.frame")