I am trying to produce multiple frequency tables that are stratified by multiple independent variables. I can get this to work for one variable and one stratification variable, but my for-loop is broken.
library(tidyverse)
# Create example dataframe of survey data
df <- data.frame(
var1 = sample(1:7, 1000, replace = TRUE),
var2 = sample(1:7, 1000, replace = TRUE),
var3 = sample(1:7, 1000, replace = TRUE),
var4 = sample(1:7, 1000, replace = TRUE),
var5 = sample(1:7, 1000, replace = TRUE),
var6 = sample(1:7, 1000, replace = TRUE),
strat1 = sample(c("A", "B", "C"), 1000, replace = TRUE),
strat2 = sample(c("X", "Y"), 1000, replace = TRUE),
strat3 = sample(c("True", "False"), 1000, replace = TRUE)
)
Example that works for one variable and one stratification variable. I want to convert this code into a for loop:
temp_df <- df %>% count(var1)
temp_df$percent <- temp_df$n / sum(temp_df$n) * 10
strat_df <- temp_df %>%
left_join((df %>% group_by(var1, strat1) %>% count(var1) %>% pivot_wider(names_from = strat1, values_from = n)), by = "var1")
for(k in c("A","B","C")){
strat_df[paste0(k, "_pct")] <- (strat_df[[k]] / temp_df$n) * 100
}
I want this same sort of output, but with added columns for count and _pct of the other two stratification variables.
I've tried using the following for loop, but it's only giving me one row per variable and it only produces two columns for each strat variable, whereas the output I'm looking for would have a raw count and column percentage column for each category within a stratification variable. Since there are 3 strat vars, two having two categories and one having three categories, my desired output would have 13 columns including the column for "v#", "n", and "percent".
# Create a list of the variables of interest
variables <- c("var1", "var2", "var3", "var4", "var5", "var6")
# Create a list of the stratification variables
strats <- c("strat1", "strat2", "strat3")
# Create a loop that runs through each variable
for(i in variables){
# Create a frequency table for the current variable
temp_df <- df %>% count(!! i)
# Add a column for the percent of responses within each response category
temp_df$percent <- temp_df$n / sum(temp_df$n) * 100
# Add a column for the raw count for each category of the stratification variables
for(j in strats){
temp_df <- temp_df %>% group_by(!!i) %>% mutate( !!j := n() )
}
# Add a column for the percent of the stratification variable category within the response category
for(j in strats){
temp_df[paste0(j, "_pct")] <- (temp_df[[j]] / temp_df$n) * 100
}
assign(paste0(i,"_df"), temp_df)
}
UPDATE:
Came up with a solution that outputs what I need:
for(i in variables){
j = sym(i)
temp_df <- df %>% count(!!j)
temp_df$percent <- temp_df$n / sum(temp_df$n) * 10
strat_df <- temp_df %>%
left_join((df %>% group_by(!!j, strat1) %>% count(!!j) %>% pivot_wider(names_from = strat1, values_from = n)), by = i) %>%
left_join((df %>% group_by(!!j, strat2) %>% count(!!j) %>% pivot_wider(names_from = strat2, values_from = n)), by = i) %>%
left_join((df %>% group_by(!!j, strat3) %>% count(!!j) %>% pivot_wider(names_from = strat3, values_from = n)), by = i)
for(k in c("A","B","C","X","Y","True","False")){
strat_df[paste0(k, "_pct")] <- (strat_df[[k]] / temp_df$n) * 100
}
assign(paste0(i,"_df"), strat_df)