I am trying to have weighted means for a list of dependent variables break down by a list of independent variables.
To do this, I first created a function "cross_fun", which will then be mapped in a second function "multi". This works quite well. However, I notice that the mean values are not weighted. It seems that the weights at value_fn are ignored in pivot_wider.
library(tidyverse)
library(rlang)
library(scales)
dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3
DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
mutate(one = 1, # for Tatals
no_weight = 1) # as weight with 1 for all rows
IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of independent variables
DV_List = c("dv1", "dv2") # List of dependent variables
cross_fun <- function(.data, DV, IVs, weight, fun) { # calculate a function for a DV by every IV
List <- list() # initialize a List
df <- .data %>%
select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
for (i in 1:(ncol(df) - 2)) { # a list for every IV
List[[i]] <- df %>%
select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
mutate(ORDER = 1)
}
dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
.x = List,
.f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = {{ DV }}, values_fn = {{ fun }})
) %>%
purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
select(-any_of(c("ORDER"))) %>%
rename(Total = 1) # column one are the Totals
return(dt)
}
DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt, fun = ~weighted.mean(.x, weight = wt, na.rm = TRUE)) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
multi <- function(.data, DVs, IVs, weight, ...){ # calculate weighted means for a List of DVs by a List of IVs
Answers <- .data %>% # extract a first column for the resulting table
select(all_of(DVs)) %>%
colnames() %>%
tibble() %>%
select("Variable" = 1)
dt <- .data %>%
select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
map_dfr(all_of(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}, fun = ~weighted.mean(.x, weight = {{ weight }}, na.rm = TRUE)) %>% # map all DVs to the cross_fun from above and bind it to dataframe
cbind(Answers, .) %>% # add the first column
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round
return(dt)
}
DATA %>% # calculate the means for the DVs by the IVs with the weight wt
multi(DVs = DV_List, IVs = IV_List, weight = wt)
DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight is always = 1)
multi(DVs = DV_List, IVs = IV_List, weight = no_weight)
# it seams that the weights are ignored, when using weighted.mean as values_fn in pivot_wider
A Solution: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
Here comes the working RepEx:
library(tidyverse)
library(rlang)
library(scales)
dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3
DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
mutate(one = 1, # for Tatals
no_weight = 1) # as weight with 1 for all rows
IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of column variables
DV_List = c('dv1', 'dv2') # List of dependent variables
cross_fun <- function(.data, DV, IVs, weight, ...) { # calculate a function for a DV by every IV
List <- list() # initialize a List
df <- .data %>%
select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
for (i in 1:(ncol(df) - 2)) { # a list for every IV
List[[i]] <- df %>%
select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
group_by(UV = df[[i]]) %>%
summarise(mean = weighted.mean({{ DV }}, {{weight}})) %>% # THE SOLUTION: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
mutate(ORDER = 1)
}
dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
.x = List,
.f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = mean) # THE SOLUTION: Here only the "mean" from the summarise above
) %>%
purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
select(-any_of(c("ORDER"))) %>%
rename(Total = 1) # column one are the Totals
return(dt)
}
multi <- function(.data, DVs, IVs, weight, fun, ...){ # calculate weighted means for a List of DVs by a List of IVs
Answers <- .data %>% # extract a first column for the resulting table
select(all_of(DVs)) %>%
colnames() %>%
tibble() %>%
select("Variable" = 1)
dt <- .data %>%
select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
map_dfr(syms(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}) %>% # map all DVs to the cross_fun from above and bind it to dataframe
cbind(Answers, .) %>% # add the first column
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round
return(dt)
}
DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
DATA %>% cross_fun(dv2, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1)))
DATA %>% # calculate the means for the DVs by the IVs with the weight wt
multi(DVs = c('dv1', 'dv2'), IVs = IV_List, weight = wt, fun = (sum({{ DV }} * {{ weight }}) /sum({{ weight }})))
DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight always = 1)
multi(DVs = DV_List, IVs = IV_List, weight = no_weight)