Search code examples
rpivotmeanweighted

R weights are ignored, while using weighted.mean as value_fn in pivot.wider


I am trying to have weighted means for a list of dependent variables break down by a list of independent variables.

To do this, I first created a function "cross_fun", which will then be mapped in a second function "multi". This works quite well. However, I notice that the mean values are not weighted. It seems that the weights at value_fn are ignored in pivot_wider.

library(tidyverse)
library(rlang)
library(scales)

dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3

DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
  mutate(one = 1, # for Tatals
         no_weight = 1) # as weight with 1 for all rows

IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of independent variables 
DV_List = c("dv1", "dv2") # List of dependent variables

cross_fun <- function(.data, DV, IVs, weight, fun) { # calculate a function for  a DV by every IV
  
 List <- list() # initialize a List
  
  df <- .data %>% 
    select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
  
  for (i in 1:(ncol(df) - 2)) { # a list for every IV
    List[[i]] <- df %>%
      select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
      mutate(ORDER = 1) 
  }
  
    dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
      .x = List,
      .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = {{ DV }}, values_fn = {{ fun }}) 
    ) %>%
      purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
      select(-any_of(c("ORDER"))) %>%
      rename(Total = 1) # column one are the Totals
  return(dt)
}

DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt, fun = ~weighted.mean(.x, weight = wt, na.rm = TRUE)) %>% # calculate the crosstab for dv1
  mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 

multi <- function(.data, DVs, IVs, weight, ...){ # calculate weighted means for a List of DVs by a List of IVs
  
  Answers <- .data %>% # extract a first column for the resulting table
    select(all_of(DVs)) %>% 
    colnames() %>% 
    tibble() %>% 
    select("Variable" = 1)
  
    dt <- .data %>%
      select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
      map_dfr(all_of(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}, fun = ~weighted.mean(.x, weight = {{ weight }}, na.rm = TRUE)) %>% # map all DVs to the cross_fun from above and bind it to dataframe
      cbind(Answers, .) %>% # add the first column
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 

  return(dt)
}

DATA %>% # calculate the means for the DVs by the IVs with the weight wt
  multi(DVs = DV_List, IVs = IV_List, weight = wt)

DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight is always = 1)
  multi(DVs = DV_List, IVs = IV_List, weight = no_weight)

# it seams that the weights are ignored, when using weighted.mean as values_fn in pivot_wider


Solution

  • A Solution: Put the weighted.mean-function here and not as a part fo the pivot_wider-function

    Here comes the working RepEx:

    library(tidyverse)
    library(rlang)
    library(scales)
    
    dv1 <- c(1, 2, 1, 2, 1, 2) # dependent variable 1
    dv2 <- c(2, 1, 2, 1, 2, 1) # dependent variable 2
    wt <- c(0.5, 5, 0.5, 5, 0.5, 5) # weighting variable
    iv1 <- c("m", "f", "m", "f", "m", "f") # independent variable 1
    iv2 <- c("b", "b", "b", "a", "a", "a") # indipendent variable 2
    iv3 <- c("x", "y", "y", "x", "y", "y") # indipendent variable 3
    
    DATA <- dplyr::tibble(iv1, iv2, iv3, dv1, dv2, wt) %>% # build data frame
      mutate(one = 1, # for Tatals
             no_weight = 1) # as weight with 1 for all rows
    
    IV_List = c('one', 'iv1', 'iv2', 'iv3') # List of column variables 
    DV_List = c('dv1', 'dv2') # List of dependent variables
    
    cross_fun <- function(.data, DV, IVs, weight, ...) { # calculate a function for  a DV by every IV
      
     List <- list() # initialize a List
      
      df <- .data %>% 
        select(all_of({{ IVs }}), {{ DV }}, {{ weight }}) # select the variables to get the List
      
      for (i in 1:(ncol(df) - 2)) { # a list for every IV
        List[[i]] <- df %>%
          select(all_of(i), {{ DV }}, {{ weight }}) %>% # build the lists out of the IVs and the DV
          group_by(UV = df[[i]]) %>% 
          summarise(mean = weighted.mean({{ DV }}, {{weight}})) %>% # THE SOLUTION: Put the weighted.mean-function here and not as a part fo the pivot_wider-function
          mutate(ORDER = 1) 
      }
      
        dt <- purrr::map( # pivot wider for every value of IVs and calculate the values with the function "fun"
          .x = List,
          .f = ~ pivot_wider(.x, id_cols = "ORDER", names_from = 1, values_from = mean) # THE SOLUTION: Here only the "mean" from the summarise above
        ) %>%
          purrr::reduce(left_join, by = "ORDER") %>% # reduce by leftjoin
          select(-any_of(c("ORDER"))) %>%
          rename(Total = 1) # column one are the Totals
      return(dt)
    }
    
    multi <- function(.data, DVs, IVs, weight, fun, ...){ # calculate weighted means for a List of DVs by a List of IVs
    
      Answers <- .data %>% # extract a first column for the resulting table
        select(all_of(DVs)) %>% 
        colnames() %>% 
        tibble() %>% 
        select("Variable" = 1)
      
        dt <- .data %>%
          select(all_of(DVs), {{ weight }}, all_of({{ IV_List }})) %>% # select all needed columns
          map_dfr(syms(DVs), cross_fun, .data = ., IVs = IV_List, weight = {{ weight }}) %>% # map all DVs to the cross_fun from above and bind it to dataframe
          cbind(Answers, .) %>% # add the first column
          mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) # round 
    
      return(dt)
    }
    
    DATA %>% cross_fun(dv1, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 
    
    DATA %>% cross_fun(dv2, IVs = IV_List, weight = wt) %>% # calculate the crosstab for dv1
      mutate(across(where(is.numeric), ~scales::number(.x, accuracy = .1))) 
    
    
    DATA %>% # calculate the means for the DVs by the IVs with the weight wt
      multi(DVs = c('dv1', 'dv2'), IVs = IV_List, weight = wt, fun = (sum({{ DV }} * {{ weight }}) /sum({{ weight }})))
    
    DATA %>% # calculate the means for the DVs by the IVs without weights (no_weight always = 1)
      multi(DVs = DV_List, IVs = IV_List, weight = no_weight)