Search code examples
rdplyr

Weighted mean per group with different weights per group using dplyr


I am attempting to modify my code below to a single pipeline using dplyr. I am calculating a weighted mean across two columns per year where each year has a different weighting. How can this been performed without explicitly splitting the dataset and then combining the results? Thanks

library(tidyverse)
seet.seed = 1
df = data.frame(yr = sample(c(2022,2023,2024), size =20,replace = TRUE),
                col_1 = rnorm(20),
                col_2 = rnorm(20),
                col_3 = rnorm(20))
   


dfList = split(df,df$yr)
selected_cols = c("col_1","col_2")

df2022 = dfList[[1]]  %>% rowwise() %>%
    mutate( mean_wt = weighted.mean(c_across(all_of(selected_cols )),c(0.40,0.60)))%>%        
    ungroup()        

df2023 = dfList[[2]]  %>% rowwise() %>%
    mutate( mean_wt = weighted.mean(c_across(all_of(selected_cols )),c(0.70,0.30)))%>%        
    ungroup()

df2024 = dfList[[3]]  %>% rowwise() %>%
    mutate( mean_wt = weighted.mean(c_across(all_of(selected_cols )),c(0.50,0.50)))%>%        
    ungroup()

newDF = bind_rows(list(df2022 ,df2023,df2024 ))

Solution

  • library(dplyr)
    
    yr_wt = list("2022" = c(.4, .6), "2023" = c(.7, .3), "2024" = c(.5, .5))
    
    df |>
      rowwise() |>
      mutate(mean_wt = weighted.mean(c_across(col_1:col_2), yr_wt[[as.character(yr)]])) |>
      ungroup()
    

    rowwise() can be a very slow operation on even moderately sized data frames. Here is a faster option still using the yr_wt list for lookup:

    df |>
      mutate(mean_wt = rowSums(as.matrix(pick(col_1:col_2)) %*% diag(yr_wt[[as.character(pull(cur_group()))]])),
             .by = yr)