Search code examples
rdplyr

Adaptive functions that apply a function to subsets of columns based on common naming characteristics


I am either too caffeinated or not caffeinated enough, because I cannot figure out how to do this. I need to create a function that calculates an equation that exponentiates the intercept and effect for several sets of variables, each set grouped by a common string within column names, and then sums all the exponents, yielding a single value. I need to do this across columns within each row so dplyr seems the obvious choice. The tricky part is that the function needs to be able to do this for a different number of elements within each set. Easier to show than describe.

Here are two datasets

set.seed(1)

names_df1 <- c("ball", "bell", "bat")
df1 <- data.frame(int_ball = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_ball = sample(seq(-.99,-.01, .01),5,replace=T),
                  int_bell = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_bell = sample(seq(-.99,-.01, .01),5,replace=T),
                  int_bat = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_bat = sample(seq(-.99,-.01, .01),5,replace=T))


names_df2 <- c("dog", "cat", "bird", "fish")
df2 <- data.frame(int_dog = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_dog = sample(seq(-.99,-.01, .01),5,replace=T),
                  int_cat = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_cat = sample(seq(-.99,-.01, .01),5,replace=T),
                  int_bird = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_bird = sample(seq(-.99,-.01, .01),5,replace=T),
                  int_fish = sample(seq(-.99,-.01, .01),5,replace=T),
                  eff_fish = sample(seq(-.99,-.01, .01),5,replace=T))

Each dataset has as many pairs of variables as there are elements in the string vector preceding each dataset (names_df1 and names_df2). I need to add together the int_ and eff_ variables for each pair, then exponentiate the result, then add all those exponents together. For the dataset we three sets of pairs the results would look like this

df1 %>%
  mutate(eq_df1 = exp(int_ball + eff_ball) + exp(int_bell + eff_bell) + exp(int_bat + eff_bat))

#   int_ball eff_ball int_bell eff_bell int_bat eff_bat   eq_df1
# 1    -0.32    -0.57    -0.03    -0.93   -0.11   -0.21 1.519698
# 2    -0.61    -0.86    -0.15    -0.27   -0.63   -0.67 1.159504
# 3    -0.99    -0.18    -0.79    -0.21   -0.66   -0.16 1.118678
# 4    -0.66    -0.41    -0.46    -0.15   -0.11   -0.65 1.354026
# 5    -0.13    -0.49    -0.26    -0.63   -0.56   -0.30 1.371762

And for the dataset with four sets of pairs it would look like this

df2 %>%
  mutate(eq_df2 = exp(int_dog + eff_dog) + exp(int_cat + eff_cat) + exp(int_bird + eff_bird) + exp(int_fish + eff_fish))

#   int_dog eff_dog int_cat eff_cat int_bird eff_bird int_fish eff_fish   eq_df2
# 1   -0.26   -0.80   -0.56   -0.58    -0.98    -0.35    -0.19    -0.11 1.671570
# 2   -0.58   -0.56   -0.75   -0.94    -0.55    -0.30    -0.87    -0.77 1.125734
# 3   -0.62   -0.13   -0.30   -0.76    -0.82    -0.13    -0.60    -0.16 1.673230
# 4   -0.80   -0.30   -0.61   -0.68    -0.78    -0.30    -0.11    -0.71 1.388169
# 5   -0.72   -0.60   -0.49   -0.86    -0.22    -0.25    -0.52    -0.87 1.400453

Any help much appreciated. The solution doesn't have to be in dplyr.


Solution

  • You could define your function to pivots the cols to long format, do the required calculation, and bind back to the original data:

    library(dplyr)
    library(tidyr)
    library(tibble)
    
    f <- function(.data, vars = starts_with(c("eff_", "int_"))) {
      .data |> 
        select( {{ vars }} ) |> 
        rowid_to_column() |>
        pivot_longer(-rowid, names_sep = "_", names_to = c(".value", "name")) |> 
        summarise(eq = sum(exp(pick(2) + pick(3))), .by = rowid) |> 
        select(-rowid) |> 
        bind_cols(.data, results = _)
    }
    
    f(df1)
      int_ball eff_ball int_bell eff_bell int_bat eff_bat       eq
    1    -0.32    -0.57    -0.03    -0.93   -0.11   -0.21 1.519698
    2    -0.61    -0.86    -0.15    -0.27   -0.63   -0.67 1.159504
    3    -0.99    -0.18    -0.79    -0.21   -0.66   -0.16 1.118678
    4    -0.66    -0.41    -0.46    -0.15   -0.11   -0.65 1.354026
    5    -0.13    -0.49    -0.26    -0.63   -0.56   -0.30 1.371762
    
    f(df2)
      int_dog eff_dog int_cat eff_cat int_bird eff_bird int_fish eff_fish       eq
    1   -0.26   -0.80   -0.56   -0.58    -0.98    -0.35    -0.19    -0.11 1.671570
    2   -0.58   -0.56   -0.75   -0.94    -0.55    -0.30    -0.87    -0.77 1.125734
    3   -0.62   -0.13   -0.30   -0.76    -0.82    -0.13    -0.60    -0.16 1.673230
    4   -0.80   -0.30   -0.61   -0.68    -0.78    -0.30    -0.11    -0.71 1.388169
    5   -0.72   -0.60   -0.49   -0.86    -0.22    -0.25    -0.52    -0.87 1.400453