Search code examples
rdplyrappendsummarize

dplyr: append summarise rows by threshold variable


Constraint: Using dplyr, or a tidyverse library:

Objective: I'd like to summarise data using a threshold. the threshold takes many values, and append/collate these summary results.

Minimal reproducible example:

df <- data.frame(colA=c(1,2,1,1),
                 colB=c(0,0,3,1),
                 colC=c(0,5,2,3),
                 colD=c(2,4,4,2))
> df
  colA colB colC colD
1    1    0    0    2
2    2    0    1    2
3    1    3    2    2
4    1    1    3    2

Current: Single threshold

df.ans <- df %>%
            group_by(colA) %>%
            summarize(theshold=1,
                      calcB = sum(df$colB[df$colB > theshold] - 1),
                      calcC = sum(df$colC[df$colC > theshold] - 1),
                      calcD = sum(df$colD[df$colD > theshold] - 1))
> df.ans
# A tibble: 2 x 5
   colA theshold calcB calcC calcD
  <dbl>    <dbl> <dbl> <dbl> <dbl>
1     1        1     2     3     4
2     2        1     2     3     4

Desired: Multiple thresholds

> df.ans
# A tibble: 6 x 5
   colA theshold calcB calcC calcD
  <dbl>    <dbl> <dbl> <dbl> <dbl>
1     1        1     2     3     4
2     2        1     2     3     4
3     1        2     ....
4     2        2     ....
5     1        3     ....
6     2        3     ....

Solution

  • Just write a function to do your thresholding

    thresh_fun <- function(df, threshold) {
      df %>% 
      group_by(colA) %>%
      summarize(threshold=threshold,
                calcB = sum(colB[colB > threshold] - 1),
                calcC = sum(colC[colC > threshold] - 1),
                calcD = sum(colD[colD > threshold] - 1))
    }
    

    and then map it the data.frame for each of your values

    # library(purrr) for map_df
    map_df(1:3, ~thresh_fun(df, .))
    #    colA threshold calcB calcC calcD
    #   <dbl>     <int> <dbl> <dbl> <dbl>
    # 1     1         1     2     3     5
    # 2     2         1     0     4     3
    # 3     1         2     2     2     3
    # 4     2         2     0     4     3
    # 5     1         3     0     0     3
    # 6     2         3     0     4     3