I want to create a function named ratio_function that does the same as the following code:
# Per-ID ratio for category "A": sum(surface) / sum(total_area) * mean(MEAN).
data <- data %>%
  group_by(ID) %>%
  summarise(
    sum_ratio = sum(surface[category == "A"], na.rm = TRUE) /
      sum(total_area[category == "A"], na.rm = TRUE) *
      mean(MEAN[category == "A"], na.rm = TRUE)
  )
but so that it can be called directly inside summarise, like this:
# Desired usage: call ratio_function() directly inside summarise().
data <- data %>%
  group_by(ID) %>%
  summarise(
    sum_ratio = ratio_function("A")
  )
The problem is that surface, total_area and category aren't recognized as variable names in summarise once they are referenced inside the function.
If you do not want to pass the names of the other relevant columns one by one to the function, you have to pass the entire data frame of the current group to the function (for example with pick(everything())):
library(tidyverse)
# Example data: 4 IDs with 3 rows each, built column by column.
data <- tibble(
  ID = rep(c(1, 2, 3, 4), each = 3),
  surface = c(50, 30, 20, 70, 60, 80, 40, 20, 30, 55, 45, 25),
  total_area = c(200, 150, 100, 300, 250, 350, 180, 90, 130, 220, 180, 90),
  category = c("A", "A", "B", "A", "B", "A", "A", "A", "B", "A", "A", "B"),
  MEAN = c(1.5, 1.2, 0.8, 2.0, 1.0, 1.8, 1.4, 1.1, 0.9, 1.6, 1.3, 0.7)
)
# Old approach: the whole ratio is spelled out inline inside summarise().
data |>
  group_by(ID) |>
  summarise(
    sum_ratio = sum(surface[category == "A"], na.rm = TRUE) /
      sum(total_area[category == "A"], na.rm = TRUE) *
      mean(MEAN[category == "A"], na.rm = TRUE)
  )
#> # A tibble: 4 × 2
#> ID sum_ratio
#> <dbl> <dbl>
#> 1 1 0.309
#> 2 2 0.438
#> 3 3 0.278
#> 4 4 0.363
# define function
#' Surface-to-area ratio for one category, scaled by the mean of `MEAN`
#'
#' Computes `sum(surface) / sum(total_area) * mean(MEAN)` using only the rows
#' of `df` whose `category` column equals `category`. Missing values are
#' dropped from each of the three aggregates.
#'
#' @param df A data frame with columns `surface`, `total_area`, `category`
#'   and `MEAN`.
#' @param category The category value to select rows by (e.g. `"A"`).
#' @return A single numeric value; `NaN` when no row matches `category`.
ratio_function <- function(df, category) {
  # Use the `category` argument instead of a hard-coded "A", so the function
  # actually honours what the caller asks for.
  in_category <- df$category == category
  sum(df$surface[in_category], na.rm = TRUE) /
    sum(df$total_area[in_category], na.rm = TRUE) *
    mean(df$MEAN[in_category], na.rm = TRUE)
}
# New approach: hand the current group's rows to ratio_function() via pick().
data |>
  group_by(ID) |>
  summarize(
    new = ratio_function(pick(everything()), category = "A")
  )
#> # A tibble: 4 × 2
#> ID new
#> <dbl> <dbl>
#> 1 1 0.309
#> 2 2 0.438
#> 3 3 0.278
#> 4 4 0.363
Created on 2024-07-19 with reprex v2.1.1