Writing the predicate for purrr's map_if: apply a function to all data frames whose column includes specific values

Once again I'm struggling with purrr's map functions.

I've got a list of dataframes, all with ID and Name columns.

I want to perform some recoding and then aggregation on rows with some specific values. For that purpose, I've got another data frame (newIDdf) that maps each ID to an aggregated ID (IDagr); I want to replace the IDs with their aggregated ID before doing the aggregation (summing all numeric variables).

I know how to perform this on one data frame (see II/), but I don't know what test to write in a map_if call to apply those operations to all data frames whose ID column includes some values of newIDdf$ID (here data frames B and C).

Any ideas ?

## I/  2 objects 
 # Named list of three example data frames (A, B, C). Every frame has ID and
 # Name columns plus random numeric Var columns (A has three, B and C two).
 # Built inside local() so the per-frame helpers do not leak into the workspace.
 list_df <- local({
   df_a <- data.frame(
     ID = c("a", "b", "c", "Z", "Y"),
     Name = c("a_name", "b_name", "c_name", "Z_name", "Y_name"),
     Var1 = rnorm(5),
     Var2 = rnorm(5),
     Var3 = rnorm(5)
   )
   df_b <- data.frame(
     ID = c("a", "b", "z1", "z2", "z3"),
     Name = c("a_name", "b_name", "z1_name", "z2_name", "z3_name"),
     Var1 = rnorm(5),
     Var2 = rnorm(5)
   )
   df_c <- data.frame(
     ID = c("y1", "y2", "z1", "z2", "z3"),
     Name = c("y1_name", "y2_name", "z1_name", "z2_name", "z3_name"),
     Var1 = rnorm(5),
     Var2 = rnorm(5)
   )
   list(A = df_a, B = df_b, C = df_c)
 })
 
 # Correspondence table used for aggregation: each detailed ID (column ID)
 # is paired with the aggregated ID it rolls up to (column IDagr).
 newIDdf <- data.frame(
   ID = c("y1", "y2", "z1", "z2", "z3"),
   IDagr = rep(c("Y", "Z"), times = c(2L, 3L))
 )
 
 ## II/ what I want to do (but on 1 df)
# example on 1 df
 
 # Recode and aggregate a single data frame (B): replace detailed IDs by their
 # aggregated ID, relabel Name accordingly, then sum every numeric column per
 # ID and keep one row per ID.
 On1df <- list_df[["B"]] %>%
   # Exact-match lookup: an ID found in newIDdf$ID takes the corresponding
   # IDagr; any other ID yields NA from match() and coalesce() keeps it as is.
   # (Unlike a regex replace, this never rewrites part of a longer ID.)
   mutate(ID = coalesce(newIDdf$IDagr[match(ID, newIDdf$ID)], ID)) %>%
   mutate(Name = case_when(ID == "Z" ~ "Z_name",
                           ID == "Y" ~ "Y_name",
                           TRUE ~ Name)) %>%
   group_by(ID) %>%
   # Replace each numeric column by its group total, kept as a plain numeric
   # value (not wrapped in a list, which would create list-columns).
   mutate(across(where(is.numeric), sum)) %>%
   # Every row of a group now carries the same totals; keep the first one.
   distinct(ID, .keep_all = TRUE) %>%
   # Drop the grouping so later verbs are not silently applied per ID.
   ungroup()

## III/ What I really want to achieve
 # What if I want to do that simultaneously on df B and C?
 # I mean applying those operations to the data frames
# whose ID column includes some values of newIDdf$ID
 
 # Apply the recode-and-aggregate steps only to the data frames whose ID column
 # contains at least one ID listed in newIDdf$ID; map_if() returns the other
 # list elements unchanged.
 list_df_output <- list_df %>%
   map_if(
     # Inside a purrr lambda the current list element is `.x`. A bare `ID` is
     # not looked up in the data frame, so the column must be read as `.x$ID`.
     .p = ~ any(.x$ID %in% newIDdf$ID),
     .f = ~ .x %>%
       # Exact-match lookup: IDs found in newIDdf$ID take their IDagr value;
       # for all others match() gives NA and coalesce() keeps the original ID.
       mutate(ID = coalesce(newIDdf$IDagr[match(ID, newIDdf$ID)], ID)) %>%
       mutate(Name = case_when(ID == "Z" ~ "Z_name",
                               ID == "Y" ~ "Y_name",
                               TRUE ~ Name)) %>%
       group_by(ID) %>%
       # Replace each numeric column by its group total as a plain number.
       mutate(across(where(is.numeric), sum)) %>%
       # Keep one row per ID (all rows of a group now hold the same totals).
       distinct(ID, .keep_all = TRUE) %>%
       # Return an ungrouped result so later verbs are not applied per ID.
       ungroup()
   )

Solution

Is this what you want? I also replaced your mutate_if call with the more recent equivalent using across() and where():

  # Recode and aggregate only the data frames whose ID column contains at least
  # one ID listed in newIDdf$ID; map_if() leaves the other elements untouched.
  map_if(
    list_df,
    .p = \(df) any(df$ID %in% newIDdf$ID),
    .f = \(df) {
      # Successively replace each pattern from newIDdf$ID by its IDagr value.
      recoded <- mutate(
        df,
        ID = reduce2(newIDdf$ID, newIDdf$IDagr, str_replace, .init = ID)
      )
      # Give the aggregated IDs their matching names.
      relabelled <- mutate(
        recoded,
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      )
      # Replace every numeric column by its per-ID total.
      totals <- mutate(
        group_by(relabelled, ID),
        across(where(is.numeric), \(col) sum(col))
      )
      # One row per ID; the result stays grouped by ID.
      distinct(totals, ID, .keep_all = TRUE)
    }
  )

Output:

$A
  ID   Name       Var1       Var2       Var3
1  a a_name  0.1015844  0.6306434  0.5058593
2  b b_name -0.1420690  0.5152645  0.2497879
3  c c_name  0.5841423  1.2883330  0.5297098
4  Z Z_name  1.6645565  0.2307524 -1.0418045
5  Y Y_name -0.1293767 -2.4152871 -0.1935843

$B
# A tibble: 3 × 4
# Groups:   ID [3]
  ID    Name     Var1   Var2
  <chr> <chr>   <dbl>  <dbl>
1 a     a_name -0.512 -0.119
2 b     b_name -2.14  -0.834
3 Z     Z_name  0.468  2.54 

$C
# A tibble: 2 × 4
# Groups:   ID [2]
  ID    Name    Var1  Var2
  <chr> <chr>  <dbl> <dbl>
1 Y     Y_name 1.15  0.162
2 Z     Z_name 0.790 2.03