Search code examples
r list dplyr tibble

replace values in list-column based on named vector


Given the following data frame (or tibble) with a list-column:

# Pin the RNG so the two sampled letter columns are reproducible.
set.seed(1)

# Example frame: two random letter columns, a yes/no flag column, and a
# list-column whose elements are either numeric vectors or single strings.
# I() keeps the list as one list-column instead of letting data.frame()
# spread it over several columns.
df <- data.frame(
  a = sample(letters, 7),
  b = sample(letters, 7),
  c = c("yes", "no", "yes", "no", "yes", "no", "no"),
  list_col = I(list(
    c(1, 2, 3),
    "hjhj",
    c(1, 4),
    "kkjkj",
    c(3, 4),
    "jkj",
    c(1, 2)
  ))
)
print(df)
#   a b   c list_col
# 1 y r yes  1, 2, 3
# 2 d s  no     hjhj
# 3 g a yes     1, 4
# 4 a u  no    kkjkj
# 5 b w yes     3, 4
# 6 k j  no      jkj
# 7 n n  no     1, 2

str(df)
# 'data.frame': 7 obs. of  4 variables:
#  $ a       : chr  "y" "d" "g" "a" ...
#  $ b       : chr  "r" "s" "a" "u" ...
#  $ c       : chr  "yes" "no" "yes" "no" ...
#  $ list_col:List of 7
#   ..$ : num  1 2 3
#   ..$ : chr "hjhj"
#   ..$ : num  1 4
#   ..$ : chr "kkjkj"
#   ..$ : num  3 4
#   ..$ : chr "jkj"
#   ..$ : num  1 2
#   ..- attr(*, "class")= chr "AsIs"

I want to replace the list_col values with the names of the corresponding values from a lookup table, but only for rows where column c == "yes":

# Named lookup: each name is the label that stands for its numeric value.
c_yes_column_look_up <- setNames(
  c(1, 2, 3, 4),
  paste0("number", 1:4)
)

so that my final df looks like this:

df_final
#   a b   c     list_col
# 1 y r yes number1,....
# 2 d s  no         hjhj
# 3 g a yes number1,....
# 4 a u  no        kkjkj
# 5 b w yes number3,....
# 6 k j  no          jkj
# 7 n n  no         1, 2

str(df_final)
# 'data.frame': 7 obs. of  4 variables:
#  $ a       : chr  "y" "d" "g" "a" ...
#  $ b       : chr  "r" "s" "a" "u" ...
#  $ c       : chr  "yes" "no" "yes" "no" ...
#  $ list_col:List of 7
#   ..$ : chr  "number1" "number2" "number3"
#   ..$ : chr "hjhj"
#   ..$ : chr  "number1" "number4"
#   ..$ : chr "kkjkj"
#   ..$ : chr  "number3" "number4"
#   ..$ : chr "jkj"
#   ..$ : num  1 2
#   ..- attr(*, "class")= chr "AsIs"

I was thinking of something along the lines of this, but I can't quite figure it out:

library(tidyverse)
# NOTE(review): non-working attempt, kept exactly as posted.
# - With rowwise() commented out, `list_col %in% c_yes_column_look_up` is
#   evaluated on the whole list-column; presumably it tests each list element
#   as a single unit rather than the numbers inside it -- confirm.
# - `names(list_col[...])` asks for names of list_col itself, which is built
#   without names; the labels live on c_yes_column_look_up.
# - The two case_when() branches would yield labels on one side and the
#   original list-column on the other.
df %>% 
  #rowwise() %>%
  mutate(list_col = case_when(c == "yes" & list_col %in% c_yes_column_look_up ~ names(list_col[list_col %in% c_yes_column_look_up]),
                                                        TRUE ~ list_col))

I'm open to other approaches too, thanks.


Solution

  • The named vector has to be reversed, so that its values become the names and its names become the values. We could use either rowwise or map2 (map2 may be more efficient). Loop over 'list_col' together with the elements of 'c'; where the 'c' value is 'yes', look each value of the list element up in the reversed named vector, and coalesce with the original values (as character) so that anything without a match (NA) keeps its original value. Otherwise return the list element unchanged.

    library(dplyr)
    library(purrr)
    # Walk over each list-column element (.x) together with its `c` flag (.y).
    # Rows flagged "yes" get every value replaced by its label from
    # c_yes_column_look_up; all other rows are returned untouched.
    df1 <- df %>% 
        mutate(list_col = map2(list_col, c, ~ if (.y %in% "yes") {
            # Reverse the lookup: values become the names, labels the values.
            label_by_value <- setNames(names(c_yes_column_look_up),
                                       c_yes_column_look_up)
            # Subset by *name* (character), not by position. A bare numeric
            # .x would pick the .x-th entry, which is only right while the
            # lookup values happen to be 1, 2, 3, ... in that order.
            matched <- label_by_value[as.character(.x)]
            # Values with no label come back as NA; keep them as text.
            unname(coalesce(matched, as.character(.x)))
        } else {
            .x
        }))
    

    -output

    > str(df1)
    'data.frame':   7 obs. of  4 variables:
     $ a       : chr  "y" "d" "g" "a" ...
     $ b       : chr  "r" "s" "a" "u" ...
     $ c       : chr  "yes" "no" "yes" "no" ...
     $ list_col:List of 7
      ..$ : chr  "number1" "number2" "number3"
      ..$ : chr "hjhj"
      ..$ : chr  "number1" "number4"
      ..$ : chr "kkjkj"
      ..$ : chr  "number3" "number4"
      ..$ : chr "jkj"
      ..$ : num  1 2