Given the following `df` (or `tibble`) with a list-column:
# Reproducible example data: three character columns plus a list-column
# whose elements are either numeric vectors or single strings.
set.seed(1)
df <- data.frame(
  a = sample(letters, 7),
  b = sample(letters, 7),
  c = c("yes", "no", "yes", "no", "yes", "no", "no"),
  # I() protects the list so data.frame() stores it as a single list-column.
  list_col = I(list(
    c(1, 2, 3),
    "hjhj",
    c(1, 4),
    "kkjkj",
    c(3, 4),
    "jkj",
    c(1, 2)
  ))
)
df
# a b c list_col
# 1 y r yes 1, 2, 3
# 2 d s no hjhj
# 3 g a yes 1, 4
# 4 a u no kkjkj
# 5 b w yes 3, 4
# 6 k j no jkj
# 7 n n no 1, 2
str(df)
# 'data.frame': 7 obs. of 4 variables:
# $ a : chr "y" "d" "g" "a" ...
# $ b : chr "r" "s" "a" "u" ...
# $ c : chr "yes" "no" "yes" "no" ...
# $ list_col:List of 7
# ..$ : num 1 2 3
# ..$ : chr "hjhj"
# ..$ : num 1 4
# ..$ : chr "kkjkj"
# ..$ : num 3 4
# ..$ : chr "jkj"
# ..$ : num 1 2
# ..- attr(*, "class")= chr "AsIs"
I want to replace the `list_col` values with the names of the corresponding values from a lookup table, but only for rows where column `c == "yes"`:
# Named lookup: each label ("number1" ... "number4") names its numeric code.
c_yes_column_look_up <- setNames(
  c(1, 2, 3, 4),
  paste0("number", 1:4)
)
so that my final `df` looks like:
df_final
# a b c list_col
# 1 y r yes number1,....
# 2 d s no hjhj
# 3 g a yes number1,....
# 4 a u no kkjkj
# 5 b w yes number3,....
# 6 k j no jkj
# 7 n n no 1, 2
str(df_final)
# 'data.frame': 7 obs. of 4 variables:
# $ a : chr "y" "d" "g" "a" ...
# $ b : chr "r" "s" "a" "u" ...
# $ c : chr "yes" "no" "yes" "no" ...
# $ list_col:List of 7
# ..$ : chr "number1" "number2" "number3"
# ..$ : chr "hjhj"
# ..$ : chr "number1" "number4"
# ..$ : chr "kkjkj"
# ..$ : chr "number3" "number4"
# ..$ : chr "jkj"
# ..$ : num 1 2
# ..- attr(*, "class")= chr "AsIs"
I was thinking of something along the lines of this, but I can't quite figure it out:
library(tidyverse)
# Attempted approach (does not yield the desired output).
# NOTE(review): with rowwise() commented out, `list_col %in% c_yes_column_look_up`
# appears to test each whole list element against the lookup rather than the
# numbers stored inside it, and names(list_col[...]) asks for the names of the
# list-column itself instead of the lookup names — confirm before reusing.
df %>%
#rowwise() %>%
mutate(list_col = case_when(c == "yes" & list_col %in% c_yes_column_look_up ~ names(list_col[list_col %in% c_yes_column_look_up]),
TRUE ~ list_col))
I'm open to other approaches too, thanks.
The named vector should be reversed (i.e. its names and values swapped). We could use either `rowwise` or `map2` (but `map2` may be more efficient). Loop over 'list_col' and the elements of 'c'; where the 'c' value is 'yes', match the vector values against the names of the reversed named vector, then `coalesce` (in case there are NAs) so that unmatched values fall back to the original vector.
library(dplyr)
library(purrr)
# Reverse the lookup once, outside the per-row loop: the names become the
# codes (as character strings) and the values become the labels.
label_by_value <- setNames(
  names(c_yes_column_look_up),
  c_yes_column_look_up
)

# Replace the values of `list_col` with their lookup labels, but only in rows
# where `c` is "yes"; all other rows keep their original list element.
df1 <- df %>%
  mutate(list_col = map2(list_col, c, function(values, flag) {
    if (flag %in% "yes") {
      # Index by *name*, not by position: a numeric index would select the
      # n-th entry of the lookup, which is only correct when the codes happen
      # to be 1, 2, 3, ... in order. Converting to character makes the match
      # depend on the code itself.
      keys <- as.character(values)
      # Codes missing from the lookup come back as NA; coalesce() falls back
      # to the original value (as character) for those.
      unname(coalesce(label_by_value[keys], keys))
    } else {
      values
    }
  }))
-output
> str(df1)
'data.frame': 7 obs. of 4 variables:
$ a : chr "y" "d" "g" "a" ...
$ b : chr "r" "s" "a" "u" ...
$ c : chr "yes" "no" "yes" "no" ...
$ list_col:List of 7
..$ : chr "number1" "number2" "number3"
..$ : chr "hjhj"
..$ : chr "number1" "number4"
..$ : chr "kkjkj"
..$ : chr "number3" "number4"
..$ : chr "jkj"
..$ : num 1 2