Search code examples
rmerge

Merge a data frame and list based on the names of the items in a list multiple times


I am looking to merge a dataframe and a list together in R with multiple conditions (the column "Variable" as well as "Variable_").

Want to merge list that has 3 groups each with V1-V5 and V1_-V5_:

$GROUP1
$GROUP1$V1
[1] 0.857138

$GROUP1$V2
[1] 1

$GROUP1$V3
[1] 0.5

$GROUP1$V4
[1] "not limiting"

$GROUP1$V5
[1] 0.1


$Group2
$Group2$V1
[1] 0.65

$Group2$V2
[1] 1

$Group2$V3
[1] 1

$Group2$V4
[1] 0.6

$Group2$V5
[1] 0.25
...

and a dataframe:

   Col.A Col.B  Group Variable_ Variable
1      x     x Group1       V1_       V1
2      x     x Group1       V2_       V2
3      x     x Group1       V3_       V3
4      x     x Group1       V4_       V4
5      x     x Group1       V5_       V5
6      x     x Group2       V1_       V1
7      x     x Group2       V2_       V2
8      x     x Group2       V3_       V3
9      x     x Group2       V4_       V4
10     x     x Group2       V5_       V5
11     x     x Group3       V1_       V1
12     x     x Group3       V2_       V2
13     x     x Group3       V3_       V3
14     x     x Group3       V4_       V4
15     x     x Group3       V5_       V5

I would like the final output to look like the above dataframe but with two new columns one with the values from "Variable" of the list and the other with the values for "Variable_"

Code to merge by one condition merge by "Variable"

List |> 
  unlist() |> 
  enframe() |> 
  separate_wider_delim(cols = name, names = c("Group", "Variable"), delim = ".") |> 
  right_join(data)

data = structure(list(Col.A = c("x", "x", "x", "x", "x", "x", "x", "x", 
                                "x", "x", "x", "x", "x", "x", "x"), Col.B = c("x", "x", "x", 
                                                                              "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x", "x"), 
                      Group = c("Group1", "Group1", "Group1", "Group1", "Group1", 
                                "Group2", "Group2", "Group2", "Group2", "Group2", "Group3", 
                                "Group3", "Group3", "Group3", "Group3"), Variable_ = c("V1_", 
                                                                                      "V2_", "V3_", "V4_", "V5_", "V1_", "V2_", "V3_", "V4_", "V5_", "V1_", 
                                                                                      "V2_", "V3_", "V4_", "V5_"), Variable = c("V1", 
                                                                                                                             "V2", "V3", "V4", "V5", "V1", "V2", "V3", "V4", "V5", "V1", 
                                                                                                                             "V2", "V3", "V4", "V5")), class = "data.frame", row.names = c(NA, 
                                                                                                                                                    -15L))

List = list(GROUP1 = list(V1 = 0.857138, V2 = 1, V3 = 0.5, 
                          V4 = "not limiting", V5 = 0.1), 
            Group2 = list(V1 = 0.65, V2 = 1, V3 = 1, 
                          V4 = 0.6, V5 = 0.25), 
            Group3 = list(V1 = 0.65, V2 = 0.75, V3 = 0.3, 
                          V4 = 1, V5 = 1),
            GROUP1 = list(V1_ = "x", V2_ = "x", V3_ = "x", 
                          V4_ = "x", V5_ = "x"), 
            Group2 = list(V1_ = "x", V2_ = "x", V3_ = "x", 
                          V4_ = "x", V5_ = "x"), 
            Group3 = list(V1_ = "x", V2_ = "x", V3_ = "x", 
                          V4_ = "x", V5_ = "x"))

Solution

  • unlist() your list into a named lookup vector, then paste() Group with Variable or Variable_ to index into it:

    library(dplyr)
    library(stringr)
    
    # `str_to_title()` to match "GROUP1" with "Group1"
    lookup <- List |>
      setNames(str_to_title(names(List))) |>
      unlist()
    
    data |>
      mutate(
        Value_ = lookup[paste(Group, Variable_, sep = ".")],
        Value = lookup[paste(Group, Variable, sep = ".")]
      )
    

    Result:

       Col.A Col.B  Group Variable_ Variable Value_        Value
    1      x     x Group1       V1_       V1      x     0.857138
    2      x     x Group1       V2_       V2      x            1
    3      x     x Group1       V3_       V3      x          0.5
    4      x     x Group1       V4_       V4      x not limiting
    5      x     x Group1       V5_       V5      x          0.1
    6      x     x Group2       V1_       V1      x         0.65
    7      x     x Group2       V2_       V2      x            1
    8      x     x Group2       V3_       V3      x            1
    9      x     x Group2       V4_       V4      x          0.6
    10     x     x Group2       V5_       V5      x         0.25
    11     x     x Group3       V1_       V1      x         0.65
    12     x     x Group3       V2_       V2      x         0.75
    13     x     x Group3       V3_       V3      x          0.3
    14     x     x Group3       V4_       V4      x            1
    15     x     x Group3       V5_       V5      x            1