Search code examples
rdictionarymergemapply

How to merge data fames in different lists including based on partial match in name with no one to one equivalent


I have two lists of dataframes that I need to merge based on the matching names in in the two lists. Here is the data:

data <- list(foo = structure(list(bodyPart = c("leg", "arm", "knee", "eye"), side = c("LEFT", "RIGHT", "LEFT", "LEFT"), device = c("LLI", "LSM", "GHT", "LLM"), length = c(12, 476, 7, 2), id = c("AA", "BB", "CC", "NN"), mwID = c("a12", "k87", "j98", NA)), class = "data.frame", row.names = c(NA, -4L)), bar = structure(list(bodyPart = c("ankel", "ear", "knee", "ARM"), side = c("LEFT", "LEFT", "LEFT", "RIGHT"), device = c("GOM", "LSM", "YYY", "RWS"), id = c("ZZ", "DD", "FF", "DC"), tqID = c("kj8", "ll23", "sc26",  "fg12")), class = "data.frame", row.names = c(NA, -4L)))

Here is the list I would like to merge to the data:

sub <- list(foo.new = structure(list("Duo:length" = c(23, 54, 77), id = c("AA", "BB", "CC")), class = "data.frame", row.names = c(NA, -3L)), bar.excel.km = structure(list("Duo:side" = c("LEFT", "RIGHT", "LEFT"), id = c("ZZ", "DD", "FF")), class = "data.frame", row.names = c(NA, -3L)))

here is the desired output:

  output <- list(foo = structure(list(id = c("AA", "BB", "CC", "NN"), bodyPart = c("leg", 
  "arm", "knee", "eye"), side = c("LEFT", "RIGHT", "LEFT", "LEFT"
  ), device = c("LLI", "LSM", "GHT", "LLM"), length = c(12, 476, 
  7, 2), mwID = c("a12", "k87", "j98", NA), `Duo:length` = c(23, 
  54, 77, NA)), row.names = c(NA, -4L), class = "data.frame"), 
  bar = structure(list(id = c("DC", "DD", "FF", "ZZ"), bodyPart = c("ARM", 
  "ear", "knee", "ankel"), side = c("RIGHT", "LEFT", "LEFT", 
  "LEFT"), device = c("RWS", "LSM", "YYY", "GOM"), tqID = c("fg12", 
  "ll23", "sc26", "kj8"), `Duo:side` = c(NA, "RIGHT", "LEFT", 
  "LEFT")), row.names = c(NA, -4L), class = "data.frame"))
      

I used the code below but the problem is there I can't specify that all the dataframes starting with "foo" in "sub" table should be merged to the "foo" table in Data.

Map(merge, data, sub, by='id', all=TRUE)

Solution

  • For pattern-matching like this, I suggest a double for-loop for simplicity.

    for (nm1 in names(data)) {
      for (nm2 in grep(nm1, names(sub), value=TRUE)) {
        data[[nm1]] <- merge(data[[nm1]], sub[[nm2]], by = "id", all = TRUE)
      }    
    }
    data
    # $foo
    #   id bodyPart  side device length mwID Duo:length
    # 1 AA      leg  LEFT    LLI     12  a12         23
    # 2 BB      arm RIGHT    LSM    476  k87         54
    # 3 CC     knee  LEFT    GHT      7  j98         77
    # 4 NN      eye  LEFT    LLM      2 <NA>         NA
    # $bar
    #   id bodyPart  side device tqID Duo:side
    # 1 DC      ARM RIGHT    RWS fg12     <NA>
    # 2 DD      ear  LEFT    LSM ll23    RIGHT
    # 3 FF     knee  LEFT    YYY sc26     LEFT
    # 4 ZZ    ankel  LEFT    GOM  kj8     LEFT
    

    Your output$foo includes Duo:bodyPart, but that isn't clear from where that data is sourced, I've skipped it for now.


    Data

    data <- list(foo = structure(list(bodyPart = c("leg", "arm", "knee", "eye"), side = c("LEFT", "RIGHT", "LEFT", "LEFT"), device = c("LLI", "LSM", "GHT", "LLM"), length = c(12, 476, 7, 2), id = c("AA", "BB", "CC", "NN"), mwID = c("a12", "k87", "j98", NA)), class = "data.frame", row.names = c(NA, -4L)), bar = structure(list(bodyPart = c("ankel", "ear", "knee", "ARM"), side = c("LEFT", "LEFT", "LEFT", "RIGHT"), device = c("GOM", "LSM", "YYY", "RWS"), id = c("ZZ", "DD", "FF", "DC"), tqID = c("kj8", "ll23", "sc26",  "fg12")), class = "data.frame", row.names = c(NA, -4L)))
    sub <- list(foo.new = structure(list("Duo:length" = c(23, 54, 77), id = c("AA", "BB", "CC")), class = "data.frame", row.names = c(NA, -3L)), bar.excel.km = structure(list("Duo:side" = c("LEFT", "RIGHT", "LEFT"), id = c("ZZ", "DD", "FF")), class = "data.frame", row.names = c(NA, -3L)))