Search code examples
rdataframedplyrmerge

Replace values in 7 columns of original dataframe with values in reference dataframe


I have a data frame raw.data which has 1472 rows and 599 columns. I also have another data frame revisions which has 240 rows and 12 columns. What I want to do is this: I want to replace the values in columns b131d,b131i,b132d,b132i,b133,b134,b135 in raw.data dataframe with corresponding values of the columns with same name in revisions dataframe. Please note that I just want to update my values in raw.data dataframe where the brid...9 column from raw.data dataframe matches the brid...4 column of revisions dataframe. All other values should stay same.

Here is a minimum reporducible example with only two columns of raw.data that need replacement:

structure(list(brid...9 = c("001683", "002611", "298578", "310321", 
"343830", "043767", "338519", "309483", "001477", "312380", "045782", 
"267492", "293870", "164378", "001559", "331994", "062818", "332776", 
"000446", "001943", "056444", "040221", "100511", "001029", "307917", 
"234181", "297916", "290176", "010393", "292641", "003300", "083654", 
"002952", "309239", "338527", "309484", "302067", "005355", "021120", 
"001729", "372581", "099031", "311608", "298961", "234640", "294863", 
"292335", "294876", "293873", "331200", "339464", "293047", "000758", 
"080393", "037177", "331307", "330965", "322966", "000483", "000789", 
"360986", "343866", "299247", "004643", "004430", "323147", "021204", 
"002012", "309504", "237514", "000538", "000554", "298748", "278068", 
"202139", "032775", "330543", "359292", "097596", "371340", "001795", 
"297071", "357626", "010702", "009743", "037254", "168314", "088917", 
"371377", "308018", "328074", "002153", "331907", "299078", "004172", 
"002753", "298294", "002838", "244509", "329343", "273513", "029554", 
"330948", "098855", "004641", "000031", "331995", "030461", "021265", 
"058921", "339592", "034908", "035575", "298257", "005180", "323812", 
"167199", "324734", "295419", "342856", "169038", "375589", "307864", 
"281041", "375636", "308363", "309994", "309765", "309893", "322698", 
"342383", "316105", "310050", "056801", "004383", "375640", "328704", 
"296442", "296654", "327853", "316747", "293048", "002085", "296271", 
"330529", "375655", "142762", "002191"), pvid = c(2, 2, 2, 2, 
2, 2, 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 
2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 1, 
1, 1, 1, 3, 1, 1, 1, 2, 1, 1, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 1, 3, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 2, 
2, 1, 1, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 1, 3, 3, 4, 4, 3, 1), b131d = c("35", 
"10", "10", "60", "37", "67", "40", "36", "30", "40", NA, "35", 
"53", "20", "60", "15", "2", "40", "85", "35", NA, NA, "10", 
"87", "60", "65", "40", "75", "20", "60", "40", "67", "70", NA, 
"20", "60", "80", "75", "70", "70", "55", NA, "50", "10", "10", 
"10", "50", "60", "75", "35", "20", "70", "75", NA, NA, "73", 
"58", "20", "33", "20", "30", NA, "50", "50", "40", "60", "70", 
"50", NA, "65", "50", "40", "70", "22", NA, "25", "39", NA, "40", 
NA, NA, "10", "60", "20", "10", NA, "35", "10", NA, "28", "40", 
"40", "50", "40", "13", "60", "10", "28", "80", "58", "20", "10", 
"10", "20", "5", "40", "25", "10", "70", "80", "94", NA, "15", 
NA, "20", "70", "25", "60", "30", NA, "70", NA, "20", "80", NA, 
"40", "5", "60", "60", "25", "20", NA, "70", "10", "50", "40", 
"60", "60", "80", "65", "25", NA, "10", NA, "70", NA, "40", "30"
), b131i = c("3", "50", "60", "20", "36", "5", NA, "50", NA, 
"30", NA, "25", "22", "40", NA, "15", NA, NA, NA, NA, NA, NA, 
"30", NA, NA, NA, "30", "12", "40", NA, "30", NA, NA, "50", "74", 
NA, NA, NA, NA, NA, "5", NA, NA, "50", "40", "40", "20", "10", 
NA, "35", "50", NA, NA, NA, NA, "7", "20", "30", "35", "30", 
"15", NA, NA, NA, "30", NA, NA, NA, NA, "5", NA, "25", NA, NA, 
NA, NA, "26", "50", "15", "66", "40", "65", NA, "40", "55", NA, 
NA, "50", "78", "42", NA, "10", NA, "17", "5", NA, "5", "12", 
"5", NA, NA, NA, "30", NA, NA, "20", NA, "10", NA, NA, NA, NA, 
"10", "70", "30", NA, NA, "10", "10", "75", NA, NA, NA, NA, NA, 
NA, "20", NA, NA, NA, "10", NA, NA, "10", NA, NA, NA, NA, NA, 
NA, "40", "40", "20", "25", NA, NA, "20", "25")), row.names = c(NA, 
-148L), class = c("tbl_df", "tbl", "data.frame"))

The reproducible revisions dataframe with 3 columns

structure(list(brid...4 = c("375588", "082516", "029554", "198631", 
"101184", "045782", "186416", "003099", "022403", "035358", "999988", 
"040356", "063597", "039301", "371370", "060497", "004641", "166654", 
"037615", "375604", "263834", "036573", "204951", "375592", "016811", 
"173196", "023387", "030461", "037622", "037589", "164660", "072816", 
"072952", "138389", "999989", "056394", "058921", "360906", "136443", 
"096188", "056948", "037624", "034908", "064932", "064425", "013037", 
"035575", "121340", "375606", "375607", "230391", "167199", "207547", 
"048246", "079663", "375628", "007669", "128992", "375627", "166649", 
"206532", "200250", "375611", "169038", "093698", "315562", "375608", 
"172690", "375589", "307894", "211934", "043434", "054954", "127261", 
"005995", "375636", "375631", "022989", "375635", "375633", "375630", 
"375634", "047348", "108376", "035680", "056801", "040501", "036278", 
"014445", "375632", "375670", "375678", "003989", "007613", "056405", 
"325677", "375700", "375701", "003196", "105547", "375597", "124071", 
"375650", "375651", "375657", "375656", "375654", "375653", "375655", 
"375658", "375661", "375660", "375662", "006245", "999985", "021296", 
"059863", "054942", "054564", "380001", "380002", "380003", "098915", 
"035185", "079003", "200202", "375594", "062818", "023812", "018553", 
"010753", "049941", "038632", "375596", "308958", "056444", "050068", 
"036149", "040221", "012821", "051534", "011275", "168937", "010757", 
"019550", "051741", "220128", "042194", "009126", "056780", "230570", 
"040875", "037429", "012166", "046899", "063118", "233221", "013396", 
"044001", "061278", "208377", "086911", "375605", "081707", "166824", 
"200363", "099031", "056473", "089882", "051642", "035882", "060509", 
"186600", "053064", "199934", "058816", "004203", "066689", "051726", 
"035527", "187411", "037605", "037607", "200024", "037608", "037606", 
"375601", "037609", "037614", "121343", "039555", "080393", "199980", 
"037177", "037613", "308028", "006978", "126557", "042910", "231053", 
"050396", "074930", "051298", "138693", "108820", "059598", "008072", 
"037610", "202139", "097393", "063733", "051402", "082054", "999980", 
"104503", "059504", "036042", "050797", "195292", "200993", "109912", 
"037254", "072805", "168314", "375595", "030154", "051530", "101668", 
"054987", "051742", "120692", "209452", "097907", "375586", "055016", 
"042520", "013794", "055435", "230984", "375599"), b131d = c(0, 
0, 0, 20, 10, 0, 0, 10, 5, 10, 20, 20, 0, 0, 0, 0, 5, 0, 0, 5, 
25, 10, 0, 0, 30, 0, 0, 10, 0, 0, 0, 10, 20, 10, 0, 10, 10, 0, 
0, 20, 0, 0, 0, 0, 20, 0, 15, 0, 0, 0, 0, 10, 0, 0, 8, 0, 0, 
20, 0, 0, 0, 0, 0, 0, 0, 15, 0, 6, 0, 20, 15, 0, 0, 0, 20, 0, 
15, 10, 5, 10, 15, 0, 0, 0, 16, 10, 20, 0, 0, 0, 0, 20, 22, 10, 
0, 10, 20, 10, 25, 0, 0, 5, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 25, 20, 0, 0, 5, 10, 0, 2, 29, 0, 0, 0, 
0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 10, 0, 0, 0, 
0, 10, 0, 18, 10, 0, 0, 0, 22, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 7, 0, 2, 0, 10, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 
0, 20, 0, 10, 0, 0, 10, 10, 0, 0, 0, 10, 10, 25, 0, 5, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 10, 0, 0, 0, 10, 5, 0), b131i = c(25, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 9, 
0, 20, 10, 0, 0, 5, 10, 0, 0, 0, 0, 0, 5, 15, 0, 0, 0, 0, 0, 
0, 5, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 30, 0, 0, 0, 0, 0, 0, 
6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 18, 0, 15, 0, 10, 0, 5, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 
10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 10, 0, 
0, 0, 30, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 
-240L), class = c("tbl_df", "tbl", "data.frame"))

Solution

  • I think dplyr::rows_update would help here, but first we need to make the key columns match in name, and make the replacement columns be the same type:

    library(dplyr)
    raw.data |>
      rows_update(revisions |> 
                    rename(`brid...9` = `brid...4`) |>
                    mutate(across(b131d:b131i, as.character)),
                  unmatched = "ignore")