I have a data frame raw.data
which has 1472 rows and 599 columns. I also have another data frame revisions
which has 240 rows and 12 columns. What I want to do is this:
I want to replace the values in columns b131d,b131i,b132d,b132i,b133,b134,b135
in raw.data
dataframe with corresponding values of the columns with same name in revisions
dataframe. Please note that I just want to update my values in raw.data
dataframe where the brid...9
column from raw.data
dataframe matches the brid...4
column of revisions
dataframe. All other values should stay same.
Here is a minimum reporducible example with only two columns of raw.data
that need replacement:
structure(list(brid...9 = c("001683", "002611", "298578", "310321",
"343830", "043767", "338519", "309483", "001477", "312380", "045782",
"267492", "293870", "164378", "001559", "331994", "062818", "332776",
"000446", "001943", "056444", "040221", "100511", "001029", "307917",
"234181", "297916", "290176", "010393", "292641", "003300", "083654",
"002952", "309239", "338527", "309484", "302067", "005355", "021120",
"001729", "372581", "099031", "311608", "298961", "234640", "294863",
"292335", "294876", "293873", "331200", "339464", "293047", "000758",
"080393", "037177", "331307", "330965", "322966", "000483", "000789",
"360986", "343866", "299247", "004643", "004430", "323147", "021204",
"002012", "309504", "237514", "000538", "000554", "298748", "278068",
"202139", "032775", "330543", "359292", "097596", "371340", "001795",
"297071", "357626", "010702", "009743", "037254", "168314", "088917",
"371377", "308018", "328074", "002153", "331907", "299078", "004172",
"002753", "298294", "002838", "244509", "329343", "273513", "029554",
"330948", "098855", "004641", "000031", "331995", "030461", "021265",
"058921", "339592", "034908", "035575", "298257", "005180", "323812",
"167199", "324734", "295419", "342856", "169038", "375589", "307864",
"281041", "375636", "308363", "309994", "309765", "309893", "322698",
"342383", "316105", "310050", "056801", "004383", "375640", "328704",
"296442", "296654", "327853", "316747", "293048", "002085", "296271",
"330529", "375655", "142762", "002191"), pvid = c(2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2,
2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 1,
1, 1, 1, 3, 1, 1, 1, 2, 1, 1, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 1, 3, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 2,
2, 1, 1, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 1, 3, 3, 4, 4, 3, 1), b131d = c("35",
"10", "10", "60", "37", "67", "40", "36", "30", "40", NA, "35",
"53", "20", "60", "15", "2", "40", "85", "35", NA, NA, "10",
"87", "60", "65", "40", "75", "20", "60", "40", "67", "70", NA,
"20", "60", "80", "75", "70", "70", "55", NA, "50", "10", "10",
"10", "50", "60", "75", "35", "20", "70", "75", NA, NA, "73",
"58", "20", "33", "20", "30", NA, "50", "50", "40", "60", "70",
"50", NA, "65", "50", "40", "70", "22", NA, "25", "39", NA, "40",
NA, NA, "10", "60", "20", "10", NA, "35", "10", NA, "28", "40",
"40", "50", "40", "13", "60", "10", "28", "80", "58", "20", "10",
"10", "20", "5", "40", "25", "10", "70", "80", "94", NA, "15",
NA, "20", "70", "25", "60", "30", NA, "70", NA, "20", "80", NA,
"40", "5", "60", "60", "25", "20", NA, "70", "10", "50", "40",
"60", "60", "80", "65", "25", NA, "10", NA, "70", NA, "40", "30"
), b131i = c("3", "50", "60", "20", "36", "5", NA, "50", NA,
"30", NA, "25", "22", "40", NA, "15", NA, NA, NA, NA, NA, NA,
"30", NA, NA, NA, "30", "12", "40", NA, "30", NA, NA, "50", "74",
NA, NA, NA, NA, NA, "5", NA, NA, "50", "40", "40", "20", "10",
NA, "35", "50", NA, NA, NA, NA, "7", "20", "30", "35", "30",
"15", NA, NA, NA, "30", NA, NA, NA, NA, "5", NA, "25", NA, NA,
NA, NA, "26", "50", "15", "66", "40", "65", NA, "40", "55", NA,
NA, "50", "78", "42", NA, "10", NA, "17", "5", NA, "5", "12",
"5", NA, NA, NA, "30", NA, NA, "20", NA, "10", NA, NA, NA, NA,
"10", "70", "30", NA, NA, "10", "10", "75", NA, NA, NA, NA, NA,
NA, "20", NA, NA, NA, "10", NA, NA, "10", NA, NA, NA, NA, NA,
NA, "40", "40", "20", "25", NA, NA, "20", "25")), row.names = c(NA,
-148L), class = c("tbl_df", "tbl", "data.frame"))
The reproducible revisions
dataframe with 3 columns
structure(list(brid...4 = c("375588", "082516", "029554", "198631",
"101184", "045782", "186416", "003099", "022403", "035358", "999988",
"040356", "063597", "039301", "371370", "060497", "004641", "166654",
"037615", "375604", "263834", "036573", "204951", "375592", "016811",
"173196", "023387", "030461", "037622", "037589", "164660", "072816",
"072952", "138389", "999989", "056394", "058921", "360906", "136443",
"096188", "056948", "037624", "034908", "064932", "064425", "013037",
"035575", "121340", "375606", "375607", "230391", "167199", "207547",
"048246", "079663", "375628", "007669", "128992", "375627", "166649",
"206532", "200250", "375611", "169038", "093698", "315562", "375608",
"172690", "375589", "307894", "211934", "043434", "054954", "127261",
"005995", "375636", "375631", "022989", "375635", "375633", "375630",
"375634", "047348", "108376", "035680", "056801", "040501", "036278",
"014445", "375632", "375670", "375678", "003989", "007613", "056405",
"325677", "375700", "375701", "003196", "105547", "375597", "124071",
"375650", "375651", "375657", "375656", "375654", "375653", "375655",
"375658", "375661", "375660", "375662", "006245", "999985", "021296",
"059863", "054942", "054564", "380001", "380002", "380003", "098915",
"035185", "079003", "200202", "375594", "062818", "023812", "018553",
"010753", "049941", "038632", "375596", "308958", "056444", "050068",
"036149", "040221", "012821", "051534", "011275", "168937", "010757",
"019550", "051741", "220128", "042194", "009126", "056780", "230570",
"040875", "037429", "012166", "046899", "063118", "233221", "013396",
"044001", "061278", "208377", "086911", "375605", "081707", "166824",
"200363", "099031", "056473", "089882", "051642", "035882", "060509",
"186600", "053064", "199934", "058816", "004203", "066689", "051726",
"035527", "187411", "037605", "037607", "200024", "037608", "037606",
"375601", "037609", "037614", "121343", "039555", "080393", "199980",
"037177", "037613", "308028", "006978", "126557", "042910", "231053",
"050396", "074930", "051298", "138693", "108820", "059598", "008072",
"037610", "202139", "097393", "063733", "051402", "082054", "999980",
"104503", "059504", "036042", "050797", "195292", "200993", "109912",
"037254", "072805", "168314", "375595", "030154", "051530", "101668",
"054987", "051742", "120692", "209452", "097907", "375586", "055016",
"042520", "013794", "055435", "230984", "375599"), b131d = c(0,
0, 0, 20, 10, 0, 0, 10, 5, 10, 20, 20, 0, 0, 0, 0, 5, 0, 0, 5,
25, 10, 0, 0, 30, 0, 0, 10, 0, 0, 0, 10, 20, 10, 0, 10, 10, 0,
0, 20, 0, 0, 0, 0, 20, 0, 15, 0, 0, 0, 0, 10, 0, 0, 8, 0, 0,
20, 0, 0, 0, 0, 0, 0, 0, 15, 0, 6, 0, 20, 15, 0, 0, 0, 20, 0,
15, 10, 5, 10, 15, 0, 0, 0, 16, 10, 20, 0, 0, 0, 0, 20, 22, 10,
0, 10, 20, 10, 25, 0, 0, 5, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 25, 20, 0, 0, 5, 10, 0, 2, 29, 0, 0, 0,
0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 10, 0, 0, 0,
0, 10, 0, 18, 10, 0, 0, 0, 22, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 7, 0, 2, 0, 10, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0,
0, 20, 0, 10, 0, 0, 10, 10, 0, 0, 0, 10, 10, 25, 0, 5, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 10, 0, 0, 0, 10, 5, 0), b131i = c(25, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 9,
0, 20, 10, 0, 0, 5, 10, 0, 0, 0, 0, 0, 5, 15, 0, 0, 0, 0, 0,
0, 5, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 30, 0, 0, 0, 0, 0, 0,
6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0,
0, 0, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 18, 0, 15, 0, 10, 0, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 10, 0,
0, 0, 30, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-240L), class = c("tbl_df", "tbl", "data.frame"))
I think dplyr::rows_update
would help here, but first we need to make the key columns match in name, and make the replacement columns be the same type:
library(dplyr)
raw.data |>
rows_update(revisions |>
rename(`brid...9` = `brid...4`) |>
mutate(across(b131d:b131i, as.character)),
unmatched = "ignore")