Search code examples
rdplyrmergepipeleft-join

Merge dataframes with unequal lengths and substituting 0 for values that are not present


I have two data frames of unequal length, one data frame contains a subset of dates of the first data frame and they each have different columns:

Data Frame 1:

structure(list(date = structure(c(19241, 19249, 19252, 19255, 
19259, 19260, 19268, 19286, 19288, 19290, 19294, 19302, 19306, 
19307, 19310, 19315, 19318, 19330, 19332, 19334, 19341, 19343, 
19344, 19363, 19365, 19367, 19373, 19382, 19385, 19400, 19405, 
19406, 19408, 19410, 19415, 19418, 19421, 19435, 19438, 19441, 
19444, 19449, 19454, 19463, 19467, 19468, 19476, 19480, 19494, 
19497, 19498, 19500, 19504, 19507, 19512, 19519, 19523, 19525, 
19535, 19546, 19547, 19550, 19557, 19560, 19565, 19585, 19587, 
19588, 19589, 19592, 19595), class = "Date"), month = c(9, 9, 
9, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 
12, 12, 12, 12, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 
7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8), season = c("Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Winter", 
"Winter", "Winter", "Winter", "Winter", "Winter", "Winter", "Winter", 
"Winter", "Winter", "Winter", "Winter", "Winter", "Winter", "Winter", 
"Winter", "Winter", "Winter", "Spring", "Spring", "Spring", "Spring", 
"Spring", "Spring", "Spring", "Spring", "Spring", "Spring", "Spring", 
"Spring", "Spring", "Spring", "Spring", "Spring", "Spring", "Spring", 
"Spring", "Summer", "Summer", "Summer", "Summer", "Summer", "Summer", 
"Summer", "Summer", "Summer", "Summer", "Summer", "Summer", "Summer", 
"Summer", "Summer", "Summer", "Summer"), daytype = c("Weekday", 
"Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekend/Holiday", 
"Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekday", "Weekend/Holiday", "Weekday", "Weekday", "Weekday", 
"Weekend/Holiday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekday", "Weekday", "Weekday", "Weekend/Holiday", "Weekend/Holiday", 
"Weekday", "Weekend/Holiday", "Weekday", "Weekday", "Weekday", 
"Weekday", "Weekend/Holiday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekday", 
"Weekday", "Weekend/Holiday", "Weekday", "Weekday", "Weekend/Holiday", 
"Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekend/Holiday", "Weekday", "Weekend/Holiday", "Weekday", "Weekend/Holiday", 
"Weekend/Holiday", "Weekday", "Weekday", "Weekend/Holiday", "Weekday", 
"Weekday", "Weekday", "Weekend/Holiday", "Weekend/Holiday", "Weekday", 
"Weekend/Holiday"), total_effort_mean = c(0, 0, 4.11092592592593, 
0, 0, 0, 0, 0, 0, 0, 0, 7.09981481481481, 7.02314814814815, 42.0277777777778, 
0, 0, 17.0518518518519, 3.3362962962963, 6.65444444444444, 0, 
0, 0, 3.295, 9.98138888888889, 30.0266666666667, 0, 0, 0, 6.95314814814815, 
18.1203703703704, 33.1041666666667, 81.1657407407407, 44.5388888888889, 
3.73407407407407, 11.3738888888889, 0, 15.4440740740741, 4.02666666666667, 
12.1866666666667, 45.0765740740741, 12.4, 71.2662037037037, 29.7519444444444, 
34.8207407407407, 13.1905555555556, 0, 0, 4.53222222222222, 0, 
0, 9.37611111111111, 0, 28.37, 0, 0, 0, 0, 0, 28.8261111111111, 
0, 0, 0, 0, 9.38574074074074, 0, 0, 0, 0, 0, 0, 0)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -71L), groups = structure(list(
    date = structure(c(19241, 19249, 19252, 19255, 19259, 19260, 
    19268, 19286, 19288, 19290, 19294, 19302, 19306, 19307, 19310, 
    19315, 19318, 19330, 19332, 19334, 19341, 19343, 19344, 19363, 
    19365, 19367, 19373, 19382, 19385, 19400, 19405, 19406, 19408, 
    19410, 19415, 19418, 19421, 19435, 19438, 19441, 19444, 19449, 
    19454, 19463, 19467, 19468, 19476, 19480, 19494, 19497, 19498, 
    19500, 19504, 19507, 19512, 19519, 19523, 19525, 19535, 19546, 
    19547, 19550, 19557, 19560, 19565, 19585, 19587, 19588, 19589, 
    19592, 19595), class = "Date"), .rows = structure(list(1L, 
        2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
        15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 
        26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 
        37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 
        48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 
        59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 
        70L, 71L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -71L), .drop = TRUE))

Data frame 2:

structure(list(date = structure(c(19306, 19307, 19318, 19332, 
19363, 19400, 19405, 19406, 19408, 19415, 19421, 19438, 19441, 
19444, 19449, 19454, 19467, 19498, 19535, 19560), class = "Date"), 
    kCPUE_mean = c(1.6491067338525, 0.413793103448276, 0, 0.536672629695885, 
    0, 0, 0.290541824256615, 0.206362854686156, 0.217817454360528, 
    0, 0.923530060205667, 1.59412955465587, 0.4085480691282, 
    0, 0, 4.97787610619469, 0.910815939278937, 0, 0, 0), rCPUE_mean = c(0, 
    0, 0.37344398340249, 0, 0.300651411391348, 0, 0.0562148657089319, 
    0, 0, 1.05324751316559, 0, 1.59412955465587, 0, 3.46699352127473, 
    4.17056683136478, 0, 0, 0, 0, 0), cCPUE_mean = c(1.6491067338525, 
    0.413793103448276, 0.37344398340249, 0.536672629695885, 0.300651411391348, 
    0, 0.346756689965547, 0.206362854686156, 0.217817454360528, 
    1.05324751316559, 0.923530060205667, 3.18825910931174, 0.4085480691282, 
    3.46699352127473, 4.17056683136478, 4.97787610619469, 0.910815939278937, 
    0, 0, 0)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-20L))

I would like to merge these data frames together but where there is no observation of a date in data frame 2 I would like the values for kCPUE_mean, rCPUE_mean and cCPUE_mean to be 0. Is this possible with code? Could it also be done in a dplyr pipe?


Solution

  • Do a left_join and then replace the NAs with 0s

    df3 <- dplyr::left_join(df1,df2,by="date")
    df3[is.na(df3)] <- 0