Search code examples
rdataframedatemultiple-columns

How to add date column to a dataframe based on a common column in r


I have two dataframes:

dput(head(catch,70)) 
structure(list(fldFinalLength = c(NA, 260L, NA, NA, 510L, 650L, 
280L, 550L, NA, 550L, 320L, NA, NA, NA, 410L, 260L, NA, NA, NA, 
330L, 670L, 890L, NA, NA, 1690L, NA, 240L, 470L, 420L, NA, 700L, 
NA, 420L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 510L, 240L, 
930L, NA, 730L, 730L, NA, 270L, NA, 780L, NA, 280L, 590L, 290L, 
NA, NA, 410L, 350L, NA, NA, 1350L, 560L, 460L, 650L, 430L, NA, 
NA, 560L, 360L), fldInitialLength = c(NA, 220L, NA, NA, 160L, 
650L, 280L, 550L, NA, 290L, 320L, NA, NA, NA, 240L, 160L, NA, 
NA, NA, 330L, 340L, 520L, NA, NA, 830L, NA, 240L, 230L, 230L, 
NA, 340L, NA, 310L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 210L, 
220L, 730L, NA, 730L, 730L, NA, 270L, NA, 170L, NA, 280L, 190L, 
210L, NA, NA, 390L, 300L, NA, NA, 1350L, 220L, 250L, 380L, 430L, 
NA, NA, 170L, 360L), fldCatchWeight = c(0.73, 0.672, 61.3, 0.34, 
1.534, 0.934, 0.244, 2.55, 0.136, 13.784, 0.182, 0.27, 1.538, 
0.116, 5.94, 0.284, 0.06, 1.064, 1.2, 0.342, 13.3, 167.34, 0.834, 
23.08, 16.59, 0.286, 0.072, 1.24, 0.44, 0.052, 14.66, 6.36, 1.96, 
7.48, 1.7, 0.092, 0.146, 3.73, 0.82, 9.54, 0.846, 3.492, 0.248, 
16.13, 0.17, 1.87, 2.31, 0.084, 0.082, 0.344, 29.08, 3.248, 0.148, 
4.916, 0.871, 0.27, 0.404, 1.028, 0.598, 10.27, 0.444, 7.82, 
4.84, 3.712, 2.09, 0.454, 0.13, 4.93, 1.584, 0.568), fldMeasuringInterval = c(NA, 
10L, NA, NA, 10L, 10L, 10L, 10L, NA, 10L, 10L, NA, NA, NA, 10L, 
10L, NA, NA, NA, 10L, 10L, 10L, NA, NA, 10L, NA, 10L, 10L, 10L, 
NA, 10L, NA, 10L, NA, NA, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L, 
10L, NA, 10L, 10L, NA, 10L, NA, 10L, NA, 10L, 10L, 10L, NA, NA, 
10L, 10L, NA, NA, 10L, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L), 
    fldMeasuringOperator = c(NA, "DFE", NA, NA, "DFE", "MSL", 
    "MSL", "MSL", NA, "MSL", "MSL", NA, NA, NA, "MSL", "DFE", 
    NA, NA, NA, "DFE", "DFE", "DFE", NA, NA, "DFE", NA, "DFE", 
    "DFE", "DFE", NA, "MSL", NA, "MSL", NA, NA, "MSL", "MSL", 
    "MSL", "MSL", NA, NA, "MSL", "MSL", "MSL", NA, "MSL", "MSL", 
    NA, "MSL", NA, "MSL", NA, "MSL", "DFE", "DFE", NA, NA, "DFE", 
    "DFE", NA, NA, "MSL", "MSL", "MSL", "MSL", "MSL", NA, NA, 
    "MSL", "MSL"), fldCruiseStationNumber = c(59L, 59L, 59L, 
    59L, 59L, 60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L, 
    62L, 62L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 64L, 
    64L, 64L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 66L, 66L, 
    66L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 
    67L, 67L, 70L, 70L, 70L, 70L, 70L, 70L, 71L, 71L, 71L, 71L, 
    71L, 71L, 71L, 71L, 71L, 71L, 71L), fldMainSpeciesCode = c("BEN", 
    "BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG", 
    "WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA", 
    "SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR", 
    "DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR", 
    "DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS", 
    "RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI", 
    "LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR", 
    "DFL", "DFL", "EDC", "GUG", "HKE", "LEM"), fldMainSpeciesCode.1 = c("BEN", 
    "BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG", 
    "WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA", 
    "SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR", 
    "DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR", 
    "DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS", 
    "RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI", 
    "LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR", 
    "DFL", "DFL", "EDC", "GUG", "HKE", "LEM")), row.names = c(NA, 
70L), class = "data.frame")
dput(head(days_IAMS2023,13)) 
structure(list(fldDateTimeShot = c("24 February 2023", "24 February 2023", 
"24 February 2023", "24 February 2023", "24 February 2023", "25 February 2023", 
"25 February 2023", "25 February 2023", "25 February 2023", "25 February 2023", 
"26 February 2023", "26 February 2023", "27 February 2023"), 
    fldCruiseStationNumber = 59:71), row.names = c(NA, 13L), class = "data.frame")

Both have the fldCruiseStationNumber column.

I want to have the days fldDateTimeShot repeated in all the fldCruiseStationNumber of the dataframe catch using the dataframe days_IAMS2023. My difficulty is the fact that they have different number of rows.

Any help is much appreciated.


Solution

  • Just do a left_join:

    library(dplyr)
    
    updated_df <-
        left_join(
            catch,
            days_IAMS2023,
            by = c("fldCruiseStationNumber")
        )
    

    Here's a sample of the produced data.frame:

    > updated_df[ c(10,30,50,70), ]
       fldFinalLength fldInitialLength fldCatchWeight fldMeasuringInterval fldMeasuringOperator fldCruiseStationNumber fldMainSpeciesCode fldMainSpeciesCode.1  fldDateTimeShot
    10            550              290         13.784                   10                  MSL                     60                WHG                  WHG 24 February 2023
    30             NA               NA          0.052                   NA                 <NA>                     65                NEP                  NEP 25 February 2023
    50             NA               NA          0.344                   NA                 <NA>                     67                TUB                  TUB 25 February 2023
    70            360              360          0.568                   10                  MSL                     71                LEM                  LEM 27 February 2023