I have two dataframes:
dput(head(catch,70))
structure(list(fldFinalLength = c(NA, 260L, NA, NA, 510L, 650L,
280L, 550L, NA, 550L, 320L, NA, NA, NA, 410L, 260L, NA, NA, NA,
330L, 670L, 890L, NA, NA, 1690L, NA, 240L, 470L, 420L, NA, 700L,
NA, 420L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 510L, 240L,
930L, NA, 730L, 730L, NA, 270L, NA, 780L, NA, 280L, 590L, 290L,
NA, NA, 410L, 350L, NA, NA, 1350L, 560L, 460L, 650L, 430L, NA,
NA, 560L, 360L), fldInitialLength = c(NA, 220L, NA, NA, 160L,
650L, 280L, 550L, NA, 290L, 320L, NA, NA, NA, 240L, 160L, NA,
NA, NA, 330L, 340L, 520L, NA, NA, 830L, NA, 240L, 230L, 230L,
NA, 340L, NA, 310L, NA, NA, 260L, 300L, 860L, 370L, NA, NA, 210L,
220L, 730L, NA, 730L, 730L, NA, 270L, NA, 170L, NA, 280L, 190L,
210L, NA, NA, 390L, 300L, NA, NA, 1350L, 220L, 250L, 380L, 430L,
NA, NA, 170L, 360L), fldCatchWeight = c(0.73, 0.672, 61.3, 0.34,
1.534, 0.934, 0.244, 2.55, 0.136, 13.784, 0.182, 0.27, 1.538,
0.116, 5.94, 0.284, 0.06, 1.064, 1.2, 0.342, 13.3, 167.34, 0.834,
23.08, 16.59, 0.286, 0.072, 1.24, 0.44, 0.052, 14.66, 6.36, 1.96,
7.48, 1.7, 0.092, 0.146, 3.73, 0.82, 9.54, 0.846, 3.492, 0.248,
16.13, 0.17, 1.87, 2.31, 0.084, 0.082, 0.344, 29.08, 3.248, 0.148,
4.916, 0.871, 0.27, 0.404, 1.028, 0.598, 10.27, 0.444, 7.82,
4.84, 3.712, 2.09, 0.454, 0.13, 4.93, 1.584, 0.568), fldMeasuringInterval = c(NA,
10L, NA, NA, 10L, 10L, 10L, 10L, NA, 10L, 10L, NA, NA, NA, 10L,
10L, NA, NA, NA, 10L, 10L, 10L, NA, NA, 10L, NA, 10L, 10L, 10L,
NA, 10L, NA, 10L, NA, NA, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L,
10L, NA, 10L, 10L, NA, 10L, NA, 10L, NA, 10L, 10L, 10L, NA, NA,
10L, 10L, NA, NA, 10L, 10L, 10L, 10L, 10L, NA, NA, 10L, 10L),
fldMeasuringOperator = c(NA, "DFE", NA, NA, "DFE", "MSL",
"MSL", "MSL", NA, "MSL", "MSL", NA, NA, NA, "MSL", "DFE",
NA, NA, NA, "DFE", "DFE", "DFE", NA, NA, "DFE", NA, "DFE",
"DFE", "DFE", NA, "MSL", NA, "MSL", NA, NA, "MSL", "MSL",
"MSL", "MSL", NA, NA, "MSL", "MSL", "MSL", NA, "MSL", "MSL",
NA, "MSL", NA, "MSL", NA, "MSL", "DFE", "DFE", NA, NA, "DFE",
"DFE", NA, NA, "MSL", "MSL", "MSL", "MSL", "MSL", NA, NA,
"MSL", "MSL"), fldCruiseStationNumber = c(59L, 59L, 59L,
59L, 59L, 60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L,
62L, 62L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 64L,
64L, 64L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 66L, 66L,
66L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 67L, 67L, 67L, 67L,
67L, 67L, 70L, 70L, 70L, 70L, 70L, 70L, 71L, 71L, 71L, 71L,
71L, 71L, 71L, 71L, 71L, 71L, 71L), fldMainSpeciesCode = c("BEN",
"BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG",
"WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA",
"SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR",
"DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR",
"DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS",
"RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI",
"LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR",
"DFL", "DFL", "EDC", "GUG", "HKE", "LEM"), fldMainSpeciesCode.1 = c("BEN",
"BIB", "BOF", "LSS", "WAF", "SMH", "TBR", "WAF", "WHB", "WHG",
"WIT", "BEN", "BOF", "CDT", "LEM", "MEG", "NEP", "NEP", "PLA",
"SOL", "DGS", "DGS", "GFB", "BOF", "COE", "CTC", "CUR", "CUR",
"DGS", "NEP", "WAF", "WHB", "WIT", "BEN", "BOF", "CUR", "CUR",
"DFL", "JOD", "LSS", "MAC", "MEG", "MEG", "SMH", "PLA", "RDS",
"RNS", "SDF", "SHR", "TUB", "WAF", "WHB", "WIT", "HKE", "LBI",
"LSS", "WHB", "WHG", "WIT", "BEN", "BOF", "COE", "CUR", "CUR",
"DFL", "DFL", "EDC", "GUG", "HKE", "LEM")), row.names = c(NA,
70L), class = "data.frame")
dput(head(days_IAMS2023,13))
structure(list(fldDateTimeShot = c("24 February 2023", "24 February 2023",
"24 February 2023", "24 February 2023", "24 February 2023", "25 February 2023",
"25 February 2023", "25 February 2023", "25 February 2023", "25 February 2023",
"26 February 2023", "26 February 2023", "27 February 2023"),
fldCruiseStationNumber = 59:71), row.names = c(NA, 13L), class = "data.frame")
Both have the fldCruiseStationNumber column.
I want to have the days fldDateTimeShot repeated in all the fldCruiseStationNumber of the dataframe catch
using the dataframe days_IAMS2023
. My difficulty is the fact that they have different number of rows.
Any help is much appreciated.
Just do a left_join:
library(dplyr)
updated_df <-
left_join(
catch,
days_IAMS2023,
by = c("fldCruiseStationNumber")
)
Here's a sample of the produced data.frame:
> updated_df[ c(10,30,50,70), ]
fldFinalLength fldInitialLength fldCatchWeight fldMeasuringInterval fldMeasuringOperator fldCruiseStationNumber fldMainSpeciesCode fldMainSpeciesCode.1 fldDateTimeShot
10 550 290 13.784 10 MSL 60 WHG WHG 24 February 2023
30 NA NA 0.052 NA <NA> 65 NEP NEP 25 February 2023
50 NA NA 0.344 NA <NA> 67 TUB TUB 25 February 2023
70 360 360 0.568 10 MSL 71 LEM LEM 27 February 2023