I need to merge datasets A and B based on the date, id and nearest time.
The time in the datasets are not matched. The time in B is always 0 to 10 minutes greater than the time in A.
I tried left_join
with within
, between
, overlaps
, etc. but couldn't manage it.
A
DATETIME | ID | W
--------------------------------
2020-12-02 18:02:01 | 1 | 0.25
2020-12-02 19:06:21 | 1 | 0.35
2020-12-02 18:12:08 | 2 | 0.44
2020-12-03 10:03:03 | 3 | 0.98
B
DATETIME | ID | X1 | X3
--------------------------------------
2020-12-02 18:08:01 | 1 | 1.3 | 99.3
2020-12-02 18:21:11 | 2 | 4.2 | 33.2
2020-12-03 10:09:22 | 3 | 7.1 | 39.9
Desired merge
DATETIME.x | ID.x | W | DATETIME.y | ID.y | X1 | X3
----------------------------------------------------------------------------
2020-12-02 18:02:01 | 1 | 0.25 | 2020-12-02 18:08:01 | 1 | 1.3 | 99.3
2020-12-02 19:06:21 | 1 | 0.35 | | | |
2020-12-02 18:12:08 | 2 | 0.44 | 2020-12-02 18:21:11 | 2 | 4.2 | 33.2
2020-12-03 10:03:03 | 3 | 0.98 | 2020-12-03 10:09:22 | 3 | 7.1 | 39.9
Edit: I just added a part of original datasets. The DATASET B always have less number of rows than DATASET A
DATASET A
structure(list(DateTime = structure(c(1598636287, 1598636622,
1598638754, 1598639954, 1598641154, 1598642353, 1598643551, 1598692415,
1598693716, 1598694315, 1598694914, 1598695514, 1598696414, 1598697012,
1598697312, 1598697912, 1598698512, 1598699111, 1598699710, 1598700309,
1598700910, 1598701209, 1598701809, 1598702408, 1598703007, 1598703608,
1598704207, 1598704806, 1598705105, 1598705705, 1598706713, 1598707914,
1598709112, 1598710313, 1598711511, 1598712711, 1598713910, 1598715110,
1598715710, 1598716909, 1598718109, 1598719309, 1598720508, 1598721708,
1598722906, 1598724105, 1598724706, 1598725906, 1598727104, 1598728304,
1598729502, 1598730704, 1598731903, 1598733101, 1598733703, 1598734903,
1598736102, 1598737301, 1598738500, 1598739700, 1598740900, 1598742099,
1598742697, 1598743898, 1598745098, 1598746297, 1598747497, 1598748696,
1598749895, 1598751096, 1598751695, 1598752893, 1598754094, 1598755293,
1598756493, 1598757692, 1598758891, 1598760092, 1598760690, 1598784123,
1598784723, 1598785324, 1598785922, 1598786522, 1598787121, 1598787722,
1598788921, 1598789521, 1598790122, 1598790722, 1598791321, 1598791920,
1598792520, 1598793120, 1598794320, 1598794920, 1598795518, 1598796118,
1598796719, 1598797319, 1598797918, 1598798516, 1598799716, 1598800317,
1598800917, 1598801518, 1598802115, 1598802717, 1598803315, 1598803915,
1598805115, 1598805714, 1598806313, 1598806915, 1598807514, 1598808114,
1598808714, 1598809313, 1598810511, 1598811113, 1598811711, 1598812311,
1598812911, 1598813511, 1598814112, 1598814710, 1598815910, 1598816510,
1598817110, 1598817709, 1598818309, 1598818910, 1598819508, 1598820108,
1598821308, 1598821908, 1598822508, 1598823107, 1598823707, 1598824306,
1598824907, 1598825506, 1598826706, 1598827305, 1598827905, 1598828505,
1598829104, 1598829704, 1598830305, 1598830905, 1598832103, 1598832703,
1598833303, 1598833903, 1598834502, 1598835101, 1598835701, 1598836300,
1598837501, 1598838100, 1598838700, 1598839300, 1598839900, 1598840499,
1598841100, 1598841699, 1598842898, 1598843497, 1598844098, 1598844697,
1598845298, 1598845897, 1598846497, 1598847098, 1598848297, 1598848896,
1598849496, 1598850096, 1598850696, 1598851296, 1598851895, 1598852494,
1598853695, 1598854294, 1598854893, 1598855494, 1598856093, 1598856693,
1598857292, 1598857891, 1598859093, 1598859692, 1598860291, 1598860891,
1598861491, 1598862091, 1598862690, 1598863290, 1598864490, 1598865089,
1598865689, 1598866289, 1598866889, 1598867490, 1598868088, 1598868689,
1598869888, 1598870486, 1598871086, 1598871686, 1598872287, 1598872886,
1598873486, 1598874085, 1598875285, 1598875884, 1598876485, 1598877084,
1598877684, 1598878284, 1598878885, 1598879484, 1598880683, 1598881284,
1598881883, 1598882482, 1598883082, 1598883682, 1598884283, 1598884881,
1598886081, 1598886681, 1598887280, 1598887879, 1598888479, 1598889079,
1598889678, 1598890280, 1598891478, 1598892078, 1598892680, 1598893278,
1598893878, 1598894477, 1598895076, 1598895677, 1598896876, 1598897476,
1598898076, 1598898676, 1598899275, 1598899875, 1598900475, 1598901075,
1598902273, 1598902873, 1598903473, 1598904074, 1598904673, 1598905272,
1598905872, 1598906473, 1598907671, 1598908272, 1598908871, 1598909471,
1598910071, 1598910672, 1598911270, 1598911870, 1598913070, 1598913669,
1598914268, 1598914870, 1598915469, 1598916067, 1598916667, 1598917268
), tzone = "UTC", class = c("POSIXct", "POSIXt")), ID = c("8",
"1", "1", "2", "3", "4", "6", "1", "2", "3", "4", "6", "7", "8",
"1", "2", "3", "4", "6", "7", "8", "1", "2", "3", "4", "6", "7",
"8", "1", "2", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2",
"3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7",
"8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4",
"5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1",
"2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6",
"7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3",
"4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8",
"1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5",
"6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2",
"3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7",
"8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4",
"5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1",
"2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6",
"7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3",
"4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8",
"1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5",
"6", "7", "8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2",
"3", "4", "5", "6", "7", "8", "1", "2", "3", "4", "5", "6", "7",
"8", "1", "2", "3", "4", "5", "6", "7", "8", "1", "2", "3", "4",
"5", "6", "7", "8"), W = c(-315.330562689608, -2.52416919062039,
-6.75916991234354, -4.9925923458724, -2.68077945324944, -2.59352725163617,
2.02704542198996, -115.650578709976, -27.2982158256338, -37.8537391639228,
43.2574538530341, 1.03630362417363, 1.27128035515822, -111.453375637402,
-16.1498545551005, -32.0567779488567, -14.2158062039774, -12.7265408441033,
14.4552340030648, -0.585924470701273, -29.2725817416404, -14.4761930694506,
-10.5411967306746, 38.1106281789558, 29.9561186051226, 5.80726901038645,
-0.980361745751139, 27.7640143766151, -11.4539988350286, -7.87890259507465,
-11.0164165425268, -14.671496103194, -21.8761126812378, -22.4177243924256,
-38.1473733135052, 10.9434594904922, 45.3757341781314, -18.9774126098487,
-8.66568056481869, -3.89551896276334, 2.34305751190445, 21.1184797201658,
-2.80106266579382, -2.13127112875359, -1.30488761540419, 3.52433963912613,
3.59774311813275, -1.57072913521284, 0.17549951904551, 4.49101332311171,
4.14251221995872, 9.44852080922671, -18.5351726039937, -7.42350363641328,
-4.62564402194394, -4.5136677142908, 3.50669871332958, 7.83229254342291,
1.97803316530571, -109.966915087567, 0.249080462434332, 0.411500902934364,
-4.32842271743313, -35.3580246389245, 0.471211838052437, -1.86781930165719,
-0.237138120660447, -12.3918391824992, 9.57167516936909, -2.15079530394908,
-0.15681768087188, 0.148139265555683, 0.65012500297816, 0.82970239607941,
0.98475483572049, 3.48468701581947, 3.45516644446294, 8.28283805412418,
9.5546080107533, 106.566326082132, -8.4059856673066, 136.702102750715,
-7.97509319346756, -9.65644617841705, -66.4363930169088, -72.0098875866613,
-24.3158796139066, -6.1988529506953, -13.7044921103796, 2.98196655360075,
27.705159582663, -2.6774142696737, -2.64104407255444, -3.01266615508847,
-15.4414872227932, -11.4932157142113, -3.05245054086814, 0.912643312835983,
0.376129346746173, 11.1387845048052, -7.11691433937843, 9.61060027882159,
-12.9765287694212, -5.67396777577145, -0.362964911940461, 7.15437079406064,
-0.809097105496422, 1.35224187333621, -1.23075264056478, -2.97134675299893,
-4.50835807234897, 4.36839894967037, -1.10797564355453, 2.42223049178652,
-1.67111895593451, 2.02613961608675, 12.1764092996962, 0.149218747547633,
-2.62695129351176, -1.44788286325, -0.869156660324555, -0.628698207215824,
-2.97110159051323, 1.89612173055238, -2.23996469444859, -0.24492418965564,
-14.7940925475231, -3.16254513667387, -1.52445292135167, -3.23100699247941,
1.55341327394996, -1.87035442833437, -10.550723496796, -1.59032323370544,
3.89321488983673, 0.90104711689204, 26.9477421102614, 0.346026397414036,
0.350734294086829, 0.114371620042446, -1.11520830232016, 0.371307471846682,
0.514956010112907, 3.49970995671813, 2.69870761352848, 2.69456927840329,
0.279003230202838, -0.0225711009518213, -1.63067119943349, -0.599693559777168,
-2.56052035234386, -0.846365082204427, -5.42273142828007, -0.607372870656234,
7.42844641587937, -3.50660448466716, -13.5314318519484, 41.6795464524761,
-33.5119663802369, -3.67599936282413, -13.0496154516734, 7.2316432610428,
3.30908108695209, 5.55275503969583, 3.85099158300275, -1.75597753662526,
9.07123637878036, -3.71450440697352, -2.82876372575196, 26.9334541462943,
-2.16901098952482, -4.80863436490251, -4.91493961086461, -3.63857345204952,
-3.63608261370314, -4.80503757527894, -3.55857146168661, -0.566768973352482,
-1.09179105417579, -4.75020279883772, -3.96107100582678, -3.8185226284532,
2.46998106156591, 3.79867882937243, -8.89003812162545, 14.6773858053186,
4.51965724318235, 0.680551662459277, -7.79647123577452, -1.84316898195118,
-1.77996273644294, -6.68347148060333, -1.68132994983378, 5.59757907996578,
-12.2857215270019, -15.5450216523343, -8.88175096187388, -15.1518675705718,
26.9069022226759, 5.81328310693763, -3.18165833285836, -4.13183220797559,
-2.29209372974387, -3.5717355365426, -3.96494332315338, -7.62459016232794,
-14.1184744077418, -8.80744232458462, -0.558188273327847, -2.15736663831821,
-0.302670424368334, -5.9666731908502, -2.91143679799658, 0.78193344751675,
-12.3730563990833, -11.2345797439574, 0.580719551998759, 5.50192512168596,
3.14362071385508, 4.42954409963619, -2.63594117852702, -4.67529303597675,
-8.49801360194862, -0.530431046698544, 3.62897613236058, -7.97511967956899,
0.77288784156167, -22.1871534520521, 17.46379541586, -2.81617618157617,
-4.81896806636922, -4.6609249091753, -11.3676003160432, -4.38548871151348,
-6.0696655207222, 3.42114071095143, 10.0298776712577, -3.7323480151313,
4.06029043013707, -0.774765238927718, 3.03849716672757, -9.58655960203042,
0.577044533244037, 0.912588696849156, -1.13372337503903, 0.0488029971462183,
0.201812215030437, -0.75545074368071, -12.3827058554518, -3.43598697198347,
3.26610839122928, 0.986772864305677, 6.32443180317462, 2.0181742152896,
-0.616998876325967, 1.98941690678587, 0.796388406110153, 3.39213613292779,
4.19845521046369, -1.63041961396707, 1.35354928613412, 1.33456998177465,
-2.19345310582715, -1.27023292659084, 0.458779324945803, 2.35649297440622,
0.472567239466579, -0.134201165275529, 0.422020168259809, -0.0774992104338274,
-5.10174927347748, -2.11045995214029, -2.91046480470787, -2.22008940343392,
-1.64820685253815, -1.13278568364395, -2.99297085126001, -0.541262894155119
)), row.names = c(NA, -278L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x000001a408b982e0>)
DATASET B
structure(list(DateTime = structure(c(1598861250, 1598866650,
1598872050, 1598877450, 1598861850, 1598867250, 1598872650, 1598878050,
1598862450, 1598867850, 1598873250, 1598878650, 1598857800, 1598863050,
1598868450, 1598873850, 1598879250, 1598859450, 1598864850, 1598870250,
1598875650, 1598881050, 1598860050, 1598865450, 1598870850, 1598876250,
1598881650), tzone = "", class = c("POSIXct", "POSIXt")), ID = c(1,
1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8), X1 = c(0.198140995, 0.19808281, 0.198024625,
0.19761733, 0.10382311, 0.1050256, 0.105277735, 0.105044995,
0.343487125, 0.343176805, 0.343215595, 0.34342894, -0.6956, 0.322579315,
0.32228839, 0.32201686, 0.32151259, 0.19816039, 0.1981216, 0.19788886,
0.1977337, 0.19738459, 0.104307985, 0.10405585, 0.105394105,
0.104967415, 0.104967415), X2 = c(15.62, 14.8, 14.6, 14.1, 13.07,
13, 12.9, 12.46, 14.23, 13.78, 13.6, 13.22, -50, 13.04, 13.1,
12.87, 12.49, 16.31, 14.99, 14.6, 14.29, 13.75, 13.98, 12.82,
13, 12.6, 11.97)), row.names = c(NA, -27L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x000001a408b982e0>)
I've used fuzzyjoin
for similar joins for time variables, which allows you to not only define what variables you want to join by, but apply a function to how you want to join on that variable:
fuzzyjoin::fuzzy_left_join(a, b,
by = c("ID" = "ID", "DATETIME" = "DATETIME"),
match_fun = list(`==`, \(x, y) difftime(x, y, units = "mins") <= 10))
Output:
DATETIME.x ID.x W DATETIME.y ID.y X1 X3
1 2020-12-02 18:02:01 1 0.25 2020-12-02 18:08:01 1 1.3 99.3
2 2020-12-02 19:06:21 1 0.35 <NA> NA NA NA
3 2020-12-02 18:12:08 2 0.44 2020-12-02 18:21:11 2 4.2 33.2
4 2020-12-03 10:03:03 3 0.98 2020-12-03 10:09:22 3 7.1 39.9
You could also use dplyr
's join_by()
within full_join
but you cant use functions in that so will need to create a temporary variable. Same result, just a bit more cumbersome:
a %>%
mutate(high = DATETIME + 10) %>%
full_join(b, join_by(ID == ID, high <= DATETIME)) %>%
select(-high)
Data:
a <- structure(list(DATETIME = structure(c(1606932121, 1606935981,
1606932728, 1606989783), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
ID = c(1L, 1L, 2L, 3L), W = c(0.25, 0.35, 0.44, 0.98)), row.names = c("1",
"2", "3", "4"), class = "data.frame")
b <- structure(list(DATETIME = structure(c(1606932481, 1606933271,
1606990162), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
ID = 1:3, X1 = c(1.3, 4.2, 7.1), X3 = c(99.3, 33.2, 39.9)), row.names = c(NA,
-3L), class = "data.frame")
merged <- structure(list(DATETIME.x = structure(c(1606932121, 1606935981,
1606932728, 1606989783), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
ID.x = c(1L, 1L, 2L, 3L), W = c(0.25, 0.35, 0.44, 0.98),
DATETIME.y = structure(c(1606932481, NA, 1606933271, 1606990162
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), ID.y = c(1L,
NA, 2L, 3L), X1 = c(1.3, NA, 4.2, 7.1), X3 = c(99.3, NA,
33.2, 39.9)), row.names = c(NA, -4L), class = "data.frame")