When I try to filter a data frame by an exact time (rather than by a range between two times), the result is NA. I am trying to create a variable in df1 that is based on information in df2.
Here is my data
dput(df2)
structure(list(Time = structure(c(1647531450.72, 1647531451.757,
1647531452.794, 1647531453.83, 1647531454.867, 1647531455.818,
1647531456.854, 1647531457.891, 1647531458.928, 1647531459.878,
1647531460.915, 1647531461.952, 1647531462.902, 1647531463.939,
1647531464.976, 1647531466.013, 1647531467.05, 1647531468, 1647531469.037,
1647531470.074, 1647531471.11, 1647531472.147, 1647531473.098,
1647531474.134, 1647531475.171, 1647531476.208, 1647531477.245,
1647531478.195, 1647531479.232, 1647531480.269, 1647531481.306,
1647531482.342, 1647531483.293, 1647531484.33, 1647531485.366,
1647531486.317, 1647531487.354, 1647531488.39, 1647531489.427,
1647531490.378, 1647531491.414, 1647531492.451, 1647531493.488,
1647531494.438, 1647531495.475, 1647531496.512, 1647531497.549,
1647531498.586, 1647531499.536, 1647531500.573, 1647531501.61,
1647531502.56, 1647531503.597, 1647531504.634, 1647531505.67,
1647531506.621, 1647531507.658, 1647531508.694, 1647531509.645
), tzone = "", class = c("POSIXct", "POSIXt")), LAT = c(17.8799454,
17.8799729, 17.8799952, 17.8800159, 17.8800416, 17.8800708, 17.8801,
17.8801292, 17.8801567, 17.8801877, 17.8802237, 17.8802581, 17.8802873,
17.8803148, 17.8803474, 17.8803818, 17.8804161, 17.8804471, 17.8804763,
17.8805089, 17.8805381, 17.880569, 17.8806034, 17.880636, 17.8806721,
17.8807048, 17.8807374, 17.8808061, 17.8808405, 17.8808783, 17.8808783,
17.8809556, 17.8809968, 17.8810346, 17.8810724, 17.8810724, 17.8811497,
17.8811892, 17.8812288, 17.88127, 17.8813112, 17.8813524, 17.8813954,
17.8814383, 17.8814813, 17.8815208, 17.8815603, 17.8815964, 17.8816359,
17.8816737, 17.8817132, 17.8817562, 17.8817974, 17.8818438, 17.8818885,
17.8819314, 17.8819726, 17.8820104, 17.88205), LON = c(-62.8613544,
-62.8613338, -62.8613063, -62.8612857, -62.861265, -62.8612513,
-62.8612307, -62.8612101, -62.8611894, -62.8611757, -62.8611688,
-62.8611482, -62.8611276, -62.8611139, -62.8611001, -62.8610795,
-62.8610658, -62.861052, -62.8610314, -62.8610176, -62.860997,
-62.8609833, -62.8609627, -62.8609489, -62.8609352, -62.8609214,
-62.8609008, -62.8608733, -62.8608665, -62.8608596, -62.8608596,
-62.8608459, -62.860839, -62.8608321, -62.8608252, -62.8608252,
-62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115,
-62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608046,
-62.8607977, -62.8607909, -62.860784, -62.8607771, -62.8607771,
-62.8607771, -62.8607771, -62.8607771, -62.8607771, -62.8607703,
-62.8607634, -62.8607496, -62.8607428)), class = "data.frame", row.names = c(NA,
-59L))
And here is the data frame containing the information I want to filter by:
dput(df1)
structure(list(date = structure(19068, class = "Date"), RaceStartTime = structure(1647531480, tzone = "", class = c("POSIXct",
"POSIXt"))), class = "data.frame", row.names = "event.2")
I have tried the following
df1$lon <- df2$LON[match(df1$RaceStartTime, df2$Time)]
I have also tried
df1$lon <- df2%>%
filter(Time == df1$RaceStartTime)
Both of these produce empty results. Can someone point out the obvious mistake?
EDIT: The structure of the two time columns appears to be the same:
str(df1$RaceStartTime)
POSIXct[1:1], format: "2022-03-17 15:38:00"
str(df2$Time)
POSIXct[1:59], format: "2022-03-17 15:37:30"
Thanks
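By default, POSIXct values are printed to whole seconds only, but their underlying numeric representation can contain fractional seconds (as your data in df2
does). Both `==` and `match()` compare those underlying numeric values, so a value such as 1647531480.269 in df2 is not equal to 1647531480 in df1, even though both print as the same time.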
You can remove the fractional seconds by rounding each time down to the whole second (the default unit of `floor_date`):
df2$Time <- lubridate::floor_date(df2$Time)
So now you get:
df2%>%
filter(Time == df1$RaceStartTime)
#> Time LAT LON
#> 1 2022-03-17 15:38:00 17.88088 -62.86086