This question is a follow up to my other question, How to get tidy fill function to use groups when filling data rows.
I want to get one row per TIMESTAMP value. I want to fill in real data, by column, whether it's character, float, or whatever. If both TIMESTAMP rows have NA's, then I need NA returned. I want all columns returned.
A tiny portion of my data:
dupped_records <- structure(list(Station_Number = c("3999", "3999", "3999", "3999"
), Station_Name = c("Eagle 3NW Beta", "Eagle 3NW Beta", "Eagle 3NW Beta","Eagle 3NW
Beta"), NWSLI = c("EEMN1", "EEMN1", "EEMN1", "EEMN1"), Station_Legacy_Name = c("Rogers
Farm Tower", "Rogers Farm Tower", "Rogers Farm Tower", "Rogers Farm Tower"), Lat =
c(40.846994, 40.846994, 40.846994, 40.846994), Lon = c(-96.467199, -96.467199,
-96.467199, -96.467199), Elev_m = c("368.7", "368.7", "368.7", "368.7"), TIMESTAMP =
c("2021-07-28 20:05:00", "2021-07-28 20:05:00", "2021-07-28 20:06:00", "2021-07-28
20:06:00"), RECORD = c(18L, 18L, 19L, 19L), Ta_2m_Avg = c(34.3, NA, 34.26, NA), TaMax_2m = c(34.39,
NA, 34.35, NA), TaMaxTime_2m = c("2021-07-28 20:04:15", NA, "2021-07-28 20:05:05",
NA), TaMin_2m = c(34.25, NA, 34.16, NA), TaMinTime_2m = c("2021-07-28 20:04:45",
NA, "2021-07-28 20:05:50", NA), RH_2m_Avg = c(57.35, NA, 56.97,
NA), RHMax_2m = c(58.68, NA, 57.99, NA), RHMaxTime_2m = c("2021-07-28 20:04:30",
NA, "2021-07-28 20:05:40", NA), RHMin_2m = c(55.86, NA, 55.41,
NA), RHMinTime_2m = c("2021-07-28 20:04:45", NA, "2021-07-28 20:05:20",
NA), Dp_2m_Avg = c(24.68, NA, 24.53, NA), DpMax_2m = c(25.06,
NA, 24.77, NA), DpMaxTime_2m = c("2021-07-28 20:04:25", NA, "2021-07-28 20:05:40",
NA), DpMin_2m = c(24.19, NA, 24.06, NA), DpMinTime_2m = c("2021-07-28 20:04:45",
NA, "2021-07-28 20:05:20", NA), HeatIndex_2m_Avg = c(41.9, NA,
41.64, NA), HeatIndexMax_2m = c(42.51, NA, 41.95, NA), HeatIndexMaxTime_2m = c("2021-07-28 20:04:25",
NA, "2021-07-28 20:05:45", NA), WindChill_2m_Avg = c(37.65, NA,
37.77, NA), WindChillMin_2m = c(37.47, NA, 37.56, NA), WindChillMinTime_2m = c("2021-07-28 20:04:45",
NA, "2021-07-28 20:05:20", NA), WndAveSpd_3m = c(2.746, NA, 3.796,
NA), WndVecMagAve_3m = c(2.594, NA, 3.607, NA), WndAveDir_3m = c(166.5,
NA, 152.6, NA), WndAveDirSD_3m = c(19.03, NA, 18.05, NA), WndMaxSpd5s_3m = c(5.165,
NA, 5.498, NA), WndMaxSpd5sTime_3m = c("2021-07-28 20:05:00",
NA, "2021-07-28 20:05:05", NA), WndMax_5sec_Dir_3m = c(158.3,
NA, 162.8, NA), PresAvg_1pnt5m = c(972.3953, NA, 972.3508, NA
), PresMax_1pnt5m = c(972.4536, NA, 972.42, NA), PresMaxTime_1pnt5m = c("2021-07-28 20:04:25",
NA, "2021-07-28 20:05:10", NA), PresMin_1pnt5m = c(972.3868,
NA, 972.2872, NA), PresMinTime_1pnt5m = c("2021-07-28 20:04:35",
NA, "2021-07-28 20:05:50", NA), Solar_2m_Avg = c(771.1, NA, 776.1,
NA), Rain_1m_Tot = c(0, NA, 0, NA), Ts_bare_10cm_Avg = c(32.98,
NA, 33.01, NA), TsMax_bare_10cm = c(33, NA, 33.02, NA), TsMaxTime_bare_10cm = c("2021-07-28 20:05:00",
NA, "2021-07-28 20:06:00", NA), TsMin_bare_10cm = c(32.97, NA,
33, NA), TsMinTime_bare_10cm = c("2021-07-28 20:04:10", NA, "2021-07-28 20:05:05",
NA), BattVolts_Min = c(12.77, NA, 12.77, NA), LithBatt_Min = c(3.56,
NA, 3.56, NA), MaintMode = c(0L, NA, 0L, NA), WS_ms_S_WVT = c(NA,
3.082, NA, 3.614), SlrW_Avg = c(NA, 854, NA, 859), SlrW_Max = c(NA,
858, NA, 862), SlrW_TMx = c(NA, "2021-07-28 20:04:05", NA, "2021-07-28 20:06:00"
), SlrW = c(NA, 856, NA, 862), SlrW_Std = c(NA, 2.334, NA, 2.376
), WS_Dir = c(NA, 164L, NA, 128L), WS_ms_Avg = c(NA, 3.082, NA,
3.614), WS_ms_Max = c(NA, 5.17, NA, 5.71), WS_ms_TMx = c(NA,
"2021-07-28 20:05:00", NA, "2021-07-28 20:05:05"), WS_Diag = c(NA,
0L, NA, 0L), SmplsF = c(NA, 1L, NA, 1L), Diag1F = c(NA, 0L, NA,
0L), Diag2F = c(NA, 0L, NA, 0L), Diag4F = c(NA, 0L, NA, 0L),
Diag8F = c(NA, 0L, NA, 0L), Diag9F = c(NA, 0L, NA, 0L), Diag10F = c(NA,
0L, NA, 0L), NNDF = c(NA, 0L, NA, 0L), WindDir_D1_WVT = c(NA,
170.6, NA, 150.3), WindDir_SD1_WVT = c(NA, 16.43, NA, 14.95
), source = c("E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122",
"E:\\Data_Collected_withDups/3999/3999_1min_exp.csv-20230216_0122",
"E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122",
"E:\\Data_Collected_withDups/3999/3999_1min_exp.csv-20230216_0122"
)), class = "data.frame", row.names = 1886274:1886277)
The result I want is:
no_dupped_records <- structure(list(Station_Number = c(3999L, 3999L), Station_Name = c("Eagle 3NW Beta",
"Eagle 3NW Beta"), NWSLI = c("EEMN1", "EEMN1"), Station_Legacy_Name = c("Rogers Farm Tower",
"Rogers Farm Tower"), Lat = c(40.846994, 40.846994), Lon = c(-96.467199,
-96.467199), Elev_m = c(368.7, 368.7), TIMESTAMP = c("2021-07-28 20:05:00",
"2021-07-28 20:06:00"), RECORD = 18:19, Ta_2m_Avg = c(34.3, 34.26
), TaMax_2m = c(34.39, 34.35), TaMaxTime_2m = c("2021-07-28 20:04:15",
"2021-07-28 20:05:05"), TaMin_2m = c(34.25, 34.16), TaMinTime_2m = c("2021-07-28 20:04:45",
"2021-07-28 20:05:50"), RH_2m_Avg = c(57.35, 56.97), RHMax_2m = c(58.68,
57.99), RHMaxTime_2m = c("2021-07-28 20:04:30", "2021-07-28 20:05:40"
), RHMin_2m = c(55.86, 55.41), RHMinTime_2m = c("2021-07-28 20:04:45",
"2021-07-28 20:05:20"), Dp_2m_Avg = c(24.68, 24.53), DpMax_2m = c(25.06,
24.77), DpMaxTime_2m = c("2021-07-28 20:04:25", "2021-07-28 20:05:40"
), DpMin_2m = c(24.19, 24.06), DpMinTime_2m = c("2021-07-28 20:04:45",
"2021-07-28 20:05:20"), HeatIndex_2m_Avg = c(41.9, 41.64), HeatIndexMax_2m = c(42.51,
41.95), HeatIndexMaxTime_2m = c("2021-07-28 20:04:25", "2021-07-28 20:05:45"
), WindChill_2m_Avg = c(37.65, 37.77), WindChillMin_2m = c(37.47,
37.56), WindChillMinTime_2m = c("2021-07-28 20:04:45", "2021-07-28 20:05:20"
), WndAveSpd_3m = c(2.746, 3.796), WndVecMagAve_3m = c(2.594,
3.607), WndAveDir_3m = c(166.5, 152.6), WndAveDirSD_3m = c(19.03,
18.05), WndMaxSpd5s_3m = c(5.165, 5.498), WndMaxSpd5sTime_3m = c("2021-07-28 20:05:00",
"2021-07-28 20:05:05"), WndMax_5sec_Dir_3m = c(158.3, 162.8),
PresAvg_1pnt5m = c(972.3953, 972.3508), PresMax_1pnt5m = c(972.4536,
972.42), PresMaxTime_1pnt5m = c("2021-07-28 20:04:25", "2021-07-28 20:05:10"
), PresMin_1pnt5m = c(972.3868, 972.2872), PresMinTime_1pnt5m = c("2021-07-28 20:04:35",
"2021-07-28 20:05:50"), Solar_2m_Avg = c(771.1, 776.1), Rain_1m_Tot = c(0L,
0L), Ts_bare_10cm_Avg = c(32.98, 33.01), TsMax_bare_10cm = c(33,
33.02), TsMaxTime_bare_10cm = c("2021-07-28 20:05:00", "2021-07-28 20:06:00"
), TsMin_bare_10cm = c(32.97, 33), TsMinTime_bare_10cm = c("2021-07-28 20:04:10",
"2021-07-28 20:05:05"), BattVolts_Min = c(12.77, 12.77),
LithBatt_Min = c(3.56, 3.56), MaintMode = c(0L, 0L), WS_ms_S_WVT = c(3.082,
3.614), SlrW_Avg = c(854L, 859L), SlrW_Max = c(858L, 862L
), SlrW_TMx = c("2021-07-28 20:04:05", "2021-07-28 20:06:00"
), SlrW = c(856L, 862L), SlrW_Std = c(2.334, 2.376), WS_Dir = c(164L,
128L), WS_ms_Avg = c(3.082, 3.614), WS_ms_Max = c(5.17, 5.71
), WS_ms_TMx = c("2021-07-28 20:05:00", "2021-07-28 20:05:05"
), WS_Diag = c(0L, 0L), SmplsF = c(1L, 1L), Diag1F = c(0L,
0L), Diag2F = c(0L, 0L), Diag4F = c(0L, 0L), Diag8F = c(0L,
0L), Diag9F = c(0L, 0L), Diag10F = c(0L, 0L), NNDF = c(0L,
0L), WindDir_D1_WVT = c(170.6, 150.3), WindDir_SD1_WVT = c(16.43,
14.95), source = c("E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122",
"E:\\Data_Collected_withDups/3999/3999_1min.csv-20230216_0122"
)), class = "data.frame", row.names = c(NA, -2L))
It looks like I need to use mutate somehow to get what I want, based on the answers to my previous question. But how? I need all columns returned.
Here are two statements I thought might work. I get the error that everything must be used within a selecting function.
dupped_records |> group_by(TIMESTAMP) |> mutate_all(fill(everything(), .direction = 'up'))
This statement does nothing. Why?
dupped_records |> group_by(TIMESTAMP) |> mutate(across(fill(everything(), .direction = 'up')))
Anyway, I appreciate your help, as always. A quick EDIT: Is there a way to do this with dplyr::coalesce?
library(tidyverse)
dupped_records |>
mutate(TIMESTAMP = ymd_hms(TIMESTAMP)) |>
summarise(across(Station_Number:RECORD, first), across(Ta_2m_Avg:WindDir_SD1_WVT, na.omit), .by = "TIMESTAMP")
TIMESTAMP Station_Number Station_Name NWSLI
1 2021-07-28 20:05:00 3999 Eagle 3NW Beta EEMN1
2 2021-07-28 20:06:00 3999 Eagle 3NW Beta EEMN1
Station_Legacy_Name Lat Lon Elev_m RECORD Ta_2m_Avg TaMax_2m
1 Rogers \n Farm Tower 40.84699 -96.4672 368.7 18 34.30 34.39
2 Rogers Farm Tower 40.84699 -96.4672 368.7 19 34.26 34.35
TaMaxTime_2m TaMin_2m TaMinTime_2m RH_2m_Avg RHMax_2m
1 2021-07-28 20:04:15 34.25 2021-07-28 20:04:45 57.35 58.68
2 2021-07-28 20:05:05 34.16 2021-07-28 20:05:50 56.97 57.99
RHMaxTime_2m RHMin_2m RHMinTime_2m Dp_2m_Avg DpMax_2m
1 2021-07-28 20:04:30 55.86 2021-07-28 20:04:45 24.68 25.06
2 2021-07-28 20:05:40 55.41 2021-07-28 20:05:20 24.53 24.77
DpMaxTime_2m DpMin_2m DpMinTime_2m HeatIndex_2m_Avg
1 2021-07-28 20:04:25 24.19 2021-07-28 20:04:45 41.90
2 2021-07-28 20:05:40 24.06 2021-07-28 20:05:20 41.64
HeatIndexMax_2m HeatIndexMaxTime_2m WindChill_2m_Avg WindChillMin_2m
1 42.51 2021-07-28 20:04:25 37.65 37.47
2 41.95 2021-07-28 20:05:45 37.77 37.56
WindChillMinTime_2m WndAveSpd_3m WndVecMagAve_3m WndAveDir_3m WndAveDirSD_3m
1 2021-07-28 20:04:45 2.746 2.594 166.5 19.03
2 2021-07-28 20:05:20 3.796 3.607 152.6 18.05
WndMaxSpd5s_3m WndMaxSpd5sTime_3m WndMax_5sec_Dir_3m PresAvg_1pnt5m
1 5.165 2021-07-28 20:05:00 158.3 972.3953
2 5.498 2021-07-28 20:05:05 162.8 972.3508
PresMax_1pnt5m PresMaxTime_1pnt5m PresMin_1pnt5m PresMinTime_1pnt5m
1 972.4536 2021-07-28 20:04:25 972.3868 2021-07-28 20:04:35
2 972.4200 2021-07-28 20:05:10 972.2872 2021-07-28 20:05:50
Solar_2m_Avg Rain_1m_Tot Ts_bare_10cm_Avg TsMax_bare_10cm TsMaxTime_bare_10cm
1 771.1 0 32.98 33.00 2021-07-28 20:05:00
2 776.1 0 33.01 33.02 2021-07-28 20:06:00
TsMin_bare_10cm TsMinTime_bare_10cm BattVolts_Min LithBatt_Min MaintMode
1 32.97 2021-07-28 20:04:10 12.77 3.56 0
2 33.00 2021-07-28 20:05:05 12.77 3.56 0
WS_ms_S_WVT SlrW_Avg SlrW_Max SlrW_TMx SlrW SlrW_Std WS_Dir
1 3.082 854 858 2021-07-28 20:04:05 856 2.334 164
2 3.614 859 862 2021-07-28 20:06:00 862 2.376 128
WS_ms_Avg WS_ms_Max WS_ms_TMx WS_Diag SmplsF Diag1F Diag2F Diag4F
1 3.082 5.17 2021-07-28 20:05:00 0 1 0 0 0
2 3.614 5.71 2021-07-28 20:05:05 0 1 0 0 0
Diag8F Diag9F Diag10F NNDF WindDir_D1_WVT WindDir_SD1_WVT
1 0 0 0 0 170.6 16.43
2 0 0 0 0 150.3 14.95