I'm new to R and currently struggling with the following problem.
My dataframe looks like this (note that there are more columns and rows):
My goal is, to subset the dataframe so i only get 1 row per hour. Normally I should get 24 data rows per days.
I tried the following code:
sorting_test <- NULL
sorting_test <- transform_data_test %>%
mutate(Date = ymd_hms(date), dt = as_date(date), hr = hour(date)) %>%
group_by(dt, hr) %>%
filter(Date == min(Date)) %>%
ungroup()
However, my result is missing complete data for some days and hours. result
I checked multiple slack threads but couldntfind a proper solution. What am I missing?
Edit: As requested the dput of my dataframe
structure(list(event.id = c(8384310912, 8384310913, 8384310914,
8384310915, 8384310916, 8384310917), visible = c("true", "true",
"true", "true", "true", "true"), date = c("2019-01-02 14:00:23.000",
"2019-01-02 19:00:41.000", "2019-01-03 12:40:23.000", "2019-01-03 13:20:07.000",
"2019-01-03 13:40:07.000", "2019-01-03 14:00:06.000"), long = c(-5.8788868,
-5.9539697, -5.8788406, -5.8780982, -5.8787327, -5.8787414),
lat = c(37.2227809, 37.1668986, 37.2230289, 37.2221521, 37.2229884,
37.223007), bar.barometric.pressure = c(NA, NA, NA, NA, NA,
NA), data.decoding.software = c(8L, 8L, 8L, 8L, 8L, 8L),
eobs.activity = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), eobs.activity.samples = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), eobs.battery.voltage = c(3706L, 3701L, 3725L, 3735L, 3730L,
3730L), eobs.fix.battery.voltage = c(3671L, 3666L, 3696L,
3710L, 3706L, 3706L), eobs.horizontal.accuracy.estimate = c(5.12,
5.38, 12.8, 7.94, 7.42, 7.17), eobs.key.bin.checksum = c(1795615137,
3161364829, 1830803994, 2772018959, 2497038035, 3520440277
), eobs.speed.accuracy.estimate = c(0.28, 0.3, 0.27, 0.25,
0.35, 0.42), eobs.start.timestamp = c("2019-01-02 13:59:57.000",
"2019-01-02 19:00:05.000", "2019-01-03 12:39:57.000", "2019-01-03 13:20:00.000",
"2019-01-03 13:40:00.000", "2019-01-03 14:00:00.000"), eobs.status = c("A",
"A", "A", "A", "A", "A"), eobs.temperature = c(18L, 11L,
13L, 15L, 14L, 15L), eobs.type.of.fix = c(3L, 3L, 3L, 3L,
3L, 3L), eobs.used.time.to.get.fix = c(25L, 35L, 25L, 6L,
6L, 5L), gps.dop = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), gps.satellite.count = c(NA, NA, NA,
NA, NA, NA), ground.speed = c(0.05, 0.03, 0.07, 10.51, 0.16,
0.18), heading = c(0, 0, 0, 346.03, 208.4, 205.13), height.above.ellipsoid = c(108.3,
43.8, 106.9, 114.1, 106, 114.7), import.marked.outlier = c("false",
"false", "false", "false", "false", "false"), mag.magnetic.field.raw.x = c(NA,
NA, NA, NA, NA, NA), mag.magnetic.field.raw.y = c(NA, NA,
NA, NA, NA, NA), mag.magnetic.field.raw.z = c(NA, NA, NA,
NA, NA, NA), manually.marked.outlier = c(NA, NA, NA, NA,
NA, NA), orientation.quaternion.raw.w = c(NA, NA, NA, NA,
NA, NA), orientation.quaternion.raw.x = c(NA, NA, NA, NA,
NA, NA), orientation.quaternion.raw.y = c(NA, NA, NA, NA,
NA, NA), orientation.quaternion.raw.z = c(NA, NA, NA, NA,
NA, NA), sensor.type = c("gps", "gps", "gps", "gps", "gps",
"gps"), individual.taxon.canonical.name = c("Ciconia ciconia",
"Ciconia ciconia", "Ciconia ciconia", "Ciconia ciconia",
"Ciconia ciconia", "Ciconia ciconia"), tag.local.identifier = c(3907L,
3907L, 3907L, 3907L, 3907L, 3907L), id = c("Mirabell + / DER AN910 (eobs 3907)",
"Mirabell + / DER AN910 (eobs 3907)", "Mirabell + / DER AN910 (eobs 3907)",
"Mirabell + / DER AN910 (eobs 3907)", "Mirabell + / DER AN910 (eobs 3907)",
"Mirabell + / DER AN910 (eobs 3907)"), study.name = c("LifeTrack White Stork SW Germany",
"LifeTrack White Stork SW Germany", "LifeTrack White Stork SW Germany",
"LifeTrack White Stork SW Germany", "LifeTrack White Stork SW Germany",
"LifeTrack White Stork SW Germany"), year = c("2019", "2019",
"2019", "2019", "2019", "2019"), burst = c("Mirabell DER AN910 (eobs 3907) 2019",
"Mirabell DER AN910 (eobs 3907) 2019", "Mirabell DER AN910 (eobs 3907) 2019",
"Mirabell DER AN910 (eobs 3907) 2019", "Mirabell DER AN910 (eobs 3907) 2019",
"Mirabell DER AN910 (eobs 3907) 2019")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
burst = "Mirabell DER AN910 (eobs 3907) 2019", .rows = structure(list(
1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
Maybe distinct
helps?
library(lubridate)
library(dplyr)
transform_data_test %>%
mutate(Date = ymd_hms(date), dt = as_date(date), hr = hour(date)) %>%
group_by(burst) %>%
distinct(dt, hr, .keep_all = T)