Example data are as follows:
library(dplyr)
library(lubridate)
library(tidyr)
set.seed(123)
dummy_data <- tibble(
focalbear_event = rep(c("event1", "event2"), each = 10),
timestamp_focal.bhv.change = c(
seq(ymd_hms("2023-09-22 10:00:00"), by = "1 min", length.out = 10), # Event 1 timestamps
seq(ymd_hms("2023-09-22 11:00:00"), by = "2 min", length.out = 10) # Event 2 with missing intervals
),
behavior = sample(c("Fishing", "Alertness", "Resting"), 20, replace = TRUE),
land_human_num = sample(0:5, 20, replace = TRUE),
num_boats = sample(0:3, 20, replace = TRUE)
)
dummy_data <- dummy_data %>%
slice(-c(5, 16)) %>% # Removing some rows to create gaps
bind_rows(tibble(focalbear_event = "event1", timestamp_focal.bhv.change = ymd_hms("2023-09-22 10:03:00"), behavior = "Fishing", land_human_num = 2, num_boats = 1)) # Adding a duplicate
I'd like each case to pluck 2 minute time intervals from the timestamps like such:
# Remove duplicate timestamps within the same minute for each group, keeping only the first occurrence
dummy_data <- dummy_data %>%
group_by(focalbear_event) %>%
arrange(timestamp_focal.bhv.change) %>%
distinct(floor_timestamp = floor_date(timestamp_focal.bhv.change, unit = "2 minutes"), .keep_all = TRUE) %>%
ungroup() %>%
select(-floor_timestamp)
# Function to expand timestamps for each group
expand_timestamps <- function(df) {
df %>%
group_by(focalbear_event) %>%
complete(timestamp_focal.bhv.change = seq(min(timestamp_focal.bhv.change), max(timestamp_focal.bhv.change), by = "2 mins")) %>%
fill(everything(), .direction = "down") %>%
ungroup()
}
dummy_data <- expand_timestamps(dummy_data)
But there are 1 minute time intervals in some cases, rather than 2. Any idea why?
Based on the logic in your code, you can use:
library(dplyr)
library(lubridate)
library(tidyr)
dummy_data <- structure(list(focalbear_event = c("event1", "event1", "event1",
"event1", "event1", "event1", "event1", "event1", "event1", "event2",
"event2", "event2", "event2", "event2", "event2", "event2", "event2",
"event2", "event1"), timestamp_focal.bhv.change = structure(c(1695376800,
1695376860, 1695376920, 1695376980, 1695377100, 1695377160, 1695377220,
1695377280, 1695377340, 1695380400, 1695380520, 1695380640, 1695380760,
1695380880, 1695381120, 1695381240, 1695381360, 1695381480, 1695376980
), tzone = "UTC", class = c("POSIXct", "POSIXt")), behavior = c("Resting",
"Resting", "Resting", "Alertness", "Alertness", "Alertness",
"Alertness", "Resting", "Fishing", "Alertness", "Alertness",
"Fishing", "Alertness", "Resting", "Resting", "Resting", "Fishing",
"Fishing", "Fishing"), land_human_num = c(0, 4, 2, 1, 0, 5, 2,
3, 5, 0, 2, 4, 3, 1, 0, 0, 1, 2, 2), num_boats = c(3, 0, 2, 0,
3, 1, 0, 1, 0, 0, 3, 3, 2, 0, 0, 0, 2, 0, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -19L))
dummy_data |>
group_by(focalbear_event) |>
arrange(timestamp_focal.bhv.change) |>
distinct(timestamp_focal.bhv.change, .keep_all = TRUE) |>
complete(
timestamp_focal.bhv.change = seq(min(timestamp_focal.bhv.change),
max(timestamp_focal.bhv.change),
by = "1 mins")) |>
fill(everything(), .direction = "down") |>
filter(minute(timestamp_focal.bhv.change) %% 2 == 0) |>
ungroup()
# # A tibble: 15 × 5
# focalbear_event timestamp_focal.bhv.change behavior land_human_num num_boats
# <chr> <dttm> <chr> <dbl> <dbl>
# 1 event1 2023-09-22 10:00:00 Resting 0 3
# 2 event1 2023-09-22 10:02:00 Resting 2 2
# 3 event1 2023-09-22 10:04:00 Alertness 1 0
# 4 event1 2023-09-22 10:06:00 Alertness 5 1
# 5 event1 2023-09-22 10:08:00 Resting 3 1
# 6 event2 2023-09-22 11:00:00 Alertness 0 0
# 7 event2 2023-09-22 11:02:00 Alertness 2 3
# 8 event2 2023-09-22 11:04:00 Fishing 4 3
# 9 event2 2023-09-22 11:06:00 Alertness 3 2
# 10 event2 2023-09-22 11:08:00 Resting 1 0
# 11 event2 2023-09-22 11:10:00 Resting 1 0
# 12 event2 2023-09-22 11:12:00 Resting 0 0
# 13 event2 2023-09-22 11:14:00 Resting 0 0
# 14 event2 2023-09-22 11:16:00 Fishing 1 2
# 15 event2 2023-09-22 11:18:00 Fishing 2 0