Search code examples
rdplyrtidyrlubridate

Reformat data to include 2 minute time intervals


Example data are as follows:

library(dplyr)
library(lubridate)
library(tidyr)

set.seed(123)
dummy_data <- tibble(
  focalbear_event = rep(c("event1", "event2"), each = 10),
  timestamp_focal.bhv.change = c(
    seq(ymd_hms("2023-09-22 10:00:00"), by = "1 min", length.out = 10),  # Event 1 timestamps
    seq(ymd_hms("2023-09-22 11:00:00"), by = "2 min", length.out = 10)   # Event 2 with missing intervals
  ),
  behavior = sample(c("Fishing", "Alertness", "Resting"), 20, replace = TRUE),
  land_human_num = sample(0:5, 20, replace = TRUE),
  num_boats = sample(0:3, 20, replace = TRUE)
)

dummy_data <- dummy_data %>%
  slice(-c(5, 16)) %>%  # Removing some rows to create gaps
  bind_rows(tibble(focalbear_event = "event1", timestamp_focal.bhv.change = ymd_hms("2023-09-22 10:03:00"), behavior = "Fishing", land_human_num = 2, num_boats = 1))  # Adding a duplicate

I'd like each case to pluck 2 minute time intervals from the timestamps like such:

# Remove duplicate timestamps within the same minute for each group, keeping only the first occurrence
dummy_data <- dummy_data %>%
  group_by(focalbear_event) %>%
  arrange(timestamp_focal.bhv.change) %>%
  distinct(floor_timestamp = floor_date(timestamp_focal.bhv.change, unit = "2 minutes"), .keep_all = TRUE) %>%
  ungroup() %>%
  select(-floor_timestamp)

# Function to expand timestamps for each group
expand_timestamps <- function(df) {
  df %>%
    group_by(focalbear_event) %>%
    complete(timestamp_focal.bhv.change = seq(min(timestamp_focal.bhv.change), max(timestamp_focal.bhv.change), by = "2 mins")) %>%
    fill(everything(), .direction = "down") %>%
    ungroup()
}


dummy_data <- expand_timestamps(dummy_data)

But there are 1 minute time intervals in some cases, rather than 2. Any idea why?


Solution

  • Based on the logic in your code, you can use:

    library(dplyr)
    library(lubridate)
    library(tidyr)
    
    dummy_data <- structure(list(focalbear_event = c("event1", "event1", "event1", 
    "event1", "event1", "event1", "event1", "event1", "event1", "event2", 
    "event2", "event2", "event2", "event2", "event2", "event2", "event2", 
    "event2", "event1"), timestamp_focal.bhv.change = structure(c(1695376800, 
    1695376860, 1695376920, 1695376980, 1695377100, 1695377160, 1695377220, 
    1695377280, 1695377340, 1695380400, 1695380520, 1695380640, 1695380760, 
    1695380880, 1695381120, 1695381240, 1695381360, 1695381480, 1695376980
    ), tzone = "UTC", class = c("POSIXct", "POSIXt")), behavior = c("Resting", 
    "Resting", "Resting", "Alertness", "Alertness", "Alertness", 
    "Alertness", "Resting", "Fishing", "Alertness", "Alertness", 
    "Fishing", "Alertness", "Resting", "Resting", "Resting", "Fishing", 
    "Fishing", "Fishing"), land_human_num = c(0, 4, 2, 1, 0, 5, 2, 
    3, 5, 0, 2, 4, 3, 1, 0, 0, 1, 2, 2), num_boats = c(3, 0, 2, 0, 
    3, 1, 0, 1, 0, 0, 3, 3, 2, 0, 0, 0, 2, 0, 1)), class = c("tbl_df", 
    "tbl", "data.frame"), row.names = c(NA, -19L))
    
    
    dummy_data |>
      group_by(focalbear_event) |>
      arrange(timestamp_focal.bhv.change) |>
      distinct(timestamp_focal.bhv.change, .keep_all = TRUE) |>
      complete(
        timestamp_focal.bhv.change = seq(min(timestamp_focal.bhv.change),
                                         max(timestamp_focal.bhv.change),
                                         by = "1 mins")) |>
      fill(everything(), .direction = "down") |>
      filter(minute(timestamp_focal.bhv.change) %% 2 == 0) |>
      ungroup()
    
    # # A tibble: 15 × 5
    #    focalbear_event timestamp_focal.bhv.change behavior  land_human_num num_boats
    #    <chr>           <dttm>                     <chr>              <dbl>     <dbl>
    #  1 event1          2023-09-22 10:00:00        Resting                0         3
    #  2 event1          2023-09-22 10:02:00        Resting                2         2
    #  3 event1          2023-09-22 10:04:00        Alertness              1         0
    #  4 event1          2023-09-22 10:06:00        Alertness              5         1
    #  5 event1          2023-09-22 10:08:00        Resting                3         1
    #  6 event2          2023-09-22 11:00:00        Alertness              0         0
    #  7 event2          2023-09-22 11:02:00        Alertness              2         3
    #  8 event2          2023-09-22 11:04:00        Fishing                4         3
    #  9 event2          2023-09-22 11:06:00        Alertness              3         2
    # 10 event2          2023-09-22 11:08:00        Resting                1         0
    # 11 event2          2023-09-22 11:10:00        Resting                1         0
    # 12 event2          2023-09-22 11:12:00        Resting                0         0
    # 13 event2          2023-09-22 11:14:00        Resting                0         0
    # 14 event2          2023-09-22 11:16:00        Fishing                1         2
    # 15 event2          2023-09-22 11:18:00        Fishing                2         0