Is there an R package that provides grouping of dates and times for all of the typical time units (seconds, minutes, etc.)?
Or put another way: how would I truncate to multiples a particular time unit?
I often need to group responses to HTTP requests into 15 second intervals.
Function lubridate::floor_date()
and lubridate::ceiling_date()
got me started, but
I came up with something based on the modulo (%%
), but it feels like I'm reinventing the wheel regarding operating on atomic time components.
library(magrittr)
group_date <- function(
x,
interval = 15,
unit = c(
"seconds",
"minutes",
"hours",
"days",
"weeks",
"months",
"bimonths",
"quarters",
"seasons",
"halfyears",
"years"
)
) {
# Validate units:
unit <- match.arg(unit)
# Possibly base units on "basic units" as {lubridate} does:
# parsed_unit <- lubridate:::parse_period_unit(unit)
# n <- parsed_unit$n
# basic_unit <- lubridate:::standardise_period_names(parsed_unit$unit)
if (unit %in% c("bimonths", "halfyears", "season")) {
stop(stringr::str_glue("Unit '{unit}' not supported yet"))
}
# No clue how these would need to be handled yet
# Extract unit value:
unit_value <- dplyr::case_when(
unit == "seconds" ~ as.numeric(lubridate::second(x)),
unit == "minutes" ~ as.numeric(lubridate::minute(x)),
unit == "hours" ~ as.numeric(lubridate::hour(x)),
unit == "days" ~ as.numeric(lubridate::day(x)),
unit == "weeks" ~ as.numeric(lubridate::isoweek(x)),
unit == "months" ~ as.numeric(lubridate::month(x)),
unit == "quarters" ~ as.numeric(lubridate::quarter(x)),
unit == "year" ~ as.numeric(lubridate::year(x))
)
offset_factor <- dplyr::case_when(
unit == "seconds" ~ 1,
unit == "minutes" ~ 60,
unit == "hours" ~ 60 * 60,
unit == "days" ~ 60 * 60 * 24,
unit == "weeks" ~ NA_real_, # Seconds per week -> no clue how to do that,
unit == "months" ~ NA_real_, # Seconds per month -> no clue how to do that
unit == "quarters" ~ NA_real_, # Seconds per quarter -> no clue how to do that
unit == "year" ~ NA_real_ # Seconds per year -> no clue how to do that
)
# Calculate time offset to lower group boundary:
time_offset <- unit_value %% interval
# Apply offset:
x - (time_offset * offset_factor)
}
x <- c(
"2020-01-31 13:01:14",
"2020-01-31 13:01:15",
"2020-01-31 13:01:16",
"2020-01-31 13:01:29",
"2020-01-31 13:01:30",
"2020-01-31 13:01:31",
"2020-01-31 13:01:44",
"2020-01-31 13:01:45",
"2020-01-31 13:01:46",
"2020-01-31 13:01:59",
"2020-01-31 13:02:00",
"2020-01-31 13:02:01"
) %>%
lubridate::ymd_hms()
x %>% group_date()
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:15 UTC"
#> [3] "2020-01-31 13:01:15 UTC" "2020-01-31 13:01:15 UTC"
#> [5] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [7] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:45 UTC"
#> [9] "2020-01-31 13:01:45 UTC" "2020-01-31 13:01:45 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x %>% group_date(30)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [3] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [5] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [7] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [9] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x %>% group_date(45)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [3] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [5] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [7] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:45 UTC"
#> [9] "2020-01-31 13:01:45 UTC" "2020-01-31 13:01:45 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x %>% group_date(60)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [3] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [5] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [7] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [9] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x <- c(
"2020-01-31 13:04:00",
"2020-01-31 13:05:00",
"2020-01-31 13:06:00",
"2020-01-31 13:29:00",
"2020-01-31 13:30:00",
"2020-01-31 13:31:00",
"2020-01-31 13:44:00",
"2020-01-31 13:45:00",
"2020-01-31 13:46:00"
) %>%
lubridate::ymd_hms()
x %>% group_date(15, "minutes")
#> [1] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [3] "2020-01-31 13:00:00 UTC" "2020-01-31 13:15:00 UTC"
#> [5] "2020-01-31 13:30:00 UTC" "2020-01-31 13:30:00 UTC"
#> [7] "2020-01-31 13:30:00 UTC" "2020-01-31 13:45:00 UTC"
#> [9] "2020-01-31 13:45:00 UTC"
x %>% group_date(30, "minutes")
#> [1] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [3] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [5] "2020-01-31 13:30:00 UTC" "2020-01-31 13:30:00 UTC"
#> [7] "2020-01-31 13:30:00 UTC" "2020-01-31 13:30:00 UTC"
#> [9] "2020-01-31 13:30:00 UTC"
x %>% group_date(45, "minutes")
#> [1] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [3] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [5] "2020-01-31 13:00:00 UTC" "2020-01-31 13:00:00 UTC"
#> [7] "2020-01-31 13:00:00 UTC" "2020-01-31 13:45:00 UTC"
#> [9] "2020-01-31 13:45:00 UTC"
Just came across hms::trunc_hms()
.
Seems to give me what I need for secs = 15
and secs = 30
, so it would solve my immediate problem. But I can't see how it would work for time units other than seconds:
library(magrittr)
x <- c(
"2020-01-31 13:01:14",
"2020-01-31 13:01:15",
"2020-01-31 13:01:16",
"2020-01-31 13:01:29",
"2020-01-31 13:01:30",
"2020-01-31 13:01:31",
"2020-01-31 13:01:44",
"2020-01-31 13:01:45",
"2020-01-31 13:01:46",
"2020-01-31 13:01:59",
"2020-01-31 13:02:00",
"2020-01-31 13:02:01"
) %>%
lubridate::ymd_hms()
x %>% hms::trunc_hms(15)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:15 UTC"
#> [3] "2020-01-31 13:01:15 UTC" "2020-01-31 13:01:15 UTC"
#> [5] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [7] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:45 UTC"
#> [9] "2020-01-31 13:01:45 UTC" "2020-01-31 13:01:45 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x %>% hms::trunc_hms(30)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [3] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [5] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [7] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [9] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
x %>% hms::trunc_hms(45)
#> [1] "2020-01-31 13:00:45 UTC" "2020-01-31 13:00:45 UTC"
#> [3] "2020-01-31 13:00:45 UTC" "2020-01-31 13:00:45 UTC"
#> [5] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [7] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [9] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
#> [11] "2020-01-31 13:01:30 UTC" "2020-01-31 13:01:30 UTC"
x %>% hms::trunc_hms(60)
#> [1] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [3] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [5] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [7] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [9] "2020-01-31 13:01:00 UTC" "2020-01-31 13:01:00 UTC"
#> [11] "2020-01-31 13:02:00 UTC" "2020-01-31 13:02:00 UTC"
Looking into the suggestion to use cut()
x %>%
tibble::enframe() %>%
dplyr::mutate(
grp = value %>% group_date(15)
) %>%
dplyr::group_by(
grp_2 = cut(as.POSIXct(value, '%Y-%m-%d %H:%M:%S'), '15 secs')
)
# # A tibble: 12 x 4
# # Groups: grp [4]
# name value grp_2 grp
# <int> <dttm> <dttm> <fct>
# 1 1 2020-01-31 13:01:14.000 2020-01-31 13:01:00.000 2020-01-31 13:01:14
# 2 2 2020-01-31 13:01:15.000 2020-01-31 13:01:15.000 2020-01-31 13:01:14
# 3 3 2020-01-31 13:01:16.000 2020-01-31 13:01:15.000 2020-01-31 13:01:14
# 4 4 2020-01-31 13:01:29.000 2020-01-31 13:01:15.000 2020-01-31 13:01:29
# 5 5 2020-01-31 13:01:30.000 2020-01-31 13:01:30.000 2020-01-31 13:01:29
# 6 6 2020-01-31 13:01:31.000 2020-01-31 13:01:30.000 2020-01-31 13:01:29
# 7 7 2020-01-31 13:01:44.000 2020-01-31 13:01:30.000 2020-01-31 13:01:44
# 8 8 2020-01-31 13:01:45.000 2020-01-31 13:01:45.000 2020-01-31 13:01:44
# 9 9 2020-01-31 13:01:46.000 2020-01-31 13:01:45.000 2020-01-31 13:01:44
# 10 10 2020-01-31 13:01:59.000 2020-01-31 13:01:45.000 2020-01-31 13:01:59
# 11 11 2020-01-31 13:02:00.000 2020-01-31 13:02:00.000 2020-01-31 13:01:59
# 12 12 2020-01-31 13:02:01.000 2020-01-31 13:02:00.000 2020-01-31 13:01:59
Created on 2020-01-31 by the reprex package (v0.3.0)
How about this simple function to round to any given number of seconds, minutes, hours, days, or weeks? In case you want to start your chunks at a specific date and time, there is an optional default origin time. The arguments to "units" are matched so you can abbreviate. It defaults to seconds
time_group <- function(times, intervals, since = as.POSIXct("2000-01-01"),
units = c("secs", "mins", "hours", "days", "weeks"))
{
all_units <- c("secs", "mins", "hours", "days", "weeks")
units <- match.arg(units, all_units)
intervals <- intervals * c(1, 60, 3600, 86400, 604800)[match(units, all_units)]
cuts <- intervals * floor(as.numeric(difftime(times, since, units = "secs"))/intervals)
return(as.POSIXct(cuts, origin = since))
}
This allows you to do this:
# Units default to seconds so this groups by 15 seconds at a time
time_group(x, 15)
#> [1] "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:15 GMT" "2020-01-31 13:01:15 GMT"
#> [4] "2020-01-31 13:01:15 GMT" "2020-01-31 13:01:30 GMT" "2020-01-31 13:01:30 GMT"
#> [7] "2020-01-31 13:01:30 GMT" "2020-01-31 13:01:45 GMT" "2020-01-31 13:01:45 GMT"
#> [10] "2020-01-31 13:01:45 GMT" "2020-01-31 13:02:00 GMT" "2020-01-31 13:02:00 GMT"
# We have used argument matching so we can abbreviate minutes to "m"
time_group(x, 1, units = "m")
#> [1] "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT"
#> [4] "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT"
#> [7] "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT" "2020-01-31 13:01:00 GMT"
#> [10] "2020-01-31 13:01:00 GMT" "2020-01-31 13:02:00 GMT" "2020-01-31 13:02:00 GMT"