I'm trying to group together transactions that occur within a period of one hour of each other, but so far I haven't been able to get anything working. See the example below:
# Sample input in dput() form: a named list of three equal-length columns
# (timestamp string, user id, transaction id), one element per transaction.
structure(
  list(
    `data hora` = c(
      "01/10/2023 01:00", "01/10/2023 01:32", "01/10/2023 01:45",
      "04/10/2023 03:10", "04/10/2023 03:12", "06/10/2023 01:00",
      "01/10/2023 01:00", "01/10/2023 01:32", "02/10/2023 01:45"
    ),
    user = c(
      "12345ag", "12345ag", "12345ag", "12345ag", "12345ag", "12345ag",
      "4569lç", "4569lç", "4569lç"
    ),
    id_transacion = c(1, 2, 3, 4, 5, 6, 7, 8, 9)
  )
)
I would like the outcome to look like this
# Build the example data frame in one step. With check.names = TRUE the
# non-syntactic column name `data hora` is stored as `data.hora`, which is the
# name the rest of the script uses.
mydf <- data.frame(
  `data hora` = c(
    "01/10/2023 01:00", "01/10/2023 01:32", "01/10/2023 01:45",
    "04/10/2023 03:10", "04/10/2023 03:12", "06/10/2023 01:00",
    "01/10/2023 01:00", "01/10/2023 01:32", "02/10/2023 01:45"
  ),
  user = c(
    "12345ag", "12345ag", "12345ag", "12345ag", "12345ag", "12345ag",
    "4569lç", "4569lç", "4569lç"
  ),
  id_transacion = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
  check.names = TRUE
)
library(lubridate)
library(dplyr)
library(tidyr)
# Collapse each user's transactions into sessions. A session starts at a
# user's first transaction and whenever the gap to that user's previous
# transaction is 60 minutes or more. The result has one row per session with
# the first/last timestamp and the first/last transaction id of that session.
mydf |>
  mutate(data.hora = dmy_hm(data.hora)) |>
  # lag() compares each row with the row before it, so rows must be in
  # chronological order within each user before gaps are computed.
  arrange(user, data.hora) |>
  group_by(user) |>
  mutate(
    # Pin the unit explicitly: `data.hora - lag(data.hora)` lets R choose
    # secs/mins/hours/days from the data, which silently changes what a
    # comparison against 60 means.
    gap_mins = as.numeric(
      difftime(data.hora, lag(data.hora), units = "mins")
    ),
    # A user's first row has an NA gap and always opens a session; the running
    # count of session starts serves as the session id within the user.
    check = cumsum(is.na(gap_mins) | gap_mins >= 60)
  ) |>
  group_by(user, check) |>
  summarise(
    min_data_hora = first(data.hora),
    max_data_hora = last(data.hora),
    min_id_transacion = first(id_transacion),
    max_id_transacion = last(id_transacion),
    .groups = "drop"
  ) |>
  select(
    min_data_hora, max_data_hora, user,
    min_id_transacion, max_id_transacion
  )
# A tibble: 5 × 5
min_data_hora max_data_hora user min_id_transacion max_id_transacion
<dttm> <dttm> <chr> <dbl> <dbl>
1 2023-10-01 01:00:00 2023-10-01 01:45:00 12345ag 1 3
2 2023-10-04 03:10:00 2023-10-04 03:12:00 12345ag 4 5
3 2023-10-06 01:00:00 2023-10-06 01:00:00 12345ag 6 6
4 2023-10-01 01:00:00 2023-10-01 01:32:00 4569lç 7 8
5 2023-10-02 01:45:00 2023-10-02 01:45:00 4569lç 9 9
There's likely a much more efficient method than what I've strung together here, but it works with the data at hand.