Search code examples
rdplyrtidyverse

Compute the actual_tat by excluding the non-business hours in R


I have a dataframe like this

library(dplyr)
library(lubridate)

ticket_id <- c("id_1234","id_2234","id_1334","id_1244","id_1294",
               "id_1264","id_1634","id_1734","id_1834","id_1934")
created <- c("2024-09-22 22:00:00","2024-09-21 23:00:00","2024-09-19 11:30:00","2024-09-23 6:00:00","2024-09-20 18:30:00",
             "2024-09-15 10:00:00","2024-09-14 23:00:00","2024-09-11 5:30:00","2024-09-16 7:30:00","2024-09-13 19:27:00")
tat <- c(14.4, 38, 106, 6.4, 64, 29, 37, 129.5, 2.8, 66)
df.sample <- data.frame(ticket_id,created,tat,stringsAsFactors=FALSE)
df.sample$created <- ymd_hms(df.sample$created)
df.sample$tat = as.numeric(df.sample$tat)

I want to create a new column called actual_tat that is computed by excluding the times in the tat for any tickets created between previous week Friday 6 PM to current week Monday 8 AM

For any tickets created outside of the previous week Friday 6 PM to current week Monday 8 AM, actual_tat is same as tat

Eg: If the ticket was created on Sunday at 7 AM and the tat shows 14 hours, the final actual_tat column should show 1 hour (14-13), which is the number of hours after current week Monday 8 AM

I tried doing something like this but not getting it right

df.sample %>% 
  mutate(
    created_dt = ymd_hms(created),  # Convert Created column to datetime format
    day_of_week = wday(created_dt, label = TRUE),  # Get day of week
    hour_of_day = hour(created_dt),  # Get hour of day
    exclude_hours = ifelse(
      (day_of_week == "Fri" & hour_of_day >= 18) | 
      (day_of_week == "Sat") | 
      (day_of_week == "Sun") | 
      (day_of_week == "Mon" & hour_of_day < 8), 
      TRUE, FALSE
    ),
    current_monday_8am = (floor_date(created_dt, "week") + days(1)) + hours(8),  # Current week Monday 8 AM
    time_diff = as.numeric(difftime(current_monday_8am, created_dt, units = "hours")),  # Calculate time difference
    actual_tat = ifelse(
      exclude_hours, 
      pmax(tat - time_diff, 0), 
      tat
    )
  )

My desired output is

   ticket_id             created   tat actual_tat
1    id_1234 2024-09-22 22:00:00  14.4        4.4
2    id_2234 2024-09-21 23:00:00  38.0        5.0
3    id_1334 2024-09-19 11:30:00 106.0      106.0
4    id_1244 2024-09-23 06:00:00   6.4        4.4
5    id_1294 2024-09-20 18:30:00  64.0        2.5
6    id_1264 2024-09-15 10:00:00  29.0        7.0
7    id_1634 2024-09-14 23:00:00  37.0        4.0
8    id_1734 2024-09-11 05:30:00 129.5      129.5
9    id_1834 2024-09-16 07:30:00   2.8        2.3
10   id_1934 2024-09-13 19:27:00  66.0        5.5

Can someone point me in the right direction?


Solution

  • You need to use ceiling_date instead of floor_date, and also subtract 8 hours first, then add the 8 hours back. This handles those cases where the created_dt was between midnight on Sunday and 8am on Monday. Finally, tell ceiling_date to start the week on Monday, not Sunday (the default).

    library(lubridate)
    library(dplyr)
    
    df.sample %>% 
      mutate(
        created_dt = ymd_hms(created),  # Convert Created column to datetime format
        day_of_week = wday(created_dt, label = TRUE),  # Get day of week
        hour_of_day = hour(created_dt),  # Get hour of day
        exclude_hours = ifelse(
          (day_of_week == "Fri" & hour_of_day >= 18) | 
            (day_of_week == "Sat") | 
            (day_of_week == "Sun") | 
            (day_of_week == "Mon" & hour_of_day < 8), 
          TRUE, FALSE
        ),
        current_monday_8am = ceiling_date(created_dt - hours(8), "week", week_start=1) + hours(8),  # Current week Monday 8 AM
        time_diff = as.numeric(difftime(current_monday_8am, created_dt, units = "hours")),  # Calculate time difference
        actual_tat = ifelse(
          exclude_hours, 
          tat - time_diff, 
          tat
        )
      )  |>
      select(ticket_id, tat, created_dt, actual_tat)
    

       ticket_id   tat          created_dt actual_tat
    1    id_1234  14.4 2024-09-22 22:00:00       4.40
    2    id_2234  38.0 2024-09-21 23:00:00       5.00
    3    id_1334 106.0 2024-09-19 11:30:00     106.00
    4    id_1244   6.4 2024-09-23 06:00:00       4.40
    5    id_1294  64.0 2024-09-20 18:30:00       2.50
    6    id_1264  29.0 2024-09-15 10:00:00       7.00
    7    id_1634  37.0 2024-09-14 23:00:00       4.00
    8    id_1734 129.5 2024-09-11 05:30:00     129.50
    9    id_1834   2.8 2024-09-16 07:30:00       2.30
    10   id_1934  66.0 2024-09-13 19:27:00       5.45