Search code examples
rdplyrlubridate

Classifying overlapping time intervals based on factor


I am looking at production data from a manufacturing facility and need to classify the day's production for all production lines based on the size of bottle produced on one line. The basic setup is that line FL 5S only produces 5l jugs, FL 25 produces 25l jugs, and FL 5 can produce 1l, 3l, and 5l jugs. I need a new column called "Recipe" which should reflect the size of jug produced on FL 5 for all orders on any line produced concurrently with orders produced on FL 5. For example, if Size == "001" over the time interval 2020-04-28 08:46:56 UTC--2020-04-29 07:49:09 UTC then Recipe should be 001 for all orders produced on the other lines whose Started and Ended times overlap with these times for the 1l order.

Data:

library(tidyverse)
library(lubridate)
Prod_data_ex<-structure(list(Order = c(27380, 27388, 27395, 27381, 27389, 27382, 
                                       27396, 27397, 27393, 27392, 27383, 27384, 27385, 27386, 27398, 
                                       27409, 27410, 27411, 27412, 27416, 27420, 27421, 27417, 27418, 
                                       27432, 27433, 27419, 27413, 27399, 27414, 27415, 27428, 27424, 
                                       27429, 27431), 
                             Prod_line = c("FL 5S", "FL 5", "FL 25", "FL 5S", 
                                           "FL 5", "FL 5S", "FL 25", "FL 5", "FL 5", "FL 5", "FL 5S", "FL 5S", 
                                           "FL 5S", "FL 5S", "FL 5", "FL 5S", "FL 5S", "FL 5S", "FL 5S", 
                                           "FL 25", "FL 5S", "FL 5S", "FL 25", "FL 25", "FL 5S", "FL 5S", 
                                          "FL 25", "FL 5S", "FL 5", "FL 5S", "FL 5S", "FL 5S", "FL 25", 
                                          "FL 5S", "FL 5"), 
                             Produced = c(5400, 6373, 1440, 6372, 864, 5400, 
                                          288, 1080, 864, 5402, 3240, 864, 5293, 2700, 11547, 4427, 3672, 
                                          864, 2651, 96, 648, 1620, 96, 480, 2160, 1363, 480, 4320, 11528, 
                                          4320, 864, 12745, 600, 3782, 6373), 
                             Size = c("005", "005", "025","005", "005", "005", "025", "005", "005", "005", "005", "005", 
                                      "005", "005", "001", "005", "005", "005", "005", "025", "005", 
                                      "005", "025", "025", "005", "005", "025", "005", "001", "005", 
                                      "005", "005", "025", "005", "005"), 
                             Started = structure(c(1587969512, 1587970869, 1587972010, 1587984935, 1587998820, 1587999421, 1588003819, 
                                                  1588004233, 1588008484, 1588013628, 1588014019, 1588022405, 1588024038, 
                                                  1588062901, 1588063616, 1588070291, 1588076547, 1588083066, 1588085684, 
                                                  1588090216, 1588093080, 1588094491, 1588094881, 1588100218, 1588101215, 
                                                  1588105405, 1588105729, 1588110352, 1588146549, 1588147618, 1588155560, 
                                                  1588157595, 1588172423, 1588179841, 1588180603), tzone = "UTC", class = c("POSIXct", 
                                                  "POSIXt")), Ended = structure(c(1587984935, 1587998820, 1588003819, 
                                                  1587999421, 1588004233, 1588014019, 1588090216, 1588008484, 1588013628, 
                                                  1588063616, 1588022405, 1588024038, 1588062901, 1588070291, 1588146549, 
                                                  1588076547, 1588083066, 1588085684, 1588093080, 1588094881, 1588094491, 
                                                  1588101215, 1588100218, 1588105729, 1588105405, 1588110352, 1588172423, 
                                                  1588147618, 1588180603, 1588155560, 1588157595, 1588179841, 1588188834, 
                                                  1588191295, 1588241708), tzone = "UTC", class = c("POSIXct", 
                                                  "POSIXt"))), row.names = c(NA, -35L), class = c("tbl_df", "tbl", "data.frame"))

Here's the closest I have gotten to working code, but still doesn't work for orders on other lines.

Prod_data_recpies<- Prod_data_ex%>%
  mutate(interval= interval(Started, Ended))%>%
  mutate(recipe= ifelse(Size=="001", map_int(interval,~ any(int_overlaps(.x, interval[Size=="001"]))),
                        ifelse(Size=="003", map_int(interval,~ any(int_overlaps(.x, interval[Size=="003"]))), "005")))

Thank you!


Solution

  • I am not sure how you want to deal with overlaps with several FL 5 orders and non-existing overlaps, but you can modify the approach below to fit your needs. I used foverlaps() from the data.table package to identify overlaps. You can use the type parameter if you want to specify interval overlaps (e.g. fully within, start or end overlap).

    library(data.table)
    dt <- as.data.table(Prod_data_ex)
    dt[, Ended := Ended - 1] # prevent overlaps of consecutive time intervals
    setkeyv(dt, c("Started", "Ended"))
    dt[, Recipe := foverlaps(dt, dt[Prod_line == "FL 5"], which = TRUE)[,
      list(list(unique(yid[yid != xid]))), by = xid][, -1]]
    dt$Recipe <- sapply(dt$Recipe, function(x) {
      paste(unique(dt[Prod_line == "FL 5"][x]$Size), collapse = ",")
    })
    dt[, Ended := Ended + 1][] # add back the second
    

    Result:

    #>     Order Prod_line Produced Size             Started               Ended
    #>  1: 27380     FL 5S     5400  005 2020-04-27 06:38:32 2020-04-27 10:55:35
    #>  2: 27388      FL 5     6373  005 2020-04-27 07:01:09 2020-04-27 14:47:00
    #>  3: 27395     FL 25     1440  025 2020-04-27 07:20:10 2020-04-27 16:10:19
    #>  4: 27381     FL 5S     6372  005 2020-04-27 10:55:35 2020-04-27 14:57:01
    #>  5: 27389      FL 5      864  005 2020-04-27 14:47:00 2020-04-27 16:17:13
    #>  6: 27382     FL 5S     5400  005 2020-04-27 14:57:01 2020-04-27 19:00:19
    #>  7: 27396     FL 25      288  025 2020-04-27 16:10:19 2020-04-28 16:10:16
    #>  8: 27397      FL 5     1080  005 2020-04-27 16:17:13 2020-04-27 17:28:04
    #>  9: 27393      FL 5      864  005 2020-04-27 17:28:04 2020-04-27 18:53:48
    #> 10: 27392      FL 5     5402  005 2020-04-27 18:53:48 2020-04-28 08:46:56
    #> 11: 27383     FL 5S     3240  005 2020-04-27 19:00:19 2020-04-27 21:20:05
    #> 12: 27384     FL 5S      864  005 2020-04-27 21:20:05 2020-04-27 21:47:18
    #> 13: 27385     FL 5S     5293  005 2020-04-27 21:47:18 2020-04-28 08:35:01
    #> 14: 27386     FL 5S     2700  005 2020-04-28 08:35:01 2020-04-28 10:38:11
    #> 15: 27398      FL 5    11547  001 2020-04-28 08:46:56 2020-04-29 07:49:09
    #> 16: 27409     FL 5S     4427  005 2020-04-28 10:38:11 2020-04-28 12:22:27
    #> 17: 27410     FL 5S     3672  005 2020-04-28 12:22:27 2020-04-28 14:11:06
    #> 18: 27411     FL 5S      864  005 2020-04-28 14:11:06 2020-04-28 14:54:44
    #> 19: 27412     FL 5S     2651  005 2020-04-28 14:54:44 2020-04-28 16:58:00
    #> 20: 27416     FL 25       96  025 2020-04-28 16:10:16 2020-04-28 17:28:01
    #> 21: 27420     FL 5S      648  005 2020-04-28 16:58:00 2020-04-28 17:21:31
    #> 22: 27421     FL 5S     1620  005 2020-04-28 17:21:31 2020-04-28 19:13:35
    #> 23: 27417     FL 25       96  025 2020-04-28 17:28:01 2020-04-28 18:56:58
    #> 24: 27418     FL 25      480  025 2020-04-28 18:56:58 2020-04-28 20:28:49
    #> 25: 27432     FL 5S     2160  005 2020-04-28 19:13:35 2020-04-28 20:23:25
    #> 26: 27433     FL 5S     1363  005 2020-04-28 20:23:25 2020-04-28 21:45:52
    #> 27: 27419     FL 25      480  025 2020-04-28 20:28:49 2020-04-29 15:00:23
    #> 28: 27413     FL 5S     4320  005 2020-04-28 21:45:52 2020-04-29 08:06:58
    #> 29: 27399      FL 5    11528  001 2020-04-29 07:49:09 2020-04-29 17:16:43
    #> 30: 27414     FL 5S     4320  005 2020-04-29 08:06:58 2020-04-29 10:19:20
    #> 31: 27415     FL 5S      864  005 2020-04-29 10:19:20 2020-04-29 10:53:15
    #> 32: 27428     FL 5S    12745  005 2020-04-29 10:53:15 2020-04-29 17:04:01
    #> 33: 27424     FL 25      600  025 2020-04-29 15:00:23 2020-04-29 19:33:54
    #> 34: 27429     FL 5S     3782  005 2020-04-29 17:04:01 2020-04-29 20:14:55
    #> 35: 27431      FL 5     6373  005 2020-04-29 17:16:43 2020-04-30 10:15:08
    #>     Order Prod_line Produced Size             Started               Ended
    #>      Recipe
    #>  1:        
    #>  2:     005
    #>  3:     005
    #>  4:     005
    #>  5:     005
    #>  6:     005
    #>  7: 005,001
    #>  8:     005
    #>  9:     005
    #> 10:     005
    #> 11:     005
    #> 12:     005
    #> 13:     005
    #> 14: 005,001
    #> 15:     001
    #> 16:     001
    #> 17:     001
    #> 18:     001
    #> 19:     001
    #> 20:     001
    #> 21:     001
    #> 22:     001
    #> 23:     001
    #> 24:     001
    #> 25:     001
    #> 26:     001
    #> 27:     001
    #> 28:     001
    #> 29:     001
    #> 30:     001
    #> 31:     001
    #> 32:     001
    #> 33: 001,005
    #> 34: 001,005
    #> 35:     005
    #>      Recipe
    

    Created on 2020-12-18 by the reprex package (v0.3.0)