Search code examples
r bupar daqapo

Save activity_log object outputs in a list


I have the code below

# Load the process-mining packages. R package names are case-sensitive and
# the package is named "bupaR", so `library(bupar)` would fail to load it.
library(bupaR)
library(daqapo)

# Turn the `hospital` data (presumably the example dataset shipped with
# daqapo -- confirm) into an activity log:
#   1. rename the timestamp columns to `start` / `complete`,
#   2. parse both columns with the day-month-year parser `dmy_hms`,
#   3. declare which columns hold the case, activity, resource and timestamps.
hospital <- hospital %>%
  rename(
    start = start_ts,
    complete = complete_ts
  ) %>%
  convert_timestamps(c("start", "complete"), format = dmy_hms) %>%
  activitylog(
    case_id = "patient_visit_nr",
    activity_id = "activity",
    resource_id = "originator",
    timestamps = c("start", "complete")
  )

# Report the anomalies found in the log; the value returned by the call is
# auto-printed at top level.
hospital %>%
  detect_time_anomalies()

which gives

*** OUTPUT ***
For 5 rows in the activity log (9.43%), an anomaly is detected.
The anomalies are spread over the activities as follows:
# A tibble: 3 × 3
  activity      type                  n
  <chr>         <chr>             <int>
1 Registration  negative duration     3
2 Clinical exam zero duration         1
3 Trage         negative duration     1
Anomalies are found in the following rows:
# Log of 10 events consisting of:
3 traces 
3 cases 
5 instances of 3 activities 
5 resources 
Events occurred from 2017-11-21 11:22:16 until 2017-11-21 19:00:00 
 
# Variables were mapped as follows:
Case identifier:        patient_visit_nr 
Activity identifier:        activity 
Resource identifier:        originator 
Timestamps:     start, complete 

# A tibble: 5 × 10
  patient_visit_nr activity      originator start               complete            triagecode specialization .order durat…¹ type 
             <dbl> <chr>         <chr>      <dttm>              <dttm>                   <dbl> <chr>           <int>   <dbl> <chr>
1              518 Registration  Clerk 12   2017-11-21 11:45:16 2017-11-21 11:22:16          4 PED                 1  -23    nega…
2              518 Registration  Clerk 6    2017-11-21 11:45:16 2017-11-21 11:22:16          4 PED                 2  -23    nega…
3              518 Registration  Clerk 9    2017-11-21 11:45:16 2017-11-21 11:22:16          4 PED                 3  -23    nega…
4              520 Trage         Nurse 17   2017-11-21 13:43:16 2017-11-21 13:39:00          5 URG                 4   -4.27 nega…
5              528 Clinical exam Doctor 1   2017-11-21 19:00:00 2017-11-21 19:00:00          3 TRAU                5    0    zero…
# … with abbreviated variable name ¹​duration

I would like to save all these outputs (the `*** OUTPUT ***` message, the `# A tibble: 3 × 3` summary, the `# Log of 10 events consisting of:` header, and the `# Variables were mapped as follows:` section) in a list, so that I can extract them from there.


Solution

  • Here is one way of modifying the function to return all the desired output. My modifications start with output <- list().

    Oh, not tested.

    detect_time_anomalies <- function(activitylog, anomaly_type = c("both", "negative","zero") ,
                                      details = TRUE, filter_condition = NULL){
      
      # Predefine variables
      type <- NULL
      duration <- NULL
      activity <- NULL
      complete <- NULL
      start <- NULL
      anomaly_type <- match.arg(anomaly_type)
      # Generate warning if inappropriate anomaly type is selected
      
      
      # Apply filter condition when specified
      filter_specified <- FALSE
      tryCatch({
        is.null(filter_condition)
      }, error = function(e) {
        filter_specified <<- TRUE
      }
      )
      
      if(!filter_specified) {
        # geen filter gespecifieerd.
        
      } else {
        filter_condition_q <- enquo(filter_condition)
        activitylog <- APPLY_FILTER(activitylog, filter_condition_q = filter_condition_q)
      }
      
      # Calculate durations
      activitylog %>%
        mutate(duration = as.double(complete - start, units = "mins")) -> anomalies
      
      # Determine time anomalies
      if(anomaly_type == "negative"){
        anomalies <- anomalies %>% filter(duration < 0)
      } else if(anomaly_type == "zero"){
        anomalies <- anomalies %>% filter(duration == 0)
      } else{
        anomalies <- anomalies %>% filter(duration <= 0) %>%
          mutate(type = ifelse(duration < 0, "negative duration", "zero duration"))
      }
      
      output <- list()
      
      # Print output
      message("Selected anomaly type: ", anomaly_type, "\n")
      output$anomaly_type <- anomaly_type
      
      message("*** OUTPUT ***")
      output$percent_anomalies <- round(nrow(anomalies) / nrow(activitylog) * 100, 2)
      output$num_anomalies <- nrow(anomalies)
      message("For ", nrow(anomalies), " rows in the activity log (", output$percent_anomalies, "%), an anomaly is detected.")
      
      
      if(nrow(anomalies) > 0){
        message("The anomalies are spread over the activities as follows:")
        if(anomaly_type == "both"){
          output$both <- anomalies %>% group_by(!!activity_id_(activitylog), type) %>% summarize(n = n()) %>% arrange(desc(n))
          print(output$both)
        } else{
          output$other <- anomalies %>% group_by(!!activity_id_(activitylog)) %>% summarize(n = n()) %>% arrange(desc(n))
          print(output$other)
        }
        
        if(details == TRUE){
          message("Anomalies are found in the following rows:")
          output$details <- anomalies
          return(output$details)
        }
      }