Search code examples
rif-statementnested

My nested ifelse statements are not functioning as expected


Only the first ifelse statement is working as expected. Otherwise, most of the Transaction Days incorrectly fall into the '1-7 Days' category. The Status can either be "SOLD" or "AVAILABLE". Transaction_Days is a character variable which is >=0.

pl_df$Event_Time_Category <- ifelse(pl_df$Status=="SOLD",
                                 ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
                                  ifelse(pl_df$Transaction_Days>0 & pl_df$Transaction_Days<8 , '1-7 Days',
                                    ifelse(pl_df$Transaction_Days>7 & pl_df$Transaction_Days<32, '7-31 Days',
                                       ifelse(pl_df$Transaction_Days>31 & pl_df$Transaction_Days<91, '31-90 Days', '90+ Days Out')))), '')

Actual Output:

Transaction_Days Event_Time_Category 
   72            1-7 Days   
    3            1-7 Days   
   10            1-7 Days   
   37            1-7 Days   
   61            1-7 Days   
   35            1-7 Days   
  126            1-7 Days   
   92            90+ Days Out   
   53            1-7 Days   
   11            1-7 Days   
   48            1-7 Days   
   19            1-7 Days   
   21            1-7 Days   
   66            1-7 Days   
   20            1-7 Days   
   49            1-7 Days   
   21            1-7 Days   
   43            1-7 Days   
   31            1-7 Days   
    0            0-1 Day of Game   

Expected Output:

Transaction_Days Event_Time_Category 
   72            31-90 Days   
    3            1-7 Days   
   10            7-31 Days   
   37            31-90 Days   
   61            31-90 Days   
   35            31-90 Days   
  126            90+ Days Out   
   92            90+ Days Out   
   53            31-90 Days   
   11            7-31 Days   
   48            31-90 Days   
   19            7-31 Days   
   21            7-31 Days   
   66            31-90 Days   
   20            7-31 Days   
   49            31-90 Days   
   21            7-31 Days   
   43            31-90 Days   
   31            7-31 Days   
    0            0-1 Day of Game   

Below are more details about the dataframe:

structure(list(Promoter = c("ABC", "ABC", "ABC", "ABC", "ABC", 
"ABC"), Event.Date = c("2022-07-27 13:10:00", "2022-07-27 13:10:00", 
"2022-07-27 13:10:00", "2022-07-27 13:10:00", "2022-07-27 13:10:00", 
"2022-07-27 13:10:00"), Description = c("Twins @ Brewers", "Twins @ Brewers", 
"Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers"
), Performer = c("Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers", 
"Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers"
), Category = c("MLB", "MLB", "MLB", "MLB", "MLB", "MLB"), Transaction.Date = c("2022-05-16 17:42:00", 
"2022-07-24 21:12:00", "2022-07-17 14:16:00", "2022-06-20 13:24:00", 
"2022-05-27 17:24:00", "2022-06-22 19:25:00"), Zone = c("Field Diamond Box", 
"Field Diamond Box", "Field Diamond Box", "Field Infield Box", 
"Field Infield Box", "Field Infield Box"), Section = c(111L, 
111L, 111L, 110L, 110L, 110L), Row = c("3", "4", "4", "6", "6", 
"6"), Seat = c("9, 10", "5, 6", "7, 8", "10, 11, 12", "5, 6, 7", 
"8, 9"), Quantity = c(2L, 2L, 2L, 3L, 3L, 2L), Status = c("SOLD", 
"SOLD", "SOLD", "SOLD", "SOLD", "SOLD"), Price = c(85.47, 72.05, 
72.86, 45.36, 44.73, 43.75), Cost = c(164, 164, 164, 174, 174, 
116), Revenue = c(170.94, 144.1, 145.72, 136.08, 134.19, 87.5
), Profit = c(6.94, -19.9, -18.28, -37.92, -39.81, -28.5),  Event_Date = structure(c(19200, 19200, 19200, 19200, 
19200, 19200), class = "Date"), Transaction_Date = structure(c(19128, 
19197, 19190, 19163, 19139, 19165), class = "Date"), Alias = c(" Twins @ Brewers", 
" Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers", 
" Twins @ Brewers"), Cost_Per_Ticket = c(82, 82, 82, 58, 58, 
58), Revenue_Per_Ticket = c("85.47", "72.05", "72.86", "45.36", 
"44.73", "43.75"), Pack_Size = c("2", "2", "2", "3", "3", "2"
), Transaction_Days = c("72", "3", "10", "37", "61", "35"), Event_Time_Category = c("1-7 Days", 
"1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days"), 
    Event_Time = c("01:10 PM", "01:10 PM", "01:10 PM", "01:10 PM", 
    "01:10 PM", "01:10 PM")), row.names = c(NA, 6L), class = "data.frame")

Solution

  • Let's change the text to numbers first, then we can do comparisons that work numerically instead of alphabetically. (8 < 10 but "8" > "10")

    Then with your ifelse chain, you could remove redundant tests. For instance, if we know the number is not <1, then we don't need to test if it's >0.

    pl_df$Transaction_Days = as.numeric(pl_df$Transaction_Days)
    
    pl_df$Event_Time_Category <- 
      ifelse(pl_df$Status=="SOLD",
             ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
                    ifelse(pl_df$Transaction_Days<8 , '1-7 Days',
                           ifelse(pl_df$Transaction_Days<32, '7-31 Days',
                                  ifelse(pl_df$Transaction_Days<91, '31-90 Days', '90+ Days Out')))), '')
    

    This is a pretty good use case for dplyr::case_when, which I think is easier to read and maintain. It works where the output is the one corresponding to the first TRUE test it encounters.

    library(dplyr)
    pl_df %>%
      mutate(Event_Time_Category = case_when(
        Status != "SOLD"      ~ '',  # Blank if not SOLD
        Transaction_Days < 1  ~ '0-1 Day of Game',
        Transaction_Days < 8  ~ '1-7 Days',
        Transaction_Days < 32 ~ '7-31 Days',
        Transaction_Days < 91 ~ '31-90 Days', 
        TRUE ~ '90+ Days Out'))