Search code examples
rdplyrtidyrreshape

Mutate() error is caused when trying to subtract values from 2 different columns


I have the code below in which I basically subtract the values on column qty based on if they have the same value in column activity and with the logic in-out from the in_out column. The result will be saved in the qty_scrap column.

The result dataframe will then have 4 columns. The activity column with the unique name of the activity, the in column with the in value of the qty, the out column with the out value of the qty and the qty_scrap column with their difference.

but I get:

error in `mutate()`:
! Problem while computing `qty_scrap = `in` - out`.
Caused by error in `` `in` - out ``:
p<-structure(list(case_id = c(3396922, 3396922, 3396922, 3396922, 
                              3396922, 3396922, 3396922, 3396922, 3397364, 3397364, 3397364, 
                              3397364, 3397364, 3397364, 3397364, 3397364, 3397364, 3397364, 
                              3397364, 3397364), action = c("3396922-RAAMELK", "3396922-RAAMELK", 
                                                            "3396922-Separering", "3396922-Separering", "3396922-P11-R", 
                                                            "3396922-P11-R", "3396922-T51-R", "3396922-T51-R", "3397364-RAAMELK", 
                                                            "3397364-RAAMELK", "3397364-Separering", "3397364-Separering", 
                                                            "3397364-P10-R", "3397364-P10-R", "3397364-T42-R", "3397364-T42-R", 
                                                            "3397364-T61-R", "3397364-T61-R", "3397364-T41-R", "3397364-T41-R"
                              ), resource = c("RAAMELK", "RAAMELK", "Separering", "Separering", 
                                              "P11-R", "P11-R", "T51-R", "T51-R", "RAAMELK", "RAAMELK", "Separering", 
                                              "Separering", "P10-R", "P10-R", "T42-R", "T42-R", "T61-R", "T61-R", 
                                              "T41-R", "T41-R"), lifecycle = c(1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 
                                                                               2, 2, 3, 3, 4, 4, 5, 5, 6, 6), registration_type = structure(c(1L, 
                                                                                                                                              2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                                                                                                                                              2L, 1L, 2L), levels = c("start", "complete"), class = "factor"), 
                  timestamp = structure(c(1667266620, 1667269440, 1667266620, 
                                          1667269500, 1667266620, 1667269500, 1667291460, 1667304000, 
                                          1667523600, 1667531220, 1667531220, 1667531220, 1667523600, 
                                          1667531220, 1667548320, 1667556720, 1667528880, 1667535420, 
                                          1667624400, 1667635860), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
                  activity = c("RAAMELK", "RAAMELK", "Separering", "Separering", 
                               "Sweetmilk Pasteurizer 8332", "Sweetmilk Pasteurizer 8332", 
                               "9005 - T51 kartong 70x70", "9005 - T51 kartong 70x70", "RAAMELK", 
                               "RAAMELK", "Separering", "Separering", "Sweetmilk Pasteurizer 8331", 
                               "Sweetmilk Pasteurizer 8331", "9004 - T42 kartong 70x70", 
                               "9004 - T42 kartong 70x70", "9006 - T61 BIB", "9006 - T61 BIB", 
                               "9004 - T41 kartong 70x70", "9004 - T41 kartong 70x70"), 
                  activity_description = c("Forbrukt r\xe5melk", "Forbrukt r\xe5melk", 
                                           "Utbytte r\xe5fl\xf8te", "Utbytte r\xe5fl\xf8te", "Pasteurisert melk til tank", 
                                           "Pasteurisert melk til tank", "Produsert melk", "Produsert melk", 
                                           "Forbrukt r\xe5melk", "Forbrukt r\xe5melk", "Utbytte r\xe5fl\xf8te", 
                                           "Utbytte r\xe5fl\xf8te", "Pasteurisert melk til tank", "Pasteurisert melk til tank", 
                                           "Produsert melk", "Produsert melk", "Produsert melk", "Produsert melk", 
                                           "Produsert melk", "Produsert melk"), ...9 = c(NA, NA, NA, 
                                                                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                                                                         NA, NA), product = c("K101149", "K101149", "K101149", "K101149", 
                                                                                                              "K101149", "K101149", "K101149", "K101149", "K101152", "K101152", 
                                                                                                              "K101152", "K101152", "K101152", "K101152", "K101152", "K101152", 
                                                                                                              "K101152", "K101152", "K101152", "K101152"), product_type_text = c("200100 - Milk", 
                                                                                                                                                                                 "200100 - Milk", "200100 - Milk", "200100 - Milk", "200100 - Milk", 
                                                                                                                                                                                 "200100 - Milk", "200100 - Milk", "200100 - Milk", "200100 - Milk", 
                                                                                                                                                                                 "200100 - Milk", "200100 - Milk", "200100 - Milk", "200100 - Milk", 
                                                                                                                                                                                 "200100 - Milk", "200100 - Milk", "200100 - Milk", "200100 - Milk", 
                                                                                                                                                                                 "200100 - Milk", "200100 - Milk", "200100 - Milk"), qty = c(NA, 
                                                                                                                                                                                                                                             19976.92, 19976.92, 2386, 17590.92, 17481, 17480, 17694, 
                                                                                                                                                                                                                                             NA, 31, 31, 9, 31, 31, 6, 6, 3, 3, 28, 28), in_out = c("in", 
                                                                                                                                                                                                                                                                                                    "out", "in", "out", "in", "out", "in", "out", "in", "out", 
                                                                                                                                                                                                                                                                                                    "in", "out", "in", "out", "in", "out", "in", "out", "in", 
                                                                                                                                                                                                                                                                                                    "out"), qty_scrap = c(NA, NA, NA, NA, NA, 109.92, NA, -214, 
                                                                                                                                                                                                                                                                                                                          NA, NA, NA, NA, NA, -270.64, NA, 524, NA, 260, NA, NA), `FP ordre` = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                 NA, NA, NA, NA, NA, 3513481, 3513481, NA, NA, NA, NA, NA, 
                                                                                                                                                                                                                                                                                                                                                                                                 NA, 3395035, 3395035, 4531159, 4531159, 4385337, 4385337), 
                  Artikkeltype = c("SF", "SF", "SF", "SF", "SF", "SF", "FP", 
                                   "FP", "SF", "SF", "SF", "SF", "SF", "SF", "FP", "FP", "FP", 
                                   "FP", "FP", "FP"), .order = 1:20), row.names = c(NA, -20L
                                   ), class = c("eventlog", "log", "tbl_df", "tbl", "data.frame"
                                   ), case_id = "case_id", activity_id = "activity", activity_instance_id = "action", lifecycle_id = "registration_type", resource_id = "resource", timestamp = "timestamp")

library(tidyr)
library(dplyr)
vol<-p[,c("activity","qty","in_out","qty_scrap")]
    vol<-as.data.frame(vol)
    
    
    
    vol[is.na(vol)] <- 0
    vol<-vol %>% 
      pivot_wider(id_cols = "activity", names_from = "in_out", values_from = "qty") %>% 
      mutate(qty_scrap = `in`-out)

Solution

  • There are duplicates before reshaping into wide format, you may want to add case_id.

    vol <- p[c('case_id', "activity","qty","in_out","qty_scrap")] |> as.data.frame()
    

    Then try reshape.

    reshape(vol, idvar=c('case_id','activity'), timevar='in_out', direction='wide') |>
      transform(qty_scrap=qty.in - qty.out)
    #    case_id                   activity   qty.in qty_scrap.in  qty.out qty_scrap.out qty_scrap
    # 1  3396922                    RAAMELK     0.00            0 19976.92          0.00 -19976.92
    # 3  3396922                 Separering 19976.92            0  2386.00          0.00  17590.92
    # 5  3396922 Sweetmilk Pasteurizer 8332 17590.92            0 17481.00        109.92    109.92
    # 7  3396922   9005 - T51 kartong 70x70 17480.00            0 17694.00       -214.00   -214.00
    # 9  3397364                    RAAMELK     0.00            0    31.00          0.00    -31.00
    # 11 3397364                 Separering    31.00            0     9.00          0.00     22.00
    # 13 3397364 Sweetmilk Pasteurizer 8331    31.00            0    31.00       -270.64      0.00
    # 15 3397364   9004 - T42 kartong 70x70     6.00            0     6.00        524.00      0.00
    # 17 3397364             9006 - T61 BIB     3.00            0     3.00        260.00      0.00
    # 19 3397364   9004 - T41 kartong 70x70    28.00            0    28.00          0.00      0.00
    

    Instead of transform you can still use dplyr::mutate.