Search code examples
rdatatable

Use data.table instead of pivot_longer


I would like some help to adjust the output called adjusted that I generate. My idea is to optimize somehow to generate faster. Notice that I'm using pivot_longer, which takes longer. One idea would be to continue using data.table as I did to generate SPV. However, I don't know how to do that in this case for adjusted. Can you help me?

I would like to generate the same output table as in the question.

library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
                "2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
       Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
       Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
       DR1 = c(4,1,4,3,3,4,3,5),
       DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
       DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
       DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
       DR08 = c(0,0,0,1,2,0,0,0),DR09 = c(0,0,0,0,0,0,0,0),DR010 = c(0,0,0,0,0,0,0,0),DR011 = c(4,0,0,0,0,0,0,0), 
       DR012 = c(0,0,0,3,0,0,0,5),DR013 = c(0,0,1,0,0,0,2,0),DR014 = c(0,0,0,0,0,2,0,0)),
  class = "data.frame", row.names = c(NA, -8L))

selection = startsWith(names(df1), "DRM")

df1[selection][is.na(df1[selection])] = 0

dt1 <- as.data.table(df1)

cols <- grep("^DR0", colnames(dt1), value = TRUE)

medi_ana <- 
  dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
  ][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]

f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana,  (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]

dmda<-"2021-07-09"
code<-"CDE"

adjusted<-SPV %>%
filter(date2==dmda,Code == code) %>%
group_by(Code) %>%
summarize(across(starts_with("DR0"), sum),.groups = 'drop') %>%
pivot_longer(cols= -Code, names_pattern = "DR0(.+)", values_to = "val") %>%
mutate(name = readr::parse_number(name))
    
    > adjusted
    # A tibble: 14 x 3
       Code   name   val
       <chr> <dbl> <dbl>
     1 CDE       1     5
     2 CDE       2     5
     3 CDE       3     5
     4 CDE       4     5
     5 CDE       5     5
     6 CDE       6     5
     7 CDE       7     5
     8 CDE       8     5
     9 CDE       9     5
    10 CDE      10     5
    11 CDE      11     5
    12 CDE      12     5
    13 CDE      13     5
    14 CDE      14     5

Solution

  • With data.table, use melt after doing the filtering and grouping

    library(data.table)
    melt(SPV[date2 == dmda & Code == code][, 
       lapply(.SD, sum, na.rm = TRUE), by = Code, 
       .SDcols = patterns("^DR0")],
        id.var = "Code", variable.name = "name", value.name = "val")[, 
          name := readr::parse_number(as.character(name))][]
    

    -output

         Code  name   val
        <char> <num> <num>
     1:    CDE     1     5
     2:    CDE     2     5
     3:    CDE     3     5
     4:    CDE     4     5
     5:    CDE     5     5
     6:    CDE     6     5
     7:    CDE     7     5
     8:    CDE     8     5
     9:    CDE     9     5
    10:    CDE    10     5
    11:    CDE    11     5
    12:    CDE    12     5
    13:    CDE    13     5
    14:    CDE    14     5
    

    Or using the R chain (|>)

    SPV[date2 == dmda & Code == code] |> 
      {\(.) .[, lapply(.SD, sum, na.rm = TRUE), by = Code, 
         .SDcols = patterns("^DR0")]}() |>
       melt(id.var = "Code", variable.name = "name", value.name = "val") |> 
       {\(.) .[, name := readr::parse_number(as.character(name))][]}()