Search code examples
rdplyrdata.tablepivot

How to translate R data.table melt and dcast into dplr pivot_longer and pivot_wider?


I am trying to convert data.table code that uses melt and dcast into code that does the same but with dplyr pivot_longer() and pivot_wider(). Example data:

input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1, 
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2), 
    wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03, 
    0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154, 
    0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125, 
    0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA, 
-6L))
gives

 input_ds_wt
  id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2      RSE_v1 RSE_v2
1  1        1.0        0.8     0.10     0.03 0.100000000 0.0375
2  2        1.0        0.2     0.01     0.30 0.010000000 1.5000
3  3        1.3        0.8     0.20     0.01 0.153846154 0.0125
4  4        2.3        0.2     0.02     0.10 0.008695652 0.5000
5  5        1.0        0.8     0.30     0.40 0.300000000 0.5000
6  6        0.0        0.2     0.03     0.04         Inf 0.2000

The data.table code that I want to mimic with pivot_longer and pivot_wider:

library(data.table)

setDT(input_ds_wt)

#1.reshape to get version
x <- melt(input_ds_wt, id.vars = "id")
x[, c("variable", "version") := tstrsplit(variable, split = "_") ]

#2.reshape to spread variables per version
x <- dcast(x, id  + version ~ variable, value.var = "value")

#3.calculate suppress
x[, suppress := fifelse(RSE < 0.3 & wt.mean > 0.9, 0, 1) ]

#4.reshape to add versions to variable names
x <- melt(x, id.vars = c("id", "version") )
x[, variable := paste(variable, version, sep = "_") ]

#5.reshape to get original data structure
x <- dcast(x, id ~ variable, value.var = "value")

which results in:

x
Key: <id>
      id      RSE_v1 RSE_v2 suppress_v1 suppress_v2 wt.SE_v1 wt.SE_v2 wt.mean_v1 wt.mean_v2
   <num>       <num>  <num>       <num>       <num>    <num>    <num>      <num>      <num>
1:     1 0.100000000 0.0375           0           1     0.10     0.03        1.0        0.8
2:     2 0.010000000 1.5000           0           1     0.01     0.30        1.0        0.2
3:     3 0.153846154 0.0125           0           1     0.20     0.01        1.3        0.8
4:     4 0.008695652 0.5000           0           1     0.02     0.10        2.3        0.2
5:     5 0.300000000 0.5000           1           1     0.30     0.40        1.0        0.8
6:     6         Inf 0.2000           1           1     0.03     0.04        0.0        0.2

I can do Step 1 with pivot_longer:

input_ds_wt = input_ds_wt %>% as_tibble
#1.reshape to get version:
step1 = input_ds_wt %>% pivot_longer(cols = !c(id),
                             names_to = c("variable","version"),
                             names_pattern = "(.*)_(.*)")

which produces step1:

step1
# A tibble: 36 x 4
      id variable version  value
   <dbl> <chr>    <chr>    <dbl>
 1     1 wt.mean  v1      1     
 2     1 wt.mean  v2      0.8   
 3     1 wt.SE    v1      0.1   
 4     1 wt.SE    v2      0.03  
 5     1 RSE      v1      0.1   
 6     1 RSE      v2      0.0375
 7     2 wt.mean  v1      1     
 8     2 wt.mean  v2      0.2   
 9     2 wt.SE    v1      0.01  
10     2 wt.SE    v2      0.3  

but how to do I translate the step2 dcast into pivot_wider? The end of step2 should look like this:

step2
Key: <id, version>
       id version         RSE wt.SE wt.mean
    <num>  <char>       <num> <num>   <num>
 1:     1      v1 0.100000000  0.10     1.0
 2:     1      v2 0.037500000  0.03     0.8
 3:     2      v1 0.010000000  0.01     1.0
 4:     2      v2 1.500000000  0.30     0.2
 5:     3      v1 0.153846154  0.20     1.3
 6:     3      v2 0.012500000  0.01     0.8
 7:     4      v1 0.008695652  0.02     2.3
 8:     4      v2 0.500000000  0.10     0.2
 9:     5      v1 0.300000000  0.30     1.0
10:     5      v2 0.500000000  0.40     0.8
11:     6      v1         Inf  0.03     0.0
12:     6      v2 0.200000000  0.04     0.2

Solution

  • You can achieve this with a single pivot_longer() call when using ".value" in names_to :

    library(dplyr, warn.conflicts = FALSE)
    library(tidyr)
    input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1, 
    1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2), 
        wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03, 
        0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154, 
        0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125, 
        0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA, 
    -6L))
    
    input_ds_wt |> 
      pivot_longer(-id, names_to = c(".value", "version"), names_sep = "_") 
    #> # A tibble: 12 × 5
    #>       id version wt.mean wt.SE       RSE
    #>    <dbl> <chr>     <dbl> <dbl>     <dbl>
    #>  1     1 v1          1    0.1    0.1    
    #>  2     1 v2          0.8  0.03   0.0375 
    #>  3     2 v1          1    0.01   0.01   
    #>  4     2 v2          0.2  0.3    1.5    
    #>  5     3 v1          1.3  0.2    0.154  
    #>  6     3 v2          0.8  0.01   0.0125 
    #>  7     4 v1          2.3  0.02   0.00870
    #>  8     4 v2          0.2  0.1    0.5    
    #>  9     5 v1          1    0.3    0.3    
    #> 10     5 v2          0.8  0.4    0.5    
    #> 11     6 v1          0    0.03 Inf      
    #> 12     6 v2          0.2  0.04   0.2
    

    With added suppress and pivoted back to wider:

    input_ds_wt |> 
      pivot_longer(-id, names_to = c(".value", "version"), names_sep = "_") |> 
      mutate(suppress = if_else(RSE < .3 & wt.mean > .9, 0, 1)) |> 
      pivot_wider(names_from = version, values_from = -c(id, version))
    #> # A tibble: 6 × 9
    #>      id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2    RSE_v1 RSE_v2 suppress_v1 suppress_v2
    #>   <dbl>      <dbl>      <dbl>    <dbl>    <dbl>     <dbl>  <dbl>       <dbl>       <dbl>
    #> 1     1        1          0.8     0.1      0.03   0.1     0.0375           0           1
    #> 2     2        1          0.2     0.01     0.3    0.01    1.5              0           1
    #> 3     3        1.3        0.8     0.2      0.01   0.154   0.0125           0           1
    #> 4     4        2.3        0.2     0.02     0.1    0.00870 0.5              0           1
    #> 5     5        1          0.8     0.3      0.4    0.3     0.5              1           1
    #> 6     6        0          0.2     0.03     0.04 Inf       0.2              1           1
    

    Created on 2025-02-05 with reprex v2.1.1