I am trying to convert data.table code that uses melt and dcast into code that does the same but with dplyr pivot_longer() and pivot_wider(). Example data:
input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1,
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2),
wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03,
0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154,
0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125,
0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA,
-6L))
gives
input_ds_wt
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000
6 6 0.0 0.2 0.03 0.04 Inf 0.2000
The data.table code that I want to mimic with pivot_longer and pivot_wider:
library(data.table)
setDT(input_ds_wt)
#1.reshape to get version
x <- melt(input_ds_wt, id.vars = "id")
x[, c("variable", "version") := tstrsplit(variable, split = "_") ]
#2.reshape to spread variables per version
x <- dcast(x, id + version ~ variable, value.var = "value")
#3.calculate suppress
x[, suppress := fifelse(RSE < 0.3 & wt.mean > 0.9, 0, 1) ]
#4.reshape to add versions to variable names
x <- melt(x, id.vars = c("id", "version") )
x[, variable := paste(variable, version, sep = "_") ]
#5.reshape to get original data structure
x <- dcast(x, id ~ variable, value.var = "value")
which results in:
x
Key: <id>
id RSE_v1 RSE_v2 suppress_v1 suppress_v2 wt.SE_v1 wt.SE_v2 wt.mean_v1 wt.mean_v2
<num> <num> <num> <num> <num> <num> <num> <num> <num>
1: 1 0.100000000 0.0375 0 1 0.10 0.03 1.0 0.8
2: 2 0.010000000 1.5000 0 1 0.01 0.30 1.0 0.2
3: 3 0.153846154 0.0125 0 1 0.20 0.01 1.3 0.8
4: 4 0.008695652 0.5000 0 1 0.02 0.10 2.3 0.2
5: 5 0.300000000 0.5000 1 1 0.30 0.40 1.0 0.8
6: 6 Inf 0.2000 1 1 0.03 0.04 0.0 0.2
I can do Step 1 with pivot_longer:
input_ds_wt = input_ds_wt %>% as_tibble
#1.reshape to get version:
step1 = input_ds_wt %>% pivot_longer(cols = !c(id),
names_to = c("variable","version"),
names_pattern = "(.*)_(.*)")
which produces step1:
step1
# A tibble: 36 x 4
id variable version value
<dbl> <chr> <chr> <dbl>
1 1 wt.mean v1 1
2 1 wt.mean v2 0.8
3 1 wt.SE v1 0.1
4 1 wt.SE v2 0.03
5 1 RSE v1 0.1
6 1 RSE v2 0.0375
7 2 wt.mean v1 1
8 2 wt.mean v2 0.2
9 2 wt.SE v1 0.01
10 2 wt.SE v2 0.3
but how to do I translate the step2 dcast into pivot_wider? The end of step2 should look like this:
step2
Key: <id, version>
id version RSE wt.SE wt.mean
<num> <char> <num> <num> <num>
1: 1 v1 0.100000000 0.10 1.0
2: 1 v2 0.037500000 0.03 0.8
3: 2 v1 0.010000000 0.01 1.0
4: 2 v2 1.500000000 0.30 0.2
5: 3 v1 0.153846154 0.20 1.3
6: 3 v2 0.012500000 0.01 0.8
7: 4 v1 0.008695652 0.02 2.3
8: 4 v2 0.500000000 0.10 0.2
9: 5 v1 0.300000000 0.30 1.0
10: 5 v2 0.500000000 0.40 0.8
11: 6 v1 Inf 0.03 0.0
12: 6 v2 0.200000000 0.04 0.2
You can achieve this with a single pivot_longer()
call when using ".value"
in names_to
:
library(dplyr, warn.conflicts = FALSE)
library(tidyr)
input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1,
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2),
wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03,
0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154,
0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125,
0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA,
-6L))
input_ds_wt |>
pivot_longer(-id, names_to = c(".value", "version"), names_sep = "_")
#> # A tibble: 12 × 5
#> id version wt.mean wt.SE RSE
#> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 1 v1 1 0.1 0.1
#> 2 1 v2 0.8 0.03 0.0375
#> 3 2 v1 1 0.01 0.01
#> 4 2 v2 0.2 0.3 1.5
#> 5 3 v1 1.3 0.2 0.154
#> 6 3 v2 0.8 0.01 0.0125
#> 7 4 v1 2.3 0.02 0.00870
#> 8 4 v2 0.2 0.1 0.5
#> 9 5 v1 1 0.3 0.3
#> 10 5 v2 0.8 0.4 0.5
#> 11 6 v1 0 0.03 Inf
#> 12 6 v2 0.2 0.04 0.2
With added suppress and pivoted back to wider:
input_ds_wt |>
pivot_longer(-id, names_to = c(".value", "version"), names_sep = "_") |>
mutate(suppress = if_else(RSE < .3 & wt.mean > .9, 0, 1)) |>
pivot_wider(names_from = version, values_from = -c(id, version))
#> # A tibble: 6 × 9
#> id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1 suppress_v2
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 0.8 0.1 0.03 0.1 0.0375 0 1
#> 2 2 1 0.2 0.01 0.3 0.01 1.5 0 1
#> 3 3 1.3 0.8 0.2 0.01 0.154 0.0125 0 1
#> 4 4 2.3 0.2 0.02 0.1 0.00870 0.5 0 1
#> 5 5 1 0.8 0.3 0.4 0.3 0.5 1 1
#> 6 6 0 0.2 0.03 0.04 Inf 0.2 1 1
Created on 2025-02-05 with reprex v2.1.1