I've many list of dataframes like split_plot_data2
, and I want to make a separate single dataframe in which columns 3 and 2 will be there. Column 3 is common for all and col 2 will be separately added with the name of col 1.
head(split_plot_data2)
$EE87786ln1
# A tibble: 1,584 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87786ln1 0.0000228 blood_vessel_h16 0
2 EE87786ln1 0.0000228 adrenal_gland_h16 0
3 EE87786ln1 0.0000228 bone_element_h16 0
4 EE87786ln1 0.0000228 Bronchus_h16 0
5 EE87786ln1 0.0000228 esophagus_h16 0
6 EE87786ln1 0.0000228 extraembryonic_structure_h16 0
7 EE87786ln1 0.0000228 eye_h16 0
8 EE87786ln1 0.0000228 gonad_h16 0
9 EE87786ln1 0.0000228 heart_h16 0
10 EE87786ln1 0.0000228 kidney_h16 0
# ℹ 1,574 more rows
# ℹ Use `print(n = ...)` to see more rows
$EE87787ln1
# A tibble: 1,560 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87787ln1 0.0000751 blood_vessel_h16 0
2 EE87787ln1 0.0000751 adrenal_gland_h16 0
3 EE87787ln1 0.0000751 bone_element_h16 0
4 EE87787ln1 0.0000751 Bronchus_h16 0
5 EE87787ln1 0.0000751 esophagus_h16 0
6 EE87787ln1 0.0000751 extraembryonic_structure_h16 0
7 EE87787ln1 0.0000751 eye_h16 0
8 EE87787ln1 0.0000751 gonad_h16 0
9 EE87787ln1 0.0000751 heart_h16 0
10 EE87787ln1 0.0000751 kidney_h16 0
# ℹ 1,550 more rows
# ℹ Use `print(n = ...)` to see more rows
$EE87788ln1
# A tibble: 1,608 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87788ln1 0.0000761 blood_vessel_h16 0
2 EE87788ln1 0.0000761 adrenal_gland_h16 0
3 EE87788ln1 0.0000761 bone_element_h16 0
4 EE87788ln1 0.0000761 Bronchus_h16 0
5 EE87788ln1 0.0000761 esophagus_h16 0
6 EE87788ln1 0.0000761 extraembryonic_structure_h16 0
7 EE87788ln1 0.0000761 eye_h16 0
8 EE87788ln1 0.0000761 gonad_h16 0
9 EE87788ln1 0.0000761 heart_h16 0
10 EE87788ln1 0.0000761 kidney_h16 0
# ℹ 1,598 more rows
# ℹ Use `print(n = ...)` to see more rows
$EE87789ln1
# A tibble: 1,608 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87789ln1 0.0000732 blood_vessel_h16 0
2 EE87789ln1 0.0000732 adrenal_gland_h16 0
3 EE87789ln1 0.0000732 bone_element_h16 0
4 EE87789ln1 0.0000732 Bronchus_h16 0
5 EE87789ln1 0.0000732 esophagus_h16 0
6 EE87789ln1 0.0000732 extraembryonic_structure_h16 0
7 EE87789ln1 0.0000732 eye_h16 0
8 EE87789ln1 0.0000732 gonad_h16 0
9 EE87789ln1 0.0000732 heart_h16 0
10 EE87789ln1 0.0000732 kidney_h16 0
# ℹ 1,598 more rows
# ℹ Use `print(n = ...)` to see more rows
$EE87790ln1
# A tibble: 1,536 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87790ln1 0.0000328 blood_vessel_h16 0
2 EE87790ln1 0.0000328 adrenal_gland_h16 0
3 EE87790ln1 0.0000328 bone_element_h16 0
4 EE87790ln1 0.0000328 Bronchus_h16 0
5 EE87790ln1 0.0000328 esophagus_h16 0
6 EE87790ln1 0.0000328 extraembryonic_structure_h16 0
7 EE87790ln1 0.0000328 eye_h16 0
8 EE87790ln1 0.0000328 gonad_h16 0
9 EE87790ln1 0.0000328 heart_h16 0
10 EE87790ln1 0.0000328 kidney_h16 0
# ℹ 1,526 more rows
# ℹ Use `print(n = ...)` to see more rows
$EE87791ln1
# A tibble: 1,536 × 4
EE_variant lambda variable value
<chr> <dbl> <chr> <dbl>
1 EE87791ln1 0.0000444 blood_vessel_h16 0
2 EE87791ln1 0.0000444 adrenal_gland_h16 0
3 EE87791ln1 0.0000444 bone_element_h16 0
4 EE87791ln1 0.0000444 Bronchus_h16 0
5 EE87791ln1 0.0000444 esophagus_h16 0
6 EE87791ln1 0.0000444 extraembryonic_structure_h16 0
7 EE87791ln1 0.0000444 eye_h16 0
8 EE87791ln1 0.0000444 gonad_h16 0
9 EE87791ln1 0.0000444 heart_h16 0
10 EE87791ln1 0.0000444 kidney_h16 0
# ℹ 1,526 more rows
# ℹ Use `print(n = ...)` to see more rows
The expected dataframe would look like this,
head(comb_plot_lamb)
# A tibble: 6 × 8
Variable Lambda_EE87802 Lambda_EE87786 Lambda_EE87787 Lambda_EE87788 Lambda_EE87789 Lambda_EE87790 Lambda_EE87791
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 blood_vessel_h16 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
2 adrenal_gland_h16 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
3 bone_element_h16 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
4 Bronchus_h16 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
5 esophagus_h16 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
6 extraembryonic_struct… 0.0000348 0.0000228 0.0000751 0.0000761 0.0000732 0.0000328 0.0000444
What I did, but I wonder how can I make this thing at a go rather than writing multiple lines to get every column of lambda values.
n1 <- sapply(split_plot_data2, nrow)
n2 <- max(n1)
n4 <- which.max(n1)
comb_plot_data <- split_plot_data2[[n4]][1:4]
comb_plot_data2 <- comb_plot_data[, c(3,2)]
colnames(comb_plot_data2) <- c('Variable','Lambda_EE87802)
comb_plot_lamb <- comb_plot_data2
comb_plot_lamb$Lambda_EE87786 <- c(split_plot_data2[[1]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[1]]$lambda)))
comb_plot_lamb$Lambda_EE87787 <- c(split_plot_data2[[2]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[2]]$lambda)))
comb_plot_lamb$Lambda_EE87788 <- c(split_plot_data2[[3]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[3]]$lambda)))
comb_plot_lamb$Lambda_EE87789 <- c(split_plot_data2[[4]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[4]]$lambda)))
comb_plot_lamb$Lambda_EE87790 <- c(split_plot_data2[[5]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[5]]$lambda)))
comb_plot_lamb$Lambda_EE87791 <- c(split_plot_data2[[6]]$lambda, rep(NA, nrow(comb_plot_lamb)-length(split_plot_data2[[6]]$lambda)))
Here is a tidyverse
solution.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(purrr)
})
df1 %>%
group_split(EE_variant) %>%
map(\(x) {
newname <- first(x$EE_variant) %>% as.character()
newname <- paste0("Lambda_", newname)
x %>%
select(-EE_variant) %>%
rename(!!sym(newname) := lambda) %>%
select(2, 1)
}) %>%
reduce(left_join, by = "variable")
#> # A tibble: 50 × 4
#> variable Lambda_EE87786 Lambda_EE87787 Lambda_EE87802
#> <chr> <dbl> <dbl> <dbl>
#> 1 var 1 1.4 2.5 0.2
#> 2 var 2 1.5 1.9 0.2
#> 3 var 3 1.5 2.1 0.2
#> 4 var 4 1.3 1.8 0.2
#> 5 var 5 1.5 2.2 0.2
#> 6 var 6 1.3 2.1 0.4
#> 7 var 7 1.6 1.7 0.3
#> 8 var 8 1 1.8 0.2
#> 9 var 9 1.3 1.8 0.2
#> 10 var 10 1.4 2.5 0.1
#> # ℹ 40 more rows
Created on 2023-09-03 with reprex v2.0.2
Inspired in Limey's answer, here is a solution that binds the rows and then reshapes the data to wide format.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(purrr)
})
df1 %>%
group_by(variable, EE_variant) %>%
mutate(id = row_number()) %>%
ungroup() %>%
group_split(EE_variant) %>%
map(select, -value) %>%
bind_rows() %>%
pivot_wider(
id_cols = c(id, variable),
names_from = EE_variant,
names_glue = "Lambda_{EE_variant}",
values_from = lambda
) %>%
select(-id) %>%
arrange(variable)
#> # A tibble: 50 × 4
#> variable Lambda_EE87786 Lambda_EE87787 Lambda_EE87802
#> <chr> <dbl> <dbl> <dbl>
#> 1 var 01 1.6 1.7 0.3
#> 2 var 01 1 1.8 0.2
#> 3 var 01 1.3 1.8 0.2
#> 4 var 01 1.4 2.5 0.1
#> 5 var 01 1.7 1.8 0.2
#> 6 var 01 1 1.6 0.2
#> 7 var 01 1.4 2.3 0.3
#> 8 var 01 1.3 2.5 0.4
#> 9 var 01 1.1 2.3 0.2
#> 10 var 02 1.3 2.1 0.4
#> # ℹ 40 more rows
Created on 2023-09-03 with reprex v2.0.2
# use a built.in data set to make the example reproducible
set.seed(2023)
df1 <- iris[c(5:3)]
df1$variable <- sprintf("var %02d", sample(10, 50, TRUE))
df1 <- df1[c(1, 2, 4, 3)]
names(df1) <- c("EE_variant", "lambda", "variable", "value")
df1$EE_variant <- as.character(df1$EE_variant)
df1$EE_variant <- rep(c("EE87802", "EE87786", "EE87787"), each = 50)
head(df1)
#> EE_variant lambda variable value
#> 1 EE87802 0.2 var 05 1.4
#> 2 EE87802 0.2 var 09 1.4
#> 3 EE87802 0.2 var 08 1.3
#> 4 EE87802 0.2 var 03 1.5
#> 5 EE87802 0.2 var 10 1.4
#> 6 EE87802 0.4 var 02 1.7
Created on 2023-09-03 with reprex v2.0.2