I have a sample data frame, something like below
V1 V2
TEST_1
TEST_1a AA
TEST_2
TEST_2a BB
TEST_3
TEST_4
TEST_4a CC
TEST_4b DD
I want to convert this dataframe based on _** value in column V1 to something like below
Col1a Col1b Col2a Col2b Col3a Col3b Col4a Col4b
TEST_1 TEST_2 TEST_3 TEST_4
TEST_1a AA TEST_2a BB TEST_4a CC
TEST_4b DD
and so on.
I have already looked into reshape2 functions and none of them seem to fit my need here. Any help is much appreciated
An option is dcast
. Based on the output showed, we may need to do the reshaping separately
library(data.table)
library(gtools)
setDT(df1)
df1[, grp := paste0("Col", rleid(sub("[a-z]+$", "", V1)))
][, c("grp1", "grp2") := .(paste0(grp, "a"), paste0(grp, "b"))
][]
out <- cbind(dcast(df1, rowid(grp1) ~ grp1, value.var = "V1", fill = ""),
dcast(df1, rowid(grp2) ~ grp2, value.var = "V2", fill = ""))[,
c('grp1', 'grp2') := NULL][]
setcolorder(out, mixedsort(names(out)))
out
# Col1a Col1b Col2a Col2b Col3a Col3b Col4a Col4b
#1: TEST_1 TEST_2 TEST_3 TEST_4
#2: TEST_1a AA TEST_2a BB TEST_4a CC
#3: TEST_4b DD
or using tidyverse
library(dplyr)
library(tidyr)
df1 %>%
mutate(grp = str_remove(V1, "[a-z]$"),
grp = str_c("Col", group_indices(., grp)),
grp1 = str_c(grp, "a"),
grp2 = str_c(grp, 'b')) %>%
{list(select(., V1, grp1), select(., V2, grp2))} %>%
map(~ .x %>%
group_by_at(2) %>%
mutate(rn = row_number()) %>%
ungroup %>%
spread(!! rlang::sym(names(.)[2]),
!! rlang::sym(names(.)[1]), fill = "") ) %>%
reduce(inner_join, by = 'rn') %>%
select(-rn) %>%
select(mixedsort(names(.)))
# A tibble: 3 x 8
# Col1a Col1b Col2a Col2b Col3a Col3b Col4a Col4b
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 TEST_1 "" TEST_2 "" TEST_3 "" TEST_4 ""
#2 TEST_1a AA TEST_2a BB "" "" TEST_4a CC
#3 "" "" "" "" "" "" TEST_4b DD
df1 <- structure(list(V1 = c("TEST_1", "TEST_1a", "TEST_2", "TEST_2a",
"TEST_3", "TEST_4", "TEST_4a", "TEST_4b"), V2 = c("", "AA", "",
"BB", "", "", "CC", "DD")), row.names = c(NA, -8L), class = "data.frame")