r tidyr reshape data-transform datashape

convert wide to long dataset in R

set.seed(123)
dataset <- data.frame(ID = 1:10,
                      height_1 = rnorm(10,0,1),
                      height_2 = rnorm(10,0,1),
                      common_fam = rnorm(10,0,1),
                      weight_1 = rnorm(10,0,1),
                      weight_2 = rnorm(10,0,1),
                      common2_fam2 = rnorm(10,0,1))

So, the above is a reproducible minimum data example. my real data is more complicated.

data3 <- dataset %>%
  pivot_longer(-c('ID', ends_with("fam") | ends_with("fam2")),
               names_to = "fID", 
               values_to = "value",
               names_sep = "_")

What I intended is that

data3 contains columns such as:

"ID", "common_fam", "common2_fam2", 
"fID" (it contains values either 1 or 2), 
"height" and "weight"

Solution

I think this is what you are looking for:

library(dplyr)
library(tidyr)

set.seed(123)
dataset <- data.frame(ID = 1:10,
                      height_1 = rnorm(10,0,1),
                      height_2 = rnorm(10,0,1),
                      common_fam = rnorm(10,0,1),
                      weight_1 = rnorm(10,0,1),
                      weight_2 = rnorm(10,0,1),
                      common2_fam2 = rnorm(10,0,1))

data3 <- dataset %>%
   pivot_longer(-c('ID', contains("fam") ),
                names_to = c(".value", "fID"), 
                values_to = "value",
                names_sep = "_")

data3
# A tibble: 20 × 6
     ID common_fam common2_fam2 fID    height  weight
  <int>      <dbl>        <dbl> <chr>   <dbl>   <dbl>
1     1     -1.07        0.253  1     -0.560   0.426 
2     1     -1.07        0.253  2      1.22   -0.695 
3     2     -0.218      -0.0285 1     -0.230  -0.295 
4     2     -0.218      -0.0285 2      0.360  -0.208 
5     3     -1.03       -0.0429 1      1.56    0.895 
6     3     -1.03       -0.0429 2      0.401  -1.27  
...