Search code examples
rdplyrpivotdata-manipulation

Pivot longer multiple columns while pivot wider others


Hello I have a dataset of 3-5 rows per group as the following and I want to put some columns in a longer format and have on column in a wider format.

The first dataset below represents the original format and I want to transform it to the second one. I have used pivot wider cols = c("Jan", "Feb") but i can't manage to pivot the Type column to be longer at the same time.

data <- as.data.frame(matrix(ncol=5, nrow=6))
colnames(data) <- c("names", "group", "Type", "Jan", "Feb")
data$names <- c("P1", "P1", "P1", "P2", "P2", "P2")
data$group <- "S"
data$Type <- c("Beg", "Middle", "End", "Beg", "Middle", "End")
data$Jan <- c(1, 2, 3, 10, 5, 15)
data$Feb <- c(5, 5, 10, 5, 2, 7)

    
   names group Type     Jan  Feb
1   P1    S    Beg       1   5
2   P1    S    Middle    2   5
3   P1    S    End       3   10
4   P2    S    Beg       10  5
5   P2    S    Middle    5   2
6   P2    S    End       15  7


data_transformed <- as.data.frame(matrix(ncol=6, nrow=4))
colnames(data_transformed) <- c("names", "group", "Month", "Beg", "Middle", "End")
data_transformed$names <- c("P1", "P1", "P2", "P2")
data_transformed$group <- "S"
data_transformed$Month <- c("Jan", "Feb")
data_transformed$Beg <- c(1, 10, 5, 5)
data_transformed$Middle <- c(2, 5, 5, 2)
data_transformed$End <- c(2, 15, 10, 7)

  names group Month   Beg Middle End
1   P1  S     Jan      1    2    2
2   P1  S     Feb      10   5    15
3   P2  S     Jan      5    5    10
4   P2  S     Feb      5    2    7

Solution

  • Here, we need a pivot_longer + pivot_wider i.e. first reshape to 'long' with cols Jan to 'Feb', then reshape the long back to wider format with column names from 'Type'

    library(dplyr)
    library(tidyr)
    data %>%
         pivot_longer(cols = Jan:Feb, names_to = 'Month') %>% 
         pivot_wider(names_from = Type, values_from = value)
    

    -output

    # A tibble: 4 x 6
    #  names group Month   Beg Middle   End
    #  <chr> <chr> <chr> <dbl>  <dbl> <dbl>
    #1 P1    S     Jan       1      2     3
    #2 P1    S     Feb       5      5    10
    #3 P2    S     Jan      10      5    15
    #4 P2    S     Feb       5      2     7
    

    Or use recast from reshape2

    library(reshape2)
    recast(data, measure = c("Jan", "Feb"),
         names + group + variable ~ Type, values.var = 'value')