Search code examples
rdata.tablereshapereshape2melt

R Reshape Wide To Long Using Column Stub Strings


data1=data.frame("School"=c(1,1,2,2,3,3,4,4),
                 "Fund"=c(0,1,0,1,0,1,0,1),
                 "Total_A_Grade5"=c(22,20,21,24,24,26,25,22),
                 "Group1_A_Grade5"=c(10,6,6,10,9,9,9,10),
                 "Group2_A_Grade5"=c(5,9,9,8,10,8,8,6),
                 "Total_B_Grade5"=c(23,33,19,21,19,23,20,21),
                 "Group1_B_Grade5"=c(8,7,7,10,9,9,5,5),
                 "Group2_B_Grade5"=c(6,10,7,6,6,5,9,9),
                 "Total_A_Grade6"=c(18,24,16,24,26,25,16,19),
                 "Group1_A_Grade6"=c(7,7,5,9,10,9,5,7),
                 "Group2_A_Grade6"=c(5,8,6,7,10,8,8,9),
                 "Total_B_Grade6"=c(26,23,22,24,21,22,24,19),
                 "Group1_B_Grade6"=c(10,10,6,10,7,8,8,7),
                 "Group2_B_Grade6"=c(9,6,9,6,7,6,9,9),
                 "Total_A_Grade7"=c(20,19,18,25,16,21,19,26),
                 "Group1_A_Grade7"=c(9,7,7,9,7,7,5,8),
                 "Group2_A_Grade7"=c(8,5,7,9,6,5,5,9),
                 "Total_B_Grade7"=c(25,21,24,25,18,18,27,18),
                 "Group1_B_Grade7"=c(10,10,10,7,5,6,8,5),
                 "Group2_B_Grade7"=c(9,6,8,10,8,6,10,6))


data2=data.frame("School"=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
                 "Fund"=c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1),
                 "Type"=c('Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2'),
                 "Class"=c('A','A','A','A','A','A','B','B','B','B','B','B','A','A','A','A','A','A','B','B','B','B','B','B'),
"Grade"=c(5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6),
"Score"=c(22,20,10,6,5,9,23,33,8,7,6,10,18,24,7,7,5,8,26,23,10,10,9,6))

I have 'data1' and want to reshape to make 'data2' which just shows example for School 1 grade 5 and 6 but I want all of data1 reshaped.

The column names of 'data1' contain rich information. For example, Group2_B_Grade6 indicated 'Type' = Group2, 'Class' = B, 'Grade' = 6. I wish to reshape 'data1' and then use these stubs separated by "_" as colnames to prepare 'data2'


data3=data.frame("School"=c(1,1,2,2,3,3,4,4),
                 "Fund"=c(0,1,0,1,0,1,0,1),
                 "Grade_5"=c(22,20,21,24,24,26,25,22),
                 "Grade_6"=c(10,6,6,10,9,9,9,10),
                 "Grade_7"=c(5,9,9,8,10,8,8,6))

Solution

  • Using dplyr (and tidyr):

    library(dplyr)
    library(tidyr)
    
    data2 <- data1 %>% 
      pivot_longer(-c(School, Fund)) %>% 
      separate(name, into = c('Type', 'Class', 'Grade')) %>% 
      extract(Grade, 'Grade', "([0-9]+)")
    
    data2
    #> # A tibble: 144 x 6
    #>    School  Fund Type   Class Grade value
    #>     <dbl> <dbl> <chr>  <chr> <chr> <dbl>
    #>  1      1     0 Total  A     5        22
    #>  2      1     0 Group1 A     5        10
    #>  3      1     0 Group2 A     5         5
    #>  4      1     0 Total  B     5        23
    #>  5      1     0 Group1 B     5         8
    #>  6      1     0 Group2 B     5         6
    #>  7      1     0 Total  A     6        18
    #>  8      1     0 Group1 A     6         7
    #>  9      1     0 Group2 A     6         5
    #> 10      1     0 Total  B     6        26
    #> # … with 134 more rows
    

    Created on 2020-04-06 by the reprex package (v0.3.0)