Search code examples
rpivotgroup

group_by and pivot_wider combination


I have a data frame with three columns:

responseid = c(rep("R_cJ6dbDcou", 6), rep("R_a3LWPfGC", 6), rep("R_e3b9tIJo", 6))
year = c(rep(2022, 6), rep(2022, 6), rep(2022, 6)) 
code = c(000, 0500, 0033, 0036, 0102, 0486, 000, 0500, 0032, 
         0039, 0101, 0466, 000, 0500, 0012, 0049, 0111, 0446)
Data = data.frame(responseid , year , code)

I would like it to appear with each row as a unique responseid, and each code related to that responseid as a column:

responseid code1 code2 code3 code4 code5 code6
R_cJ6dbDcou 000 0500 0033 0036 0102 0486
R_a3LWPfGC 000 0500 0032 0039 0101 0466

I've tried grouping by responseid then pivot_wider, but this doesn't seem to work. I end up with each responseid as a column and a list in each cell with the codes for that responseid.


Solution

  • I didn't get from your question whether you expect each column to contain the same code or not.

    If yes, the following should work:

    library(tidyr)
    library(dplyr)
    
    responseid = c(rep("123xyz", 6), rep("456abc", 6), rep("789def", 6)) 
    year = c(rep(2022, 6), rep(2021, 6), rep(2022, 6))
    code = c(000, 0500, 0033, 0036, 0102, 0486, 000, 0500, 0032, 0039, 0101, 0466, 000, 0500, 0012, 0049, 0111, 0446) 
    Data = data.frame(responseid , year , code = as.character(code))
    
    Data_name <- Data |> 
      mutate(name = paste("code", code, sep = "_"))
    
    Data_piv <- Data_name |> 
      pivot_wider(id_cols = responseid,
                  values_from = code,
                  names_from = name)
    
    > Data_piv
    # A tibble: 3 × 15
      responseid code_0 code_500 code_33 code_36 code_102 code_486 code_32 code_39 code_101 code_466 code_12
      <chr>      <chr>  <chr>    <chr>   <chr>   <chr>    <chr>    <chr>   <chr>   <chr>    <chr>    <chr>  
    1 123xyz     0      500      33      36      102      486      NA      NA      NA       NA       NA     
    2 456abc     0      500      NA      NA      NA       NA       32      39      101      466      NA     
    3 789def     0      500      NA      NA      NA       NA       NA      NA      NA       NA       12     
    # ℹ 3 more variables: code_49 <chr>, code_111 <chr>, code_446 <chr>
    

    Else, this should work:

    Data_name2 <- Data |> 
      group_by(responseid) |>
      mutate(i = 1:n()) |>
      mutate(name = paste("code", i, sep = "_"))
    
    Data_piv2 <- Data_name2 |> 
      pivot_wider(id_cols = responseid,
                  values_from = code,
                  names_from = name)
    
    > Data_piv2
    # A tibble: 3 × 7
    # Groups:   responseid [3]
      responseid code_1 code_2 code_3 code_4 code_5 code_6
      <chr>      <chr>  <chr>  <chr>  <chr>  <chr>  <chr> 
    1 123xyz     0      500    33     36     102    486   
    2 456abc     0      500    32     39     101    466   
    3 789def     0      500    12     49     111    446   
    >