Search code examples
rtidyversedata-cleaning

Expanding Data Frame of Response Counts into Data Frame of Responses


I am trying to expand a data frame of counts of specific responses into a data frame of actual responses (rather than aggregated by counts)

Example data:

set.seed(44)
data <- data.frame(ID = LETTERS[1:5]
                   , Count_1 = sample(c(3:8), 5)
                   , Count_2 = sample(c(3:8), 5)
                   , Count_3 = sample(c(3:8), 5))

Which looks like this:

   ID Count_1 Count_2 Count_3
1  A       3       8       3
2  B       8       7       4
3  C       7       4       8
4  D       4       3       5
5  E       5       5       6

I want to expand this out to look like this:

desired <- rbind(data.frame(ID = c(rep("A", 3), rep("B", 8), rep("C", 7), rep("D", 4), rep("E", 5)
                             )
                      , Response = c(rep("1", 3), rep("1", 8) , rep("1", 7), rep("1", 4), rep("1", 5)
                                     )
                      )
                 , data.frame(ID = c(rep("A", 8), rep("B", 7), rep("C", 4), rep("D", 3), rep("E", 5)
                 )
                 
                 , Response = c(rep("2", 8), rep("2", 7) , rep("2", 4), rep("2", 3), rep("2", 5)
                 )
                 )
                 
                 , data.frame(ID = c(rep("A", 3), rep("B", 4), rep("C", 8), rep("D", 5), rep("E", 6)
                 )
                 , Response = c(rep("2", 3), rep("2", 4) , rep("2", 8), rep("2", 5), rep("2", 6)
                 )
                 )
                 
                 )


Which looks like:

   ID Response
1   A        1
2   A        1
3   A        1
4   B        1
5   B        1
6   B        1
7   B        1
8   B        1
9   B        1
10  B        1
11  B        1
12  C        1
13  C        1
14  C        1
15  C        1
16  C        1
17  C        1
18  C        1
19  D        1
20  D        1
21  D        1
22  D        1
23  E        1
24  E        1
25  E        1
26  E        1
27  E        1
28  A        2
29  A        2
30  A        2
31  A        2
32  A        2
33  A        2
34  A        2
35  A        2
36  B        2
37  B        2
38  B        2
39  B        2
40  B        2
41  B        2
42  B        2
43  C        2
44  C        2
45  C        2
46  C        2
47  D        2
48  D        2
49  D        2
50  E        2
51  E        2
52  E        2
53  E        2
54  E        2
55  A        2
56  A        2
57  A        2
58  B        2
59  B        2
60  B        2
61  B        2
62  C        2
63  C        2
64  C        2
65  C        2
66  C        2
67  C        2
68  C        2
69  C        2
70  D        2
71  D        2
72  D        2
73  D        2
74  D        2
75  E        2
76  E        2
77  E        2
78  E        2
79  E        2
80  E        2

I was thinking maybe a for loop to create vectors for each cell in each column, but I'm wondering if that's necessary or if there might be a more efficient way?


Solution

  • We could reshape to 'long' format with pivot_longer and then use uncount

    library(dplyr)
    library(tidyr)
    data %>%
       pivot_longer(cols = starts_with("Count"), names_prefix = 'Count_', 
         names_to = 'Response') %>% 
       arrange(Response) %>%
       uncount(value)
    

    -output

    # A tibble: 80 × 2
       ID    Response
       <chr> <chr>   
     1 A     1       
     2 A     1       
     3 A     1       
     4 B     1       
     5 B     1       
     6 B     1       
     7 B     1       
     8 B     1       
     9 B     1       
    10 B     1       
    # … with 70 more rows
    

    In base R, this can be done with rep/unlist/col/row

    vals <- unlist(data[-1])
    out <-  data.frame(ID = rep(data$ID[row(data[-1])], vals), 
          Response = rep(col(data[-1]), vals))
    

    data

    data <- structure(list(ID = c("A", "B", "C", "D", "E"), Count_1 = c(3L, 
    8L, 7L, 4L, 5L), Count_2 = c(8L, 7L, 4L, 3L, 5L), Count_3 = c(3L, 
    4L, 8L, 5L, 6L)), class = "data.frame", row.names = c("1", "2", 
    "3", "4", "5"))