have a dataset from a survey. 2 questions have multiple choice options and it doesn't allow me to use pivot_wider
here is a sample data.
nam=c("per_id","quest","answ")
per_id=c("0012","0023","0045","0003","0003","0045","0003","0012","0023","0004","0004","0001","0005","0546","0001","0546","0005","0004","0004")
quest=c("A","A","A","A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
answ=c("Self","Self","Self","Self","Father","Sister","Daughter","Mother","Father","Self","Father","X1","Y1","Z1","Y1","Z2","X1","X1","X1")
df=cbind.data.frame(per_id,quest,answ)
names(df)=nam
what I want to have is
You might try the following approach using split
+ Reduce
+ merge
based on unique rows.
l = lapply(split(df, df$quest), \(x) { x = unique(x); x$quest = NULL; x })
# names(l$A)[2] = "A"
# names(l$B)[2] = "B"
# in case you have more than two quests:
l = lapply(names(l), \(x) setNames(l[[x]], c(names(l[[x]])[1], x)))
Reduce(\(...) merge(..., all = TRUE), l)
per_id A B
1 0001 <NA> X1
2 0001 <NA> Y1
3 0003 Self <NA>
4 0003 Father <NA>
5 0003 Daughter <NA>
6 0004 Self X1
7 0004 Father X1
8 0005 <NA> Y1
9 0005 <NA> X1
10 0012 Self <NA>
11 0012 Mother <NA>
12 0023 Self <NA>
13 0023 Father <NA>
14 0045 Self <NA>
15 0045 Sister <NA>
16 0546 <NA> Z1
17 0546 <NA> Z2
df = structure(list(per_id = c("0012", "0023", "0045", "0003", "0003",
"0045", "0003", "0012", "0023", "0004", "0004", "0001", "0005",
"0546", "0001", "0546", "0005", "0004", "0004"), quest = c("A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "B", "B", "B"), answ = c("Self", "Self", "Self", "Self",
"Father", "Sister", "Daughter", "Mother", "Father", "Self", "Father",
"X1", "Y1", "Z1", "Y1", "Z2", "X1", "X1", "X1")), class = "data.frame", row.names = c(NA,
-19L))