I have the following data frame:
temp1=data.frame(id=c(1,2,3,4,5),p1=c(0,0,1,1,2),p2=c(9,2,3,5,3),p3=c(1,4,10,3,7),p4=c(4,4,7,1,10))
id p1 p2 p3 p4
1 0 9 1 4
2 0 2 3 4
3 1 3 10 7
4 1 5 3 1
5 2 3 7 10
for each id I want to extract top three columns with highest value and put them in a new data frame something like this:
id top1 top2 top3
1 p2 p4 p3
2 p4 p3 p2
3 p3 p4 p2
4 p2 p3 p4/p1
5 p4 p3 p2
in case there are two identical value we are allowed to put them in arbitrary order.
library("tidyr")
library("dplyr")
df <- data.frame(id=c(1,2,3,4,5),p1=c(0,0,1,1,2),p2=c(9,2,3,5,3),p3=c(1,4,10,3,7),p4=c(4,4,7,1,10))
df2 <- gather(df,col,val,-id)
res <- group_by(df2,id) %>% arrange(id,desc(val)) %>% summarise(top1 = first(col),top2 = nth(col,2),top3 = nth(col,3))
Results in
# id top1 top2 top3
# <dbl> <chr> <chr> <chr>
# 1 1 p2 p4 p3
# 2 2 p3 p4 p2
# 3 3 p3 p4 p2
# 4 4 p2 p3 p1
# 5 5 p4 p3 p2
following new information
res <- group_by(df2,id) %>% mutate(r=rank(-(val/sum(val)*100),ties.method = "min")) %>% arrange(id,r) %>% summarise(top1 = first(col),top2 = nth(col,2),top3 = nth(col,3))
results in
# id top1 top2 top3
# <dbl> <chr> <chr> <chr>
# 1 1 p2 p4 p3
# 2 2 p3 p4 p2
# 3 3 p3 p4 p2
# 4 4 p2 p3 p1
# 5 5 p4 p3 p2