Search code examples
r dplyr

assign a value based on which row in a pair is higher


I have:

> dput(for_stack)
structure(list(id = c("20230420-01", "20230420-02", "2023042110-01", 
"2023042110-02", "2023042112-01", "2023042112-02", "2023042114-01", 
"2023042114-02", "2023042214-01", "2023042214-02"), pair_id = c(20230420L, 
20230420L, 2023042110L, 2023042110L, 2023042112L, 2023042112L, 
2023042114L, 2023042114L, 2023042214L, 2023042214L), mean_blast = c(3.82352941176471, 
4.46153846153846, 1.71428571428571, 1.0625, 4.8125, 4, 3.5, 1.25, 
4.9375, 4.5)), row.names = c(NA, 10L), class = "data.frame")

There are two rows per pair_id, and I want to flag which row in each pair has the higher value of mean_blast, so that the result looks like this:

> dput(for_stack)
structure(list(id = c("20230420-01", "20230420-02", "2023042110-01", 
"2023042110-02", "2023042112-01", "2023042112-02", "2023042114-01", 
"2023042114-02", "2023042214-01", "2023042214-02"), pair_id = c(20230420L, 
20230420L, 2023042110L, 2023042110L, 2023042112L, 2023042112L, 
2023042114L, 2023042114L, 2023042214L, 2023042214L), mean_blast = c(3.82352941176471, 
4.46153846153846, 1.71428571428571, 1.0625, 4.8125, 4, 3.5, 1.25, 
4.9375, 4.5), higher = c(FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, 
TRUE, FALSE, TRUE, FALSE)), row.names = c(NA, 10L), class = "data.frame")

              id    pair_id mean_blast higher
1    20230420-01   20230420   3.823529  FALSE
2    20230420-02   20230420   4.461538   TRUE
3  2023042110-01 2023042110   1.714286   TRUE
4  2023042110-02 2023042110   1.062500  FALSE
5  2023042112-01 2023042112   4.812500   TRUE
6  2023042112-02 2023042112   4.000000  FALSE
7  2023042114-01 2023042114   3.500000   TRUE
8  2023042114-02 2023042114   1.250000  FALSE
9  2023042214-01 2023042214   4.937500   TRUE
10 2023042214-02 2023042214   4.500000  FALSE

I'm sure there is a way to do this with summarize and group_by, but I have not figured it out yet.


Solution

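  • In dplyr (version 1.1.0 or later, which added the .by argument), compare each row's mean_blast with the maximum of its pair_id group. If both rows of a pair tie, both are flagged TRUE: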

    library(dplyr)
    
    # Within each pair_id, flag the row(s) whose mean_blast equals the pair's
    # maximum. `.by` groups for this one call only, so no ungroup() is needed.
    # The result is returned, not assigned back to for_stack.
    mutate(
      for_stack,
      higher = max(mean_blast) == mean_blast,
      .by = pair_id
    )