Search code examples
rkable

Highlight duplicate rows based on two columns in table, with unique colours for pairs


I have a dataset which I would like to turn into a table. When the ID AND timepoint are the same, I would like that pair (or triplet) of rows to be highlighted in a colour, and for the next pair, a different colour and so on. Is there a way to do this without specifying a list of colours and list of rows?

Here is the dataframe

   structure(list(ID = c("168", "168", "168", "002", "002", "002", 
"002", "002", "002", "002 15/5/13", "002 15/5/13", "062", "062", 
"062+2 (11/02/14)", "062+2 (11/02/14)", "074", "074", "074", 
"074", "074", "074", "074", "074", "093", "093", "093", "093", 
"093", "093", "093", "093", "105", "105", "105", "105", "105", 
"105", "105", "105", "127", "127", "142", "142", "142", "142", 
"145", "145", "149", "149", "149", "149", "155", "155", "155", 
"155", "155", "155", "156", "156", "156", "156", "158", "158", 
"158", "158", "168", "168", "174", "174", "180", "180", "183", 
"183", "201", "201", "205", "205"), timepoint = c("1", "1", "1", 
"3", "3", "5", "5", "7", "7", NA, NA, "5", "5", "1", "1", "2", 
"2", "4", "4", "5", "5", "7", "7", "2", "2", "3", "3", "5", "5", 
"7", "7", "2", "2", "3", "3", "5", "5", "7", "7", "3", "3", "1", 
"1", "3", "3", "1", "1", "1", "1", "3", "3", "1", "1", "3", "3", 
"7", "7", "3", "3", "5", "5", "1", "1", "3", "3", "3", "3", "5", 
"5", "1", "1", "1", "1", "1", "1", "1", "1"), Plate = c("Plate_7", 
"Plate_3", "Plate_6", "Plate_1", "Plate_7", "Plate_1", "Plate_7", 
"Plate_7", "Plate_1", "Plate_1", "Plate_7", "Plate_1", "Plate_7", 
"Plate_7", "Plate_1", "Plate_7", "Plate_1", "Plate_7", "Plate_1", 
"Plate_7", "Plate_1", "Plate_7", "Plate_1", "Plate_7", "Plate_2", 
"Plate_7", "Plate_2", "Plate_7", "Plate_2", "Plate_7", "Plate_2", 
"Plate_2", "Plate_7", "Plate_7", "Plate_2", "Plate_7", "Plate_2", 
"Plate_3", "Plate_7", "Plate_6", "Plate_3", "Plate_6", "Plate_3", 
"Plate_6", "Plate_3", "Plate_3", "Plate_6", "Plate_3", "Plate_6", 
"Plate_6", "Plate_3", "Plate_3", "Plate_6", "Plate_6", "Plate_3", 
"Plate_7", "Plate_6", "Plate_4", "Plate_5", "Plate_4", "Plate_5", 
"Plate_3", "Plate_6", "Plate_6", "Plate_3", "Plate_6", "Plate_3", 
"Plate_4", "Plate_5", "Plate_3", "Plate_6", "Plate_3", "Plate_7", 
"Plate_7", "Plate_5", "Plate_5", "Plate_7"), `Year Run` = c(2022, 
2020, 2022, 2020, 2022, 2020, 2022, 2022, 2020, 2020, 2022, 2020, 
2022, 2022, 2020, 2022, 2020, 2022, 2020, 2022, 2020, 2022, 2020, 
2022, 2020, 2022, 2020, 2022, 2020, 2022, 2020, 2020, 2022, 2022, 
2020, 2022, 2020, 2020, 2022, 2022, 2020, 2022, 2020, 2022, 2020, 
2020, 2022, 2020, 2022, 2022, 2020, 2020, 2022, 2022, 2020, 2022, 
2022, 2022, 2022, 2022, 2022, 2020, 2022, 2022, 2020, 2022, 2020, 
2022, 2022, 2020, 2022, 2020, 2022, 2022, 2022, 2022, 2022)), row.names = c(NA, 
-77L), class = c("tbl_df", "tbl", "data.frame"))

And this code works, but it is very manual, i.e. setting each var, and row. I would like it to work if I have different data

 color_scheme <- c("#DDDDDD", "#AAAAAA", "#777777", "#444444", "#111111")

# Create a grouping variable based on ID and Timepoint
group_var <- paste(id_table$ID, id_table$timepoint, sep = "_")

# Create a color vector based on the grouping variable
color_var <- as.numeric(factor(group_var))

# Generate the table and highlight rows based on the color vector
id_table <- id_table %>%
  select(-ID) %>%
  kbl() %>%
  row_spec(which(color_var == 1), background = color_scheme[1]) %>%
  row_spec(which(color_var == 2), background = color_scheme[2]) %>%
  row_spec(which(color_var == 3), background = color_scheme[3]) %>%
  row_spec(which(color_var == 4), background = color_scheme[4]) %>%
  row_spec(which(color_var == 5), background = color_scheme[5])

# Print the table
id_table

Solution

  • You could do it with a loop

    color_scheme <- c("#DDDDDD", "#AAAAAA", "#777777", "#444444", "#111111")
    
    # Create a grouping variable based on ID and Timepoint
    group_var <- paste(dt$ID, dt$timepoint, sep = "_")
    
    # Create a color vector based on the grouping variable
    color_var <- as.numeric(factor(group_var))
    
    # Create general table
    id_table <- dt %>%
      select(-ID) %>%
      kbl()
    
    # Set color to each row
    color_var2 <- which(color_var %in% c(1,2,3,4,5))
    color_scheme2 <-  color_scheme[color_var][color_var2]
    
    for (i in seq_along(color_var2)) {
      id_table <- row_spec(id_table, color_var2[i], background = color_scheme2[i])
    }
    
    # Print the table
    id_table
    

    Edit

    We can define the groups before hand and recycle or define unique colors.

    library(dplyr)
    library(kableExtra)
    
    # Create general table
    id_table <- dt %>%
      select(timepoint, Plate, `Year Run`) %>%
      kbl()
    
    
    groups <- 
    dt %>% 
      group_by(ID,timepoint) %>% 
      mutate(group_id = cur_group_id()) %>% 
      ungroup() %>% 
      mutate(group_id = unclass(forcats::fct_inorder(as.factor(group_id))))
    
    color_scheme <- c("#DDDDDD", "#AAAAAA", "#777777", "#444444", "#111111")
    
    # Recycling colors 
    color_var <- rep(color_scheme, round(nrow(groups)/length(color_scheme)))[groups$group_id]
      
    for (i in seq_along(color_var)) {
      id_table <- row_spec(id_table, i, background = color_var[i])
    }
    
    id_table
    
    # Unique colors per group
    color_scheme <- gray.colors(max(groups$group_id))
    color_var <- rep(color_scheme, round(nrow(groups)/length(color_scheme)))[groups$group_id]
    
    for (i in seq_along(color_var)) {
      id_table <- row_spec(id_table, i, background = color_var[i])
    }
    
    id_table