Search code examples
r, ggplot2, ggalluvial

Alluvial plots using ggplot2 - highlight certain pairings


I have plotted an alluvial plot using ggplot2, however I cannot seem to figure out how to colour only the most frequent pair "CAGGFNYQLIW" from the variable "CTaa_alpha" which is paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: one row per alpha/beta CDR3 pairing (50 rows).
#   CTaa_alpha - alpha-chain sequence (character)
#   CTaa_beta  - beta-chain sequence (character)
#   n          - integer count of the pairing; rows are listed in decreasing n
# The object carries the tibble classes ("tbl_df", "tbl", "data.frame").
# NOTE(review): looks like dput() output - confirm before editing by hand.
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Two-axis alluvial plot of the alpha/beta pairings in `a`; each flow's width
# is taken from the count column `n`.
ggplot(
  a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # NOTE: inside aes(), "green" is a constant data value sent through the fill
  # scale, not a literal colour; the colour actually drawn is chosen by
  # scale_fill_viridis_d() below, and every flow gets the same one.
  geom_alluvium(aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its own name.
  geom_text(
    aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_fill_viridis_d() +
  # Name the two axes and pad the x range so the outer labels are not clipped.
  scale_x_discrete(
    expand = c(0.15, 0.05),
    limits = c("CDR3_alpha", "CDR3_beta")
  ) +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

enter image description here

As you can see, it is a bit "messy", and I would like to have the option of highlighting specific pairings (whether it is row 1, which is the most frequent pairing, or row 10, which is the 10th most frequent pairing). Any insights would be welcome!


Solution

  • To highlight some of the categories, you can map a condition to the fill aesthetic and then set your desired colors using scale_fill_manual. For example, to highlight the top 3 categories you can do:

    library(ggplot2)
    library(ggalluvial)
    
    # Highlight Top 3
    # Pick the rows (pairings) to highlight: here the three with the largest n.
    # To highlight any other pairing, select its row instead, e.g. a[10, ].
    .top <- head(a[order(a$n, decreasing = TRUE), ], 3)
    
    # Identify a pairing by BOTH chains. Matching on CTaa_alpha alone would
    # also colour every other flow leaving the same alpha stratum whenever an
    # alpha sequence is paired with more than one beta sequence.
    .highlight <- paste(.top$CTaa_alpha, .top$CTaa_beta, sep = "_")
    
    ggplot(
      data = a,
      aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
    ) +
      # The fill is a logical: TRUE for flows whose alpha/beta key is selected.
      geom_alluvium(
        aes(fill = paste(CTaa_alpha, CTaa_beta, sep = "_") %in% .highlight)
      ) +
      geom_stratum() +
      geom_text(
        stat = "stratum",
        aes(label = after_stat(stratum))
      ) +
      scale_x_discrete(
        limits = c("CDR3_alpha", "CDR3_beta"),
        expand = c(0.15, 0.05)
      ) +
      # Name the colours by level so that TRUE is always steelblue and FALSE
      # always grey, even if only one of the two levels occurs in the data.
      # Unnamed values are assigned by position and would paint a lone TRUE
      # level grey.
      scale_fill_manual(
        values = c("FALSE" = "grey65", "TRUE" = "steelblue")
      ) +
      theme_classic() +
      theme(legend.position = "none")