Search code examples
Tags: python, r, graph, charts, graphics

Is there a way to create sunburst/pie charts with histograms on the outermost layer?


I have a dataframe containing spatial data about the distribution of certain immune cells in tumors and I'm interested in displaying their distribution in a novel way. In essence I want to make a type of sunburst chart where the outermost layer is a histogram, roughly like this example below.

Example

At the moment, I am planning to superimpose a radial histogram on top of a sunburst chart, but I would like to know whether there is an existing function that can do this.

Edit: below is the dataframe I've been working with

# Example data, apparently dput() output: a data.table / data.frame of 46 rows.
#   slide            - character, "LU095" in every row
#   stroma_bins      - factor, 10 levels ("0-10% Stroma" ... "90-100% Stroma")
#   cd8_percent_bins - factor, 8 levels ("0-2% CD8+ Cells" ... ">20% CD8+ Cells")
#   Freq             - integer; presumably the number of observations that fall
#                      in that stroma bin / CD8 bin combination (confirm)
# Only 46 of the 80 possible bin combinations are present.
# NOTE: the expression is not assigned to a name here; the answer's code below
# refers to this object as `df`.
structure(list(slide = c("LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095", 
"LU095", "LU095", "LU095", "LU095", "LU095", "LU095", "LU095"
), stroma_bins = structure(c(1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 
5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L), levels = c("0-10% Stroma", 
"10-20% Stroma", "20-30% Stroma", "30-40% Stroma", "40-50% Stroma", 
"50-60% Stroma", "60-70% Stroma", "70-80% Stroma", "80-90% Stroma", 
"90-100% Stroma"), class = "factor"), cd8_percent_bins = structure(c(1L, 
1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), levels = c("0-2% CD8+ Cells", 
"2-4% CD8+ Cells", "4-6% CD8+ Cells", "6-8% CD8+ Cells", "8-10% CD8+ Cells", 
"10-15% CD8+ Cells", "15-20% CD8+ Cells", ">20% CD8+ Cells"), class = "factor"), 
    Freq = c(8L, 5L, 1L, 7L, 1L, 7L, 2L, 15L, 4L, 4L, 2L, 15L, 
    4L, 3L, 2L, 12L, 15L, 1L, 4L, 2L, 1L, 1L, 16L, 12L, 8L, 8L, 
    4L, 1L, 3L, 1L, 14L, 4L, 17L, 6L, 9L, 11L, 5L, 2L, 51L, 18L, 
    24L, 24L, 17L, 32L, 21L, 11L)), row.names = c(NA, -46L), class = c("data.table", 
"data.frame"))

Solution

  • I would probably do this directly in ggplot.

    First we reshape your data for plotting:

    library(tidyverse)
    library(geomtextpath)
    library(colorspace)
    
    # Give every bar an x slot on one shared axis: the bars of a stroma bin sit
    # side by side, and consecutive stroma bins are separated by an empty slot.
    plot_df <- df %>%
      # Slot inside the stroma bin = integer code of the CD8 bin level.
      mutate(bar_pos = as.numeric(factor(cd8_percent_bins))) %>%
      # Width of a stroma bin = highest slot used inside it.
      group_by(stroma_bins) %>%
      mutate(max_bar = max(bar_pos)) %>%
      ungroup() %>%
      # Collapse to one row per stroma bin so the widths can be accumulated;
      # max_bar becomes the total width of all preceding bins (0 for the first).
      nest(data = -c(stroma_bins, max_bar)) %>%
      mutate(max_bar = cumsum(max_bar) - max_bar) %>%
      unnest(cols = data) %>%
      # Final slot = one gap per stroma bin so far + bin offset + slot in the bin.
      mutate(bar_pos = as.numeric(factor(stroma_bins)) + max_bar + bar_pos)
    

    And the plotting code could be something like:

    # Radial "sunburst + histogram": frequency bars on the outside, one labelled
    # band per stroma bin on the inside. `plot_df` supplies the x slot (`bar_pos`)
    # of every bar; `df` is the original, un-reshaped data.
    ggplot(plot_df, aes(bar_pos, Freq, fill = stroma_bins, color = stroma_bins)) +
      # Outer ring: one bar per row of plot_df.
      geom_col(alpha = 0.6, color = NA) +
      # Inner ring: one band per stroma bin, spanning that bin's bars plus one
      # slot on each side. bar_pos and Freq are dummy values that only satisfy
      # the inherited x / y aesthetics; they are set last so the min()/max()
      # calls before them still see the real bar positions.
      geom_rect(data = . %>% group_by(stroma_bins) %>% 
                  summarize(xmin = min(bar_pos) - 1, ymin = -0.5 * max(df$Freq),
                            xmax = max(bar_pos) + 1, ymax = -5, bar_pos = 1,
                            Freq = 1),
                aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, 
                    fill = after_scale(darken(color, 0.25)))) +
      # Stroma-bin label, placed at the middle of each band (halfway between the
      # band's ymin and ymax, at the mean x slot of its bars).
      geom_textpath(data = . %>% group_by(stroma_bins) %>% 
                      summarize(Freq = -0.25 * max(df$Freq) - 2.5, 
                                bar_pos = mean(bar_pos)), angle = 90,
                    aes(label = sub("-", "\u2013", sub(" Stroma", "", stroma_bins)), 
                        group = stroma_bins), color = "white") + 
      # CD8-bin label under every bar, in the strip between the bars and bands.
      geom_textpath(aes(y = -2.5, label = sub("-", "\u2013",
                                              sub("% CD8\\+ Cells", "", 
                                                  cd8_percent_bins)),
                        group = seq_along(cd8_percent_bins)), size = 2,
                    color = "black") +
      # Caption at the lower y limit, which coord_polar() maps to the centre.
      annotate("text", x = 1, y = -max(df$Freq), label = "Stroma bin", size = 10, 
               color = "gray30") +
      scale_fill_discrete(guide = "none") +
      scale_color_discrete(guide = "none") +
      scale_y_continuous(limits = c(-max(df$Freq), max(df$Freq) + 1)) +
      # Take the right-hand limit from the layout itself: the last band ends at
      # max(bar_pos) + 1. A limit computed from the row count only matches that
      # edge when exactly one CD8 bin is skipped; otherwise the last band is
      # dropped as out of range or an empty wedge is left.
      scale_x_continuous(expand = c(0, 0),
                         limits = c(1, max(plot_df$bar_pos) + 1)) +
      coord_polar() +
      theme_void() +
      theme(plot.margin = margin(-100, -100, -100, -100))
    

    enter image description here