I have a data frame containing spatial data about the distribution of certain immune cells in tumors, and I'm interested in displaying their distribution in a novel way. In essence, I want to make a type of sunburst chart in which the outermost layer is a histogram, roughly like the example below.
At the moment, I am planning to superimpose a radial histogram on top of a sunburst chart, but I would like to know whether there is an existing function that can do this.
Edit: below is the data frame I've been working with:
# Example data for a single slide (LU095): one row per observed combination
# of stroma-percentage bin and CD8+-percentage bin, with its `Freq` value.
# The object is bound to `df` because the reshaping and plotting code refers
# to it by that name; left unbound, `df` would resolve to stats::df().
df <- structure(
  list(
    slide = rep("LU095", 46L),
    stroma_bins = structure(
      c(1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L,
        5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
        8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
        10L, 10L, 10L, 10L, 10L, 10L, 10L),
      levels = c("0-10% Stroma", "10-20% Stroma", "20-30% Stroma",
                 "30-40% Stroma", "40-50% Stroma", "50-60% Stroma",
                 "60-70% Stroma", "70-80% Stroma", "80-90% Stroma",
                 "90-100% Stroma"),
      class = "factor"
    ),
    cd8_percent_bins = structure(
      c(1L,
        1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
        3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L,
        4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L),
      levels = c("0-2% CD8+ Cells", "2-4% CD8+ Cells", "4-6% CD8+ Cells",
                 "6-8% CD8+ Cells", "8-10% CD8+ Cells", "10-15% CD8+ Cells",
                 "15-20% CD8+ Cells", ">20% CD8+ Cells"),
      class = "factor"
    ),
    Freq = c(8L, 5L, 1L, 7L, 1L, 7L, 2L, 15L, 4L, 4L, 2L, 15L,
             4L, 3L, 2L, 12L, 15L, 1L, 4L, 2L, 1L, 1L, 16L, 12L, 8L, 8L,
             4L, 1L, 3L, 1L, 14L, 4L, 17L, 6L, 9L, 11L, 5L, 2L, 51L, 18L,
             24L, 24L, 17L, 32L, 21L, 11L)
  ),
  row.names = c(NA, -46L),
  class = c("data.table", "data.frame")
)
I would probably do this directly in ggplot.
First we reshape your data for plotting:
library(tidyverse)
library(geomtextpath)
library(colorspace)
# Build the x position (`bar_pos`) of every bar so that bars belonging to the
# same stroma bin sit next to each other and consecutive stroma bins are
# separated by a one-unit gap. Intermediate tables stay inside local().
plot_df <- local({
  # Number each CD8 bin by its factor level and record, for every stroma
  # bin, the highest bar number it contains.
  numbered <- df %>%
    mutate(bar_pos = as.numeric(factor(cd8_percent_bins))) %>%
    group_by(stroma_bins) %>%
    mutate(max_bar = max(bar_pos)) %>%
    ungroup()

  # Collapse to one row per stroma bin, then replace `max_bar` with the
  # running total of the widths of all stroma bins that come before it.
  per_group <- nest(numbered, data = -c(stroma_bins, max_bar))
  widths_before <- lag(per_group$max_bar, default = 0)
  per_group <- mutate(per_group, max_bar = cumsum(widths_before))

  # Expand back to one row per bar and shift each bar by its group's offset
  # plus the group's index (the index adds the gap between groups).
  per_group %>%
    unnest(cols = everything()) %>%
    mutate(bar_pos = max_bar + bar_pos + as.numeric(factor(stroma_bins)))
})
And the plotting code could be something like:
# Radial "sunburst + histogram" plot: the outer bars show Freq for every CD8
# bin, and the inner ring marks the stroma bin each run of bars belongs to.

# Tallest bar; the ring thickness and the y limits are scaled from it.
max_freq <- max(plot_df$Freq)

# Upper x limit. Every ring segment ends at max(bar_pos) + 1, so the limit is
# taken from the positions actually plotted. Deriving it from nrow(df)
# undercounts whenever a stroma bin skips CD8 levels (the 10-20% bin does so
# here); with more skipped levels the last ring segment would fall outside
# the limits and be dropped by the scale.
x_upper <- max(plot_df$bar_pos) + 1

ggplot(plot_df, aes(bar_pos, Freq, fill = stroma_bins, color = stroma_bins)) +
  # Outer layer: the histogram bars.
  geom_col(alpha = 0.6, color = NA) +
  # Inner ring: one rectangle per stroma bin, spanning that bin's bars.
  # bar_pos and Freq are dummy columns so that the x/y aesthetics inherited
  # from ggplot() can be resolved for this layer's data.
  geom_rect(
    data = . %>%
      group_by(stroma_bins) %>%
      summarize(xmin = min(bar_pos) - 1, ymin = -0.5 * max_freq,
                xmax = max(bar_pos) + 1, ymax = -5, bar_pos = 1,
                Freq = 1),
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
        fill = after_scale(darken(color, 0.25)))
  ) +
  # Stroma-bin label, centred radially and angularly inside each ring segment.
  geom_textpath(
    data = . %>%
      group_by(stroma_bins) %>%
      summarize(Freq = -0.25 * max_freq - 2.5,
                bar_pos = mean(bar_pos)),
    angle = 90,
    aes(label = sub("-", "\u2013", sub(" Stroma", "", stroma_bins)),
        group = stroma_bins),
    color = "white"
  ) +
  # CD8-bin label just below each bar; every label is its own group so that
  # each one is drawn as a separate text path.
  geom_textpath(
    aes(y = -2.5,
        label = sub("-", "\u2013",
                    sub("% CD8\\+ Cells", "", cd8_percent_bins)),
        group = seq_along(cd8_percent_bins)),
    size = 2,
    color = "black"
  ) +
  # Title placed at the lower y limit, i.e. the centre of the polar plot.
  annotate("text", x = 1, y = -max_freq, label = "Stroma bin", size = 10,
           color = "gray30") +
  scale_fill_discrete(guide = "none") +
  scale_color_discrete(guide = "none") +
  scale_y_continuous(limits = c(-max_freq, max_freq + 1)) +
  scale_x_continuous(expand = c(0, 0), limits = c(1, x_upper)) +
  coord_polar() +
  theme_void() +
  theme(plot.margin = margin(-100, -100, -100, -100))