Search code examples
rggplot2sankey-diagramdataflow-diagram

asymmetrical distance between groups in alluvial diagram


I would like to change the distances between groups in an alluvial diagram using ggplot2 and ggalluvial.

my example is from https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html

# Attach the packages this example relies on: ggalluvial provides the
# `vaccinations` data set plus geom_flow()/geom_stratum(); ggplot2 provides
# the rest of the plotting functions.
library(ggplot2)
library(ggalluvial)

data(vaccinations)
# NOTE: assigning to levels() renames the existing levels in place (the
# underlying codes are untouched), so this relabels the responses rather than
# merely reordering them. Kept as-is so the output matches the figure below.
levels(vaccinations$response) <- rev(levels(vaccinations$response))
# One axis per survey, strata = responses, one alluvium per subject,
# stratum heights weighted by `freq`.
ggplot(vaccinations,
       aes(x = survey, stratum = response, alluvium = subject,
           y = freq,
           fill = response, label = response)) +
  scale_x_discrete(expand = c(.1, .1)) +
  geom_flow() +
  geom_stratum(alpha = .5) +
  geom_text(stat = "stratum", size = 3) +
  theme(legend.position = "none") +
  ggtitle("vaccination survey responses at three points in time")

gives me:

enter image description here

Now I would like to shorten the distance between ms153_NSA and ms432_NSA while keeping the same distance between ms432_NSA and ms460_NSA:

enter image description here

I tried to use following (without success):

https://rdrr.io/cran/ggalluvial/man/stat_flow.html

How to increase the space between the bars in a bar plot in ggplot2?

Even Google does not show me an asymmetrical alluvial diagram. :'-/


Solution

  • Here's one way to hack it

    Data manipulation:

    # Build the flow layer once, grab the data ggalluvial computed for it, and
    # move everything that sits on the 2nd axis position to the left.
    
    library(dplyr)
    
    # Same plot as in the question, but with the flow layer only: its computed
    # layer data is all that is needed to redraw the figure by hand.
    p <- ggplot(
      data = vaccinations,
      mapping = aes(x = survey, stratum = response, alluvium = subject,
                    y = freq,
                    fill = response, label = response)
    ) +
      scale_x_discrete(expand = c(.1, .1)) +
      geom_flow() +
      theme(legend.position = "none") +
      ggtitle("vaccination survey responses at three points in time")
    
    # Horizontal distance by which the 2nd axis is moved to the left.
    shift.amt <- 0.2
    
    flow.data <- layer_data(p)
    # Rows belonging to the 2nd axis position, decided on the unshifted x.
    at.second.axis <- flow.data$x == 2
    
    # Apply the same leftward shift to the centre and both edges of those rows.
    new.df <- flow.data %>%
      mutate(across(c(xmin, xmax, x),
                    function(pos) ifelse(at.second.axis, pos - shift.amt, pos)))
    
    

    Plot:

    library(ggforce) # needed for geom_diagonal_wide
    
    # Redraw the diagram from the shifted layer data in `new.df`.
    # Each layer's `data` is a magrittr functional sequence (`. %>% ...`), i.e. a
    # function that ggplot2 calls on the plot-level data to get that layer's data.
    ggplot(new.df, aes(fill = fill)) +
    
      # flows: for every alluvium, collect the four corners of its ribbon
      # (right edge of the "start" side, left edge of the "end" side) and let
      # geom_diagonal_wide() draw the curved band between them
      geom_diagonal_wide(data = . %>%
                           select(alluvium, fill, side, xmin, xmax, ymin, ymax) %>%
                           group_by(alluvium) %>%
                           summarise(fill = fill[side == "start"],
                                     x = list(c(xmax[side == "start"], xmin[side == "end"], 
                                                xmin[side == "end"], xmax[side == "start"])),
                                     y = list(c(ymax[side == "start"], ymax[side == "end"], 
                                                ymin[side == "end"], ymin[side == "start"]))) %>%
                           # name the list-columns explicitly: calling unnest()
                           # without `cols` is deprecated since tidyr 1.0.0
                           tidyr::unnest(cols = c(x, y)),
                         aes(x = x, y = y, group = alluvium),
                         alpha = 0.5) +
    
      # strata: one box per (x, stratum, fill), spanning the extremes of the
      # rows in that group
      geom_rect(data = . %>% group_by(x, stratum, fill) %>%
                  summarise(xmin = min(xmin), xmax = max(xmax),
                            ymin = min(ymin), ymax = max(ymax)),
                aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
                alpha = 0.5, colour = "black") +
    
      # labels: placed at the midpoint of the y range of each (x, stratum, label);
      # inherit.aes = FALSE keeps the plot-level fill mapping out of this layer
      geom_text(data = . %>% group_by(x, stratum, label) %>%
                  summarise(y = mean(range(y))),
                aes(x = x, y = y, label = label), 
                inherit.aes = FALSE, size = 3) +
    
      # recreate original x-axis breaks/labels, fill values, & axis titles
      scale_x_continuous(breaks = sort(unique(new.df$x)),
                         labels = layer_scales(p)$x$get_labels()) +
      scale_fill_identity() + # by default, this won't create a legend
      labs(title = "vaccination survey responses at three points in time",
           x = p$labels$x, y = p$labels$y)
    

    plot