Search code examples
r ggplot2 density-plot

How to add median values on top of a grouped density plot?


I apologise if this has been asked before. I am trying to add median values to the peak of a grouped density plot (example below).

# Ridgeline density plot of the "Indy" vs "Unionist" vote share per election
# year (example taken from the ggridges gallery vignette).
library(dplyr)
library(forcats)
library(ggplot2)   # ggplot(), aes(), labs(), scale_*(), coord_cartesian()
library(ggridges)  # geom_density_ridges(), scale_fill_cyclical(),
                   # theme_ridges() and the Catalan_elections dataset

Catalan_elections %>%
  # Turn Year into a factor with reversed level order; this flips the order in
  # which the ridges are stacked along the y axis.
  mutate(YearFct = fct_rev(as.factor(Year))) %>%
  ggplot(aes(y = YearFct)) +
  geom_density_ridges(
    # One ridge per Year/Option combination; the pasted label is also the key
    # used by the cyclical fill scale below.
    aes(x = Percent, fill = paste(YearFct, Option)),
    # from/to restrict the density estimate to the 0-100 percent range.
    alpha = .8, color = "white", from = 0, to = 100
  ) +
  labs(
    x = "Vote (%)",
    y = "Election Year",
    title = "Indy vs Unionist vote in Catalan elections",
    subtitle = "Analysis unit: municipalities (n = 949)",
    caption = "Marc Belzunces (@marcbeldata) | Source: Idescat"
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_cyclical(
    # Only the two 1980 keys appear in the legend, relabelled without the year;
    # the four colours are reused cyclically for the remaining keys.
    breaks = c("1980 Indy", "1980 Unionist"),
    labels = c(`1980 Indy` = "Indy", `1980 Unionist` = "Unionist"),
    values = c("#ff0000", "#0000ff", "#ff8080", "#8080ff"),
    name = "Option", guide = "legend"
  ) +
  # clip = "off" lets the topmost ridge extend beyond the panel area.
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE)

enter image description here


Solution

  • Edit:

    Thanks for updating your question; I misunderstood and thought you wanted to highlight the medians (straightforward) but it sounds like you actually want the peaks (more complicated). I also thought that this was your code, not an example from https://cran.r-project.org/web/packages/ggridges/vignettes/gallery.html, so I didn't realise the Catalan_elections dataset was publicly available (e.g. from the ggjoy package).

    Here is a more relevant solution:

    library(tidyverse)
    library(palmerpenguins)
    library(ggridges)
    #install.packages("ggjoy")
    library(ggjoy)
    
    # For every Year/Option group, locate the peak of the kernel density
    # estimate of Percent and store, repeated on each row of the group:
    #   max_density            height of the density at its peak
    #   which_max_density      index of the peak within the density grid
    #   which_max_x_intercept  x value (vote percentage) at the peak
    Catalan_elections_with_max_density <- Catalan_elections %>%
      # Drop incomplete rows up front so density() never receives an NA.
      na.omit() %>%
      group_by(Year, Option) %>%
      mutate(
        max_density = max(density(Percent)$y),
        which_max_density = which.max(density(Percent)$y),
        which_max_x_intercept = density(Percent)$x[which_max_density]
      ) %>%
      # Return an ungrouped table. If the grouping were kept, a later
      # mutate(YearFct = fct_rev(as.factor(Year))) would be evaluated one group
      # at a time, i.e. on single-level factors, and the reversal would be lost.
      ungroup()
    
    # Ridgeline plot of vote share per election year, with a white vertical
    # marker at the density peak of every Year/Option ridge.
    Catalan_elections_with_max_density %>%
      # Make sure the table is ungrouped before building the year factor: on
      # grouped data the factor is created per group (one level each), which
      # leaves fct_rev() with nothing to reverse.
      ungroup() %>%
      mutate(YearFct = fct_rev(as.factor(Year))) %>%
      ggplot(aes(y = YearFct)) +
      geom_density_ridges(
        aes(x = Percent, fill = paste(YearFct, Option)),
        alpha = .8, color = "white", from = 0, to = 100
      ) +
      geom_segment(
        # The peak columns hold one value per Year/Option group, repeated on
        # every row. Keep a single row per group so that exactly one segment is
        # drawn per ridge instead of one (overplotted) segment per municipality.
        data = function(d) {
          distinct(d, YearFct, Option, which_max_x_intercept, max_density)
        },
        aes(
          x = which_max_x_intercept,
          xend = which_max_x_intercept,
          # Discrete y positions are the integer level codes of the factor.
          y = as.numeric(YearFct),
          # 48 is an empirical factor that converts density height into y-axis
          # units for this plot; adjust it if the data or ridge scaling change.
          yend = as.numeric(YearFct) + max_density * 48
        ),
        # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
        # lines; `size` is kept for compatibility with older releases.
        color = "white", size = 0.75
      ) +
      labs(
        x = "Vote (%)",
        y = "Election Year",
        title = "Indy vs Unionist vote in Catalan elections",
        subtitle = "Analysis unit: municipalities (n = 949)",
        caption = "Marc Belzunces (@marcbeldata) | Source: Idescat"
      ) +
      scale_y_discrete(expand = c(0, 0)) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_fill_cyclical(
        # Legend shows only the two 1980 keys, relabelled without the year; the
        # four colours are reused cyclically for the remaining keys.
        breaks = c("1980 Indy", "1980 Unionist"),
        labels = c(`1980 Indy` = "Indy", `1980 Unionist` = "Unionist"),
        values = c("#ff0000", "#0000ff", "#ff8080", "#8080ff"),
        name = "Option", guide = "legend"
      ) +
      coord_cartesian(clip = "off") +
      theme_ridges(grid = FALSE)
    #> Picking joint bandwidth of 3.16
    

    Created on 2021-12-14 by the reprex package (v2.0.1)

    NB. I don't really understand how the scaling works in geom_density_ridges(), so I used "max_density * a constant" to get it approximately correct. Depending on your use-case you will need to adjust the constant or work out how the peak density relates to the y coordinates of the plot.

    Original answer:

    I don't have your dataset "Catalan_elections", so here is an example using the palmerpenguins dataset:

    library(tidyverse)
    library(palmerpenguins)
    library(ggridges)
    
    # Stand-in example on the palmerpenguins data: one bill-length ridge per
    # year, with a vertical line at the median of each ridge.
    # NOTE(review): the labels and the cyclical fill scale below are carried
    # over unchanged from the Catalan elections plot in the question; they do
    # not describe the penguins data.
    penguins %>%
      na.omit() %>%
      mutate(YearFct = fct_rev(as.factor(year))) %>%
      ggplot(mapping = aes(x = bill_length_mm, y = YearFct, fill = YearFct)) +
      geom_density_ridges(
        # quantiles = 2 splits each density into two halves, so the single
        # quantile line that is drawn marks the median.
        quantile_lines = TRUE,
        quantiles = 2,
        from = 0,
        to = 100,
        color = "white",
        alpha = .8
      ) +
      theme_ridges(grid = FALSE) +
      coord_cartesian(clip = "off") +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_discrete(expand = c(0, 0)) +
      scale_fill_cyclical(
        name = "Option",
        guide = "legend",
        values = c("#ff0000", "#0000ff", "#ff8080", "#8080ff"),
        breaks = c("1980 Indy", "1980 Unionist"),
        labels = c(`1980 Indy` = "Indy", `1980 Unionist` = "Unionist")
      ) +
      labs(
        title = "Indy vs Unionist vote in Catalan elections",
        subtitle = "Analysis unit: municipalities (n = 949)",
        caption = "Marc Belzunces (@marcbeldata) | Source: Idescat",
        x = "Vote (%)",
        y = "Election Year"
      )
    #> Picking joint bandwidth of 1.92
    

    Created on 2021-12-13 by the reprex package (v2.0.1)