Search code examples
Tags: r, ggplot2, density-plot

R: Is there a way to do the equivalent of geom_jitter for a ggplot density plot?


I have the following data frame, and I use it to create a ggplot consisting of a group of density plots that I have arranged using facet_wrap.

# Example data (dput() output): 70 rows, grouped by `date`. Bound to `df1`
# because the plotting code refers to the data frame by that name.
df1 <- structure(list(date = c("2021-07-25", "2021-07-25", "2021-07-25", 
"2021-07-25", "2021-07-25", "2021-07-25", "2021-07-25", "2021-07-25", 
"2021-07-25", "2021-07-25", "2021-07-26", "2021-07-26", "2021-07-26", 
"2021-07-26", "2021-07-26", "2021-07-26", "2021-07-26", "2021-07-26", 
"2021-07-26", "2021-07-26", "2021-07-27", "2021-07-27", "2021-07-27", 
"2021-07-27", "2021-07-27", "2021-07-27", "2021-07-27", "2021-07-27", 
"2021-07-27", "2021-07-27", "2021-07-28", "2021-07-28", "2021-07-28", 
"2021-07-28", "2021-07-28", "2021-07-28", "2021-07-28", "2021-07-28", 
"2021-07-28", "2021-07-28", "2021-07-29", "2021-07-29", "2021-07-29", 
"2021-07-29", "2021-07-29", "2021-07-29", "2021-07-29", "2021-07-29", 
"2021-07-29", "2021-07-29", "2021-07-30", "2021-07-30", "2021-07-30", 
"2021-07-30", "2021-07-30", "2021-07-30", "2021-07-30", "2021-07-30", 
"2021-07-30", "2021-07-30", "2021-07-31", "2021-07-31", "2021-07-31", 
"2021-07-31", "2021-07-31", "2021-07-31", "2021-07-31", "2021-07-31", 
"2021-07-31", "2021-07-31"), Order_Type = structure(c(1L, 1L, 
2L, 2L, 3L, 3L, 8L, 8L, 9L, 9L, 1L, 1L, 2L, 2L, 3L, 3L, 8L, 8L, 
9L, 9L, 1L, 1L, 2L, 2L, 3L, 3L, 8L, 8L, 9L, 9L, 1L, 1L, 2L, 2L, 
3L, 3L, 8L, 8L, 9L, 9L, 1L, 1L, 2L, 2L, 3L, 3L, 8L, 8L, 9L, 9L, 
1L, 1L, 2L, 2L, 3L, 3L, 8L, 8L, 9L, 9L, 1L, 1L, 2L, 2L, 3L, 3L, 
8L, 8L, 9L, 9L), .Label = c("group1", "group2", "group3", 
"group4", "group5", "group6", 
"group7", "group8", "group9"), class = "factor"), 
    ntf_normalized = c(1, 1, 1, 1, 1, 1, 0.5, 0.5, 0.8372252453, 
    0.8372252453, 0.3275361961, 0.3275361961, 1, 1, 1, 1, 0.3275361961, 
    0.3275361961, 0.1840258965, 0.1840258965, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 0.1603545867, 0.1603545867, 0.4950785714, 0.4950785714, 
    0.7142857145, 0.7142857145, 1, 1, 1, 1, 1, 1, 1, 1, 0.5526912352, 
    0.5526912352, 1, 1, 1, 1, 0, 0, 1, 1), ntf_first_touch = c(1, 
    1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1), spend = c(25.99294208, 
    25.99294208, 308.2773075, 308.2773075, 206.28925744, 206.28925744, 
    0.00398, 0.00398, 304.77034136, 304.77034136, 410.2332538, 
    410.2332538, 185.819275, 185.819275, 213.72726668, 213.72726668, 
    414.61074958, 414.61074958, 323.66596883, 323.66596883, 570.88491388, 
    570.88491388, 115.2426355, 115.2426355, 234.28878223, 234.28878223, 
    846.78931866, 846.78931866, 336.60236658, 336.60236658, 803.26558903, 
    803.26558903, 258.157645, 258.157645, 214.85833223, 214.85833223, 
    1157.49991513, 1157.49991513, 321.93666812, 321.93666812, 
    523.59701611, 523.59701611, 122.06332, 122.06332, 137.31790493, 
    137.31790493, 724.54289394, 724.54289394, 213.9509169, 213.9509169, 
    393.06862193, 393.06862193, 161.8394165, 161.8394165, 88.23073883, 
    88.23073883, 342.05106384, 342.05106384, 117.82164784, 117.82164784, 
    388.23691524, 388.23691524, 97.247412, 97.247412, 99.12069722, 
    99.12069722, 0, 0, 91.27114294, 91.27114294), names = c("cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch", "cac_normalized", "cac_first_touch", "cac_normalized", 
    "cac_first_touch"), values = c(25.99294208, 25.99294208, 
    308.2773075, 308.2773075, 206.28925744, 206.28925744, 0.00796, 
    0.00398, 364.024308954985, 152.38517068, 1252.4821948984, 
    410.2332538, 185.819275, 185.819275, 213.72726668, 213.72726668, 
    1265.84711710279, 414.61074958, 1758.80664072733, 323.66596883, 
    570.88491388, 570.88491388, 115.2426355, 115.2426355, 234.28878223, 
    234.28878223, 846.78931866, 846.78931866, 336.60236658, 336.60236658, 
    803.26558903, 803.26558903, 258.157645, 258.157645, 214.85833223, 
    214.85833223, 1157.49991513, 1157.49991513, 321.93666812, 
    321.93666812, 523.59701611, 523.59701611, 122.06332, 122.06332, 
    137.31790493, 137.31790493, 4518.37960391812, 724.54289394, 
    432.155478462706, 213.9509169, 550.296070536911, 393.06862193, 
    161.8394165, 161.8394165, 88.23073883, 88.23073883, 342.05106384, 
    342.05106384, 117.82164784, 117.82164784, 702.448113003837, 
    388.23691524, 97.247412, 97.247412, 99.12069722, 99.12069722, 
    0, 0, 91.27114294, 91.27114294)), row.names = c(NA, -70L), groups = structure(list(
    date = c("2021-07-25", "2021-07-26", "2021-07-27", "2021-07-28", 
    "2021-07-29", "2021-07-30", "2021-07-31"), .rows = structure(list(
        1:10, 11:20, 21:30, 31:40, 41:50, 51:60, 61:70), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -7L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))



# Density of `values`, filled by `names`, drawn in one panel per `Order_Type`
# with independent axis scales in every panel.
ggplot(data = df1, mapping = aes(x = values, fill = names)) +
  geom_density(alpha = 0.3) +
  facet_wrap(vars(Order_Type), scales = "free") +
  labs(
    title = "Advertising",
    subtitle = "CAC Distribution",
    x = "CAC",
    y = "density"
  ) +
  # Centre the title and subtitle; the title is additionally bold.
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )

[Image: the faceted density plot produced by the code above]

The problem is that what is happening in group2 and group3 is a little hard to explain; it looks like a mistake. It turns out that within group2 and group3 the values for the two series are exactly the same, so one density curve is drawn directly on top of the other and hides it. As a result, it looks like there is data for only one of the names (cac_normalized but not cac_first_touch). Is there a way to shift one of the curves slightly so that both are visible? I know that geom_point can be swapped for geom_jitter for this exact reason, but if I put geom_jitter into my ggplot I get an error, and changing the alpha in geom_density doesn't make any difference. Does anyone have an idea here?


Solution

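  • You could use the after_stat() function to add a small vertical offset to the computed density of each successive group, so that curves that would otherwise coincide are drawn slightly apart.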

    library(ggplot2)

    # Same faceted density plot, but each group's computed density is raised
    # by 0.0001 times its group index so identical curves no longer coincide.
    ggplot(data = df1, mapping = aes(x = values, fill = names)) +
      geom_density(
        mapping = aes(y = after_stat(density + 0.0001 * group)),
        alpha = 0.3
      ) +
      facet_wrap(vars(Order_Type), scales = "free") +
      labs(
        title = "Advertising",
        subtitle = "CAC Distribution",
        x = "CAC",
        y = "density"
      ) +
      # Centre the title and subtitle; the title is additionally bold.
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5)
      )