Search code examples
r ggplot2 boxplot outliers geom-text

Why are all the bars in my boxplot lumped together?


I tried to create a boxplot for each date, but the boxes were not separated by date. Instead, the data for all dates were pooled together.

Here is the original data:

df<-structure(list(traitement = c("WS", "WW", "WW", "WS", "WW", "WS", 
"WW", "WS", "WW", "WS", "WW", "WS", "WW", "WS", "WS", "WW", "WS", 
"WW", "WW", "WS", "WW", "WS", "WW", "WS", "WW", "WS", "WW", "WS", 
"WW", "WS", "WS", "WW", "WS", "WW", "WW", "WS", "WW", "WS", "WW", 
"WS", "WW", "WS", "WW", "WS", "WW", "WS", "WS", "WW", "WS", "WW", 
"WW", "WS", "WW", "WS", "WW", "WS", "WW", "WS", "WS", "WW"), 
    Variete = c("Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", 
    "Heihe", "Heihe", "Heihe", "Heihe", "Heihe", "Heihe"), Date_obs = structure(c(1654128000, 
    1654128000, 1654128000, 1654128000, 1654128000, 1654128000, 
    1654128000, 1654128000, 1654128000, 1654128000, 1654128000, 
    1654128000, 1654128000, 1654128000, 1654128000, 1654128000, 
    1654732800, 1654732800, 1654732800, 1654732800, 1654732800, 
    1654732800, 1654732800, 1654732800, 1654732800, 1654732800, 
    1654732800, 1654732800, 1654732800, 1654732800, 1654732800, 
    1654732800, 1654905600, 1654905600, 1654905600, 1654905600, 
    1654905600, 1654905600, 1654905600, 1654905600, 1654905600, 
    1654905600, 1654905600, 1654905600, 1654905600, 1654905600, 
    1654905600, 1654905600, 1655337600, 1655337600, 1655337600, 
    1655337600, 1655337600, 1655337600, 1655337600, 1655337600, 
    1655337600, 1655337600, 1655337600, 1655337600), class = c("POSIXct", 
    "POSIXt"), tzone = "UTC"), SF_Plante_Totale = c(72420.3104, 
    29114.0672, 75556.9088, 74015.0912, 78418.8832, 70472.4416, 
    112207.4432, 88377.9264, 69432.7936, 92212.8544, 77802.9408, 
    82542.1664, 88109.1872, 76698.56, 80511.9104, 79568.3808, 
    155241.024, 60505.552, 166123.9808, 155033.0944, 169152.6912, 
    161068.9376, 225060.2528, 196524.8576, 140758.5312, 183907.8464, 
    165533.5392, 179435.3984, 154201.376, 165504.1152, 173026.8512, 
    169172.3072, 181198.8768, 77153.6512, 194143.4752, 179708.0608, 
    191554.1632, 183735.2256, 250527.7056, 217831.7568, 167404.9056, 
    217874.912, 195002.656, 210768.0352, 164633.1648, 188760.8448, 
    200502.9824, 196662.1696, 198612, 159042.6048, 297564.912, 
    201389.6256, 267434.736, 227388.672, 264551.184, 223802.8672, 
    195124.2752, 201203.2736, 214950.1664, 273315.6128)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -60L))

Here is the code I used:

library(outliers)
library(ggplot2)
library(ggrepel)
# Convert the observation timestamps to Date so a date x-scale can be used.
df[["Date_obs"]] <- as.Date(df[["Date_obs"]], format = "%Y-%m-%d")

# Dodged boxplots of total leaf area against observation date, filled by
# treatment. No `group` aesthetic is supplied here, so grouping is left to
# ggplot2's default (derived from the discrete variables in the plot).
ggplot(
  data = df,
  mapping = aes(x = Date_obs, y = SF_Plante_Totale, fill = traitement)
) +
  geom_boxplot(width = 0.7, position = position_dodge2(width = 0.8)) +
  # One axis break per calendar day, labelled in ISO format.
  scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
  labs(title = "Heihe", x = "Date", y = "Totale leaf area") +
  theme_minimal() +
  # Tilt the date labels so daily breaks do not overlap.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Here is the figure I got: enter image description here

This is the rendering I expected, but the dates on its x-axis are not consecutive. Also, is it possible to show only the outlier values in the figure? I tried geom_text_repel, but it labelled every value, which made the figure very messy.

enter image description here


Solution

  • Because your x variable is continuous, you may need to specify the grouping variable as the interaction between the x variable and fill variable:

    # Draw one box per (date, treatment) pair. The x aesthetic is a Date on a
    # date scale, so the grouping is spelled out explicitly via interaction().
    ggplot(
      data = df,
      mapping = aes(
        x = Date_obs,
        y = SF_Plante_Totale,
        fill = traitement,
        group = interaction(Date_obs, traitement)
      )
    ) +
      geom_boxplot(width = 0.7, position = position_dodge2(width = 0.8)) +
      # One axis break per calendar day, labelled in ISO format.
      scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
      labs(title = "Heihe", x = "Date", y = "Totale leaf area") +
      theme_minimal() +
      # Tilt the date labels so daily breaks do not overlap.
      theme(axis.text.x = element_text(hjust = 1, angle = 45))
    

    enter image description here