Search code examples
r ggplot2 boxplot aesthetics

Box plot representing the wrong aesthetics


I have the following data

structure(list(imaging_date = structure(c(19010, 19010, 19024, 
19024, 19010, 19024, 19010, 19024, 19010, 19024, 19010, 19024, 
19010, 19010, 19024, 19010, 19010, 19010, 19024, 19024, 19024, 
19010, 19010, 19010, 19024, 19024, 19010, 19010, 19010, 19010, 
19010, 19010, 19010, 19010, 19024, 19024), class = "Date"), diameter_on_mask = c(960L, 
960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 
480L, 480L, 480L, 960L, 960L, 960L, 960L, 960L, 960L, 960L, 960L, 
960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 
480L, 480L), diameter_measured = c(1020L, 1040L, 1210L, 1120L, 
532L, 626L, 541L, 595L, 519L, 602L, 515L, 638L, 519L, 518L, 593L, 
1030L, 989L, 999L, 1120L, 1140L, 1220L, 1000L, 1010L, 1010L, 
1370L, 1290L, 519L, 511L, 505L, 522L, 502L, 501L, 536L, 532L, 
686L, 754L), slideno_cno_region_id = c("3_1_1", "3_1_2", "3_1_1", 
"3_1_2", "3_1_3", "3_1_3", "3_1_4", "3_1_4", "3_1_5", "3_1_5", 
"3_1_6", "3_1_6", "", "", "", "", "", "", "", "", "", "3_1_7", 
"", "", "3_1_7", "", "", "", "", "", "", "", "", "", "", ""), 
    region = c("inlet", "inlet", "inlet", "inlet", "inlet", "inlet", 
    "inlet", "inlet", "inlet", "inlet", "inlet", "inlet", "inlet", 
    "inlet", "inlet", "middle", "middle", "middle", "middle", 
    "middle", "middle", "outlet", "outlet", "outlet", "outlet", 
    "outlet", "outlet", "outlet", "outlet", "outlet", "outlet", 
    "outlet", "outlet", "outlet", "outlet", "outlet"), norm_diameter = c(1.0625, 
    1.08333333333333, 1.26041666666667, 1.16666666666667, 1.10833333333333, 
    1.30416666666667, 1.12708333333333, 1.23958333333333, 1.08125, 
    1.25416666666667, 1.07291666666667, 1.32916666666667, 1.08125, 
    1.07916666666667, 1.23541666666667, 1.07291666666667, 1.03020833333333, 
    1.040625, 1.16666666666667, 1.1875, 1.27083333333333, 1.04166666666667, 
    1.05208333333333, 1.05208333333333, 1.42708333333333, 1.34375, 
    1.08125, 1.06458333333333, 1.05208333333333, 1.0875, 1.04583333333333, 
    1.04375, 1.11666666666667, 1.10833333333333, 1.42916666666667, 
    1.57083333333333)), row.names = c(NA, -36L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x7fc0c100d2e0>, index = structure(integer(0), "`__imaging_date`" = c(1L, 
2L, 5L, 7L, 9L, 11L, 13L, 14L, 16L, 17L, 18L, 22L, 23L, 24L, 
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 3L, 4L, 6L, 8L, 10L, 
12L, 15L, 19L, 20L, 21L, 25L, 26L, 35L, 36L)))

I want to plot transparent boxplots under the actual data points. I did:

# Attempt from the question: imaging_date on x, norm_diameter on y,
# with colour mapped to factor(region).
ggplot(dt, aes(x=imaging_date, y=norm_diameter, colour = factor(region)))+
  geom_point()+
  # Fill each box with its own outline colour at alpha 0.4.
  geom_boxplot(aes(fill = after_scale(alpha(colour, 0.4))))

and got

enter image description here

First of all, my data belong to only two different days. Second, the boxplots are located at neither of those dates. If I group the data by imaging_date, these problems disappear, but then the boxplots lose their colors.

enter image description here

What is wrong with my data/code?


Solution

  • If you want box-and-whisker plots by region, then region should be the x variable. Also add position = position_dodge(...) to the points layer.

    Also, you don't need to explicitly coerce to factor when the variable (region) is of class character.

    # Answer's version: region is mapped to x, group and colour
    # (no factor() coercion, since region is a character column).
    ggplot(dt, aes(x=region, y=norm_diameter, group = region, colour = region))+
      # Points are placed with position_dodge (width 0.75).
      geom_point(position = position_dodge(width = 0.75)) +
      # Box fill reuses the outline colour at alpha 0.4.
      geom_boxplot(aes(fill = after_scale(alpha(colour, 0.4))))
    

    enter image description here